[AArch64] Make vabs<q>_f<32, 64> a tree/gimple intrinsic.
[official-gcc.git] / gcc/config/aarch64/aarch64.c
blob d2a495dbc626bd3118c52980544157b4c8dfd5b4
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
49 /* Classifies an address.
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
 69 ADDRESS_SYMBOLIC
70 A constant symbolic address, in pc-relative literal pool. */
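/* For illustration only (typical assembly forms for each class; these
   examples are not from the original source and are not exhaustive):
     ADDRESS_REG_IMM    ldr  x0, [x1, #16]
     ADDRESS_REG_WB     ldr  x0, [x1, #16]!    or    ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr  x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr  x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr  x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr  x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr  x0, .LCn   (pc-relative literal load)  */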
72 enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
82 struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
90 /* The current code model. */
91 enum aarch64_code_model aarch64_cmodel;
93 #ifdef HAVE_AS_TLS
94 #undef TARGET_HAVE_TLS
95 #define TARGET_HAVE_TLS 1
96 #endif
98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 const_tree,
101 enum machine_mode *, int *,
102 bool *);
103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105 static void aarch64_override_options_after_change (void);
106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
109 static unsigned bit_count (unsigned HOST_WIDE_INT);
110 static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 const unsigned char *sel);
116 /* The processor for which instructions should be scheduled. */
117 enum aarch64_processor aarch64_tune = generic;
119 /* The current tuning set. */
120 const struct tune_params *aarch64_tune_params;
122 /* Mask to specify which instructions we are allowed to generate. */
123 unsigned long aarch64_isa_flags = 0;
125 /* Mask to specify which instruction scheduling options should be used. */
126 unsigned long aarch64_tune_flags = 0;
128 /* Tuning parameters. */
130 #if HAVE_DESIGNATED_INITIALIZERS
131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
132 #else
133 #define NAMED_PARAM(NAME, VAL) (VAL)
134 #endif
136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
137 __extension__
138 #endif
139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
141 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
156 __extension__
157 #endif
158 static const struct cpu_addrcost_table generic_addrcost_table =
160 NAMED_PARAM (pre_modify, 0),
161 NAMED_PARAM (post_modify, 0),
162 NAMED_PARAM (register_offset, 0),
163 NAMED_PARAM (register_extend, 0),
164 NAMED_PARAM (imm_offset, 0)
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_regmove_cost generic_regmove_cost =
172 NAMED_PARAM (GP2GP, 1),
173 NAMED_PARAM (GP2FP, 2),
174 NAMED_PARAM (FP2GP, 2),
175 /* We currently do not provide direct support for TFmode Q->Q move.
176 Therefore we need to raise the cost above 2 in order to have
177 reload handle the situation. */
178 NAMED_PARAM (FP2FP, 4)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct tune_params generic_tunings =
186 &generic_rtx_cost_table,
187 &generic_addrcost_table,
188 &generic_regmove_cost,
189 NAMED_PARAM (memmov_cost, 4)
192 /* A processor implementing AArch64. */
193 struct processor
195 const char *const name;
196 enum aarch64_processor core;
197 const char *arch;
198 const unsigned long flags;
199 const struct tune_params *const tune;
202 /* Processor cores implementing AArch64. */
203 static const struct processor all_cores[] =
205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207 #include "aarch64-cores.def"
208 #undef AARCH64_CORE
209 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210 {NULL, aarch64_none, NULL, 0, NULL}
213 /* Architectures implementing AArch64. */
214 static const struct processor all_architectures[] =
216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217 {NAME, CORE, #ARCH, FLAGS, NULL},
218 #include "aarch64-arches.def"
219 #undef AARCH64_ARCH
220 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221 {NULL, aarch64_none, NULL, 0, NULL}
224 /* Target specification. These are populated as commandline arguments
225 are processed, or NULL if not specified. */
226 static const struct processor *selected_arch;
227 static const struct processor *selected_cpu;
228 static const struct processor *selected_tune;
230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
232 /* An ISA extension in the co-processor and main instruction set space. */
233 struct aarch64_option_extension
235 const char *const name;
236 const unsigned long flags_on;
237 const unsigned long flags_off;
240 /* ISA extensions in AArch64. */
241 static const struct aarch64_option_extension all_extensions[] =
243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244 {NAME, FLAGS_ON, FLAGS_OFF},
245 #include "aarch64-option-extensions.def"
246 #undef AARCH64_OPT_EXTENSION
247 {NULL, 0, 0}
250 /* Used to track the size of an address when generating a pre/post
251 increment address. */
252 static enum machine_mode aarch64_memory_reference_mode;
254 /* Used to force GTY into this file. */
255 static GTY(()) int gty_dummy;
257 /* A table of valid AArch64 "bitmask immediate" values for
258 logical instructions. */
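/* Illustrative note: a "bitmask immediate" is the class of constants
   encodable in the immediate field of AND/ORR/EOR: a contiguous run of
   set bits, rotated and then replicated across the register in 2, 4, 8,
   16, 32 or 64-bit elements.  For example (values chosen for
   illustration):
     0x5555555555555555   one set bit replicated in 2-bit elements
     0x00ff00ff00ff00ff   8 set bits replicated in 16-bit elements
   The table below caches every such 64-bit value.  */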
260 #define AARCH64_NUM_BITMASKS 5334
261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
263 /* Did we set flag_omit_frame_pointer just so
264 aarch64_frame_pointer_required would be called? */
265 static bool faked_omit_frame_pointer;
267 typedef enum aarch64_cond_code
269 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
273 aarch64_cc;
275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
277 /* The condition codes of the processor, and the inverse function. */
278 static const char * const aarch64_condition_codes[] =
280 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
284 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
285 unsigned
286 aarch64_dbx_register_number (unsigned regno)
288 if (GP_REGNUM_P (regno))
289 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290 else if (regno == SP_REGNUM)
291 return AARCH64_DWARF_SP;
292 else if (FP_REGNUM_P (regno))
293 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
295 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296 equivalent DWARF register. */
297 return DWARF_FRAME_REGISTERS;
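/* Illustrative mapping (following the AArch64 DWARF register numbering
   used by the AARCH64_DWARF_* macros): x0-x30 map to 0-30, sp to 31 and
   v0-v31 to 64-95; anything else (e.g. the condition flags register)
   has no DWARF equivalent and falls through to the
   DWARF_FRAME_REGISTERS return above.  */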
300 /* Return TRUE if MODE is any of the large INT modes. */
301 static bool
302 aarch64_vect_struct_mode_p (enum machine_mode mode)
304 return mode == OImode || mode == CImode || mode == XImode;
307 /* Return TRUE if MODE is any of the vector modes. */
308 static bool
309 aarch64_vector_mode_p (enum machine_mode mode)
311 return aarch64_vector_mode_supported_p (mode)
312 || aarch64_vect_struct_mode_p (mode);
315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
316 static bool
317 aarch64_array_mode_supported_p (enum machine_mode mode,
318 unsigned HOST_WIDE_INT nelems)
320 if (TARGET_SIMD
321 && AARCH64_VALID_SIMD_QREG_MODE (mode)
322 && (nelems >= 2 && nelems <= 4))
323 return true;
325 return false;
328 /* Implement HARD_REGNO_NREGS. */
331 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
333 switch (aarch64_regno_regclass (regno))
335 case FP_REGS:
336 case FP_LO_REGS:
337 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
338 default:
339 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
341 gcc_unreachable ();
344 /* Implement HARD_REGNO_MODE_OK. */
347 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
349 if (GET_MODE_CLASS (mode) == MODE_CC)
350 return regno == CC_REGNUM;
352 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353 || regno == ARG_POINTER_REGNUM)
354 return mode == Pmode;
356 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
357 return 1;
359 if (FP_REGNUM_P (regno))
361 if (aarch64_vect_struct_mode_p (mode))
362 return
363 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
364 else
365 return 1;
368 return 0;
371 /* Return true if calls to DECL should be treated as
372 long-calls (ie called via a register). */
373 static bool
374 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
376 return false;
379 /* Return true if calls to symbol-ref SYM should be treated as
380 long-calls (ie called via a register). */
381 bool
382 aarch64_is_long_call_p (rtx sym)
384 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
387 /* Return true if the offsets to a zero/sign-extract operation
388 represent an expression that matches an extend operation. The
 389 operands represent the parameters from
391 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
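/* A worked example of the test below (values chosen for illustration):
   in DImode, mult_imm == 4 and extract_imm == 34 give
     34 & ~7 == 32 (a power of two), 34 & 7 == 2, and 4 == 1 << 2,
   so the form is accepted; it corresponds to a 32-bit value extended
   and shifted left by 2, i.e. roughly an extended-register operand
   such as "add x0, x1, w2, uxtw #2".  */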
392 bool
393 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
394 rtx extract_imm)
396 HOST_WIDE_INT mult_val, extract_val;
398 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
399 return false;
401 mult_val = INTVAL (mult_imm);
402 extract_val = INTVAL (extract_imm);
404 if (extract_val > 8
405 && extract_val < GET_MODE_BITSIZE (mode)
406 && exact_log2 (extract_val & ~7) > 0
407 && (extract_val & 7) <= 4
408 && mult_val == (1 << (extract_val & 7)))
409 return true;
411 return false;
414 /* Emit an insn that's a simple single-set. Both the operands must be
415 known to be valid. */
416 inline static rtx
417 emit_set_insn (rtx x, rtx y)
419 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
422 /* X and Y are two things to compare using CODE. Emit the compare insn and
423 return the rtx for register 0 in the proper mode. */
425 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
427 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
430 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
431 return cc_reg;
434 /* Build the SYMBOL_REF for __tls_get_addr. */
436 static GTY(()) rtx tls_get_addr_libfunc;
439 aarch64_tls_get_addr (void)
441 if (!tls_get_addr_libfunc)
442 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443 return tls_get_addr_libfunc;
446 /* Return the TLS model to use for ADDR. */
448 static enum tls_model
449 tls_symbolic_operand_type (rtx addr)
451 enum tls_model tls_kind = TLS_MODEL_NONE;
452 rtx sym, addend;
454 if (GET_CODE (addr) == CONST)
456 split_const (addr, &sym, &addend);
457 if (GET_CODE (sym) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
460 else if (GET_CODE (addr) == SYMBOL_REF)
461 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
463 return tls_kind;
 466 /* We allow LO_SUMs in our legitimate addresses so that combine
 467 can take care of merging addresses where necessary, but for
 468 code generation purposes we generate the address as:
470 RTL Absolute
471 tmp = hi (symbol_ref); adrp x1, foo
472 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
475 PIC TLS
476 adrp x1, :got:foo adrp tmp, :tlsgd:foo
477 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
478 bl __tls_get_addr
481 Load TLS symbol, depending on TLS mechanism and TLS access model.
483 Global Dynamic - Traditional TLS:
484 adrp tmp, :tlsgd:imm
485 add dest, tmp, #:tlsgd_lo12:imm
486 bl __tls_get_addr
488 Global Dynamic - TLS Descriptors:
489 adrp dest, :tlsdesc:imm
490 ldr tmp, [dest, #:tlsdesc_lo12:imm]
491 add dest, dest, #:tlsdesc_lo12:imm
492 blr tmp
493 mrs tp, tpidr_el0
494 add dest, dest, tp
496 Initial Exec:
497 mrs tp, tpidr_el0
498 adrp tmp, :gottprel:imm
499 ldr dest, [tmp, #:gottprel_lo12:imm]
500 add dest, dest, tp
502 Local Exec:
503 mrs tp, tpidr_el0
504 add t0, tp, #:tprel_hi12:imm
505 add t0, #:tprel_lo12_nc:imm
508 static void
509 aarch64_load_symref_appropriately (rtx dest, rtx imm,
510 enum aarch64_symbol_type type)
512 switch (type)
514 case SYMBOL_SMALL_ABSOLUTE:
516 rtx tmp_reg = dest;
517 if (can_create_pseudo_p ())
519 tmp_reg = gen_reg_rtx (Pmode);
522 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 emit_insn (gen_add_losym (dest, tmp_reg, imm));
524 return;
527 case SYMBOL_SMALL_GOT:
529 rtx tmp_reg = dest;
530 if (can_create_pseudo_p ())
532 tmp_reg = gen_reg_rtx (Pmode);
534 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
536 return;
539 case SYMBOL_SMALL_TLSGD:
541 rtx insns;
542 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
544 start_sequence ();
545 emit_call_insn (gen_tlsgd_small (result, imm));
546 insns = get_insns ();
547 end_sequence ();
549 RTL_CONST_CALL_P (insns) = 1;
550 emit_libcall_block (insns, dest, result, imm);
551 return;
554 case SYMBOL_SMALL_TLSDESC:
556 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
557 rtx tp;
559 emit_insn (gen_tlsdesc_small (imm));
560 tp = aarch64_load_tp (NULL);
561 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
563 return;
566 case SYMBOL_SMALL_GOTTPREL:
568 rtx tmp_reg = gen_reg_rtx (Pmode);
569 rtx tp = aarch64_load_tp (NULL);
570 emit_insn (gen_tlsie_small (tmp_reg, imm));
571 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
573 return;
576 case SYMBOL_SMALL_TPREL:
578 rtx tp = aarch64_load_tp (NULL);
579 emit_insn (gen_tlsle_small (dest, tp, imm));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 return;
584 default:
585 gcc_unreachable ();
589 /* Emit a move from SRC to DEST. Assume that the move expanders can
590 handle all moves if !can_create_pseudo_p (). The distinction is
591 important because, unlike emit_move_insn, the move expanders know
592 how to force Pmode objects into the constant pool even when the
593 constant pool address is not itself legitimate. */
594 static rtx
595 aarch64_emit_move (rtx dest, rtx src)
597 return (can_create_pseudo_p ()
598 ? emit_move_insn (dest, src)
599 : emit_move_insn_1 (dest, src));
602 void
603 aarch64_split_128bit_move (rtx dst, rtx src)
605 rtx low_dst;
607 gcc_assert (GET_MODE (dst) == TImode);
609 if (REG_P (dst) && REG_P (src))
611 int src_regno = REGNO (src);
612 int dst_regno = REGNO (dst);
614 gcc_assert (GET_MODE (src) == TImode);
616 /* Handle r -> w, w -> r. */
617 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
619 emit_insn (gen_aarch64_movtilow_di (dst,
620 gen_lowpart (word_mode, src)));
621 emit_insn (gen_aarch64_movtihigh_di (dst,
622 gen_highpart (word_mode, src)));
623 return;
625 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
627 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
628 src));
629 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
630 src));
631 return;
633 /* Fall through to r -> r cases. */
636 low_dst = gen_lowpart (word_mode, dst);
637 if (REG_P (low_dst)
638 && reg_overlap_mentioned_p (low_dst, src))
640 aarch64_emit_move (gen_highpart (word_mode, dst),
641 gen_highpart_mode (word_mode, TImode, src));
642 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
644 else
646 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647 aarch64_emit_move (gen_highpart (word_mode, dst),
648 gen_highpart_mode (word_mode, TImode, src));
652 bool
653 aarch64_split_128bit_move_p (rtx dst, rtx src)
655 return (! REG_P (src)
656 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
659 static rtx
660 aarch64_force_temporary (rtx x, rtx value)
662 if (can_create_pseudo_p ())
663 return force_reg (Pmode, value);
664 else
666 x = aarch64_emit_move (x, value);
667 return x;
672 static rtx
673 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
675 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
677 rtx high;
678 /* Load the full offset into a register. This
679 might be improvable in the future. */
680 high = GEN_INT (offset);
681 offset = 0;
682 high = aarch64_force_temporary (temp, high);
683 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
685 return plus_constant (mode, reg, offset);
688 void
689 aarch64_expand_mov_immediate (rtx dest, rtx imm)
691 enum machine_mode mode = GET_MODE (dest);
692 unsigned HOST_WIDE_INT mask;
693 int i;
694 bool first;
695 unsigned HOST_WIDE_INT val;
696 bool subtargets;
697 rtx subtarget;
698 int one_match, zero_match;
700 gcc_assert (mode == SImode || mode == DImode);
 702 /* Check what kind of symbol it is. */
703 if (GET_CODE (imm) == SYMBOL_REF
704 || GET_CODE (imm) == LABEL_REF
705 || GET_CODE (imm) == CONST)
707 rtx mem, base, offset;
708 enum aarch64_symbol_type sty;
710 /* If we have (const (plus symbol offset)), separate out the offset
711 before we start classifying the symbol. */
712 split_const (imm, &base, &offset);
714 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
715 switch (sty)
717 case SYMBOL_FORCE_TO_MEM:
718 if (offset != const0_rtx
719 && targetm.cannot_force_const_mem (mode, imm))
721 gcc_assert(can_create_pseudo_p ());
722 base = aarch64_force_temporary (dest, base);
723 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 aarch64_emit_move (dest, base);
725 return;
727 mem = force_const_mem (mode, imm);
728 gcc_assert (mem);
729 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
730 return;
732 case SYMBOL_SMALL_TLSGD:
733 case SYMBOL_SMALL_TLSDESC:
734 case SYMBOL_SMALL_GOTTPREL:
735 case SYMBOL_SMALL_GOT:
736 if (offset != const0_rtx)
738 gcc_assert(can_create_pseudo_p ());
739 base = aarch64_force_temporary (dest, base);
740 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 aarch64_emit_move (dest, base);
742 return;
744 /* FALLTHRU */
746 case SYMBOL_SMALL_TPREL:
747 case SYMBOL_SMALL_ABSOLUTE:
748 aarch64_load_symref_appropriately (dest, imm, sty);
749 return;
751 default:
752 gcc_unreachable ();
756 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
758 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
759 return;
762 if (!CONST_INT_P (imm))
764 if (GET_CODE (imm) == HIGH)
765 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
766 else
768 rtx mem = force_const_mem (mode, imm);
769 gcc_assert (mem);
770 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
773 return;
776 if (mode == SImode)
778 /* We know we can't do this in 1 insn, and we must be able to do it
779 in two; so don't mess around looking for sequences that don't buy
780 us anything. */
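/* For example (illustrative constant), 0x12345678 is emitted as the
   16-bit move below followed by an insertion into bits [31:16],
   i.e. roughly:
       mov  w0, #0x5678
       movk w0, #0x1234, lsl #16  */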
781 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
784 return;
787 /* Remaining cases are all for DImode. */
789 val = INTVAL (imm);
790 subtargets = optimize && can_create_pseudo_p ();
792 one_match = 0;
793 zero_match = 0;
794 mask = 0xffff;
796 for (i = 0; i < 64; i += 16, mask <<= 16)
798 if ((val & mask) == 0)
799 zero_match++;
800 else if ((val & mask) == mask)
801 one_match++;
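/* Example of the chunk counting above (illustrative value): for
   val == 0xffffffff00001234 the four 16-bit chunks are 0x1234, 0x0000,
   0xffff and 0xffff, giving zero_match == 1 and one_match == 2, so the
   one_match == 2 path below moves 0xffffffff0000ffff (a MOVN-encodable
   value) and then inserts 0x1234 into the low chunk with a MOVK.  */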
804 if (one_match == 2)
806 mask = 0xffff;
807 for (i = 0; i < 64; i += 16, mask <<= 16)
809 if ((val & mask) != mask)
811 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 GEN_INT ((val >> i) & 0xffff)));
814 return;
817 gcc_unreachable ();
820 if (zero_match == 2)
821 goto simple_sequence;
823 mask = 0x0ffff0000UL;
824 for (i = 16; i < 64; i += 16, mask <<= 16)
826 HOST_WIDE_INT comp = mask & ~(mask - 1);
828 if (aarch64_uimm12_shift (val - (val & mask)))
830 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
832 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 emit_insn (gen_adddi3 (dest, subtarget,
834 GEN_INT (val - (val & mask))));
835 return;
837 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
839 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
841 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 GEN_INT ((val + comp) & mask)));
843 emit_insn (gen_adddi3 (dest, subtarget,
844 GEN_INT (val - ((val + comp) & mask))));
845 return;
847 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
849 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
851 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 GEN_INT ((val - comp) | ~mask)));
853 emit_insn (gen_adddi3 (dest, subtarget,
854 GEN_INT (val - ((val - comp) | ~mask))));
855 return;
857 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
859 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
861 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 GEN_INT (val | ~mask)));
863 emit_insn (gen_adddi3 (dest, subtarget,
864 GEN_INT (val - (val | ~mask))));
865 return;
869 /* See if we can do it by arithmetically combining two
870 immediates. */
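/* Example (illustrative): 0x5555555555555556 is 0x5555555555555555 (a
   valid bitmask immediate) plus 1 (a uimm12), so it can be built as a
   logical-immediate MOV followed by an ADD #1.  */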
871 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
873 int j;
874 mask = 0xffff;
876 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
879 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 GEN_INT (aarch64_bitmasks[i])));
882 emit_insn (gen_adddi3 (dest, subtarget,
883 GEN_INT (val - aarch64_bitmasks[i])));
884 return;
887 for (j = 0; j < 64; j += 16, mask <<= 16)
889 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
891 emit_insn (gen_rtx_SET (VOIDmode, dest,
892 GEN_INT (aarch64_bitmasks[i])));
893 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 GEN_INT ((val >> j) & 0xffff)));
895 return;
900 /* See if we can do it by logically combining two immediates. */
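/* Example (illustrative): 0xf0fff0fff0fff0ff is the inclusive OR of the
   two bitmask immediates 0x00ff00ff00ff00ff and 0xf0f0f0f0f0f0f0f0, so
   it can be built as a logical-immediate MOV followed by an ORR with an
   immediate.  */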
901 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
903 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
905 int j;
907 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
910 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 GEN_INT (aarch64_bitmasks[i])));
913 emit_insn (gen_iordi3 (dest, subtarget,
914 GEN_INT (aarch64_bitmasks[j])));
915 return;
918 else if ((val & aarch64_bitmasks[i]) == val)
920 int j;
922 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
926 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 GEN_INT (aarch64_bitmasks[j])));
929 emit_insn (gen_anddi3 (dest, subtarget,
930 GEN_INT (aarch64_bitmasks[i])));
931 return;
936 simple_sequence:
937 first = true;
938 mask = 0xffff;
939 for (i = 0; i < 64; i += 16, mask <<= 16)
941 if ((val & mask) != 0)
943 if (first)
945 emit_insn (gen_rtx_SET (VOIDmode, dest,
946 GEN_INT (val & mask)));
947 first = false;
949 else
950 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 GEN_INT ((val >> i) & 0xffff)));
956 static bool
957 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
959 /* Indirect calls are not currently supported. */
960 if (decl == NULL)
961 return false;
 963 /* Cannot tail-call to long calls, since these are outside of the
 964 range of a branch instruction (we could handle this if we added
 965 support for indirect tail-calls). */
966 if (aarch64_decl_is_long_call_p (decl))
967 return false;
969 return true;
972 /* Implement TARGET_PASS_BY_REFERENCE. */
974 static bool
975 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
976 enum machine_mode mode,
977 const_tree type,
978 bool named ATTRIBUTE_UNUSED)
980 HOST_WIDE_INT size;
981 enum machine_mode dummymode;
982 int nregs;
984 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
985 size = (mode == BLKmode && type)
986 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
988 if (type)
 990 /* Arrays are always passed by reference. */
991 if (TREE_CODE (type) == ARRAY_TYPE)
992 return true;
993 /* Other aggregates based on their size. */
994 if (AGGREGATE_TYPE_P (type))
995 size = int_size_in_bytes (type);
 998 /* Variable-sized arguments are always passed by reference. */
999 if (size < 0)
1000 return true;
1002 /* Can this be a candidate to be passed in fp/simd register(s)? */
1003 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004 &dummymode, &nregs,
1005 NULL))
1006 return false;
1008 /* Arguments which are variable sized or larger than 2 registers are
 1009 passed by reference unless they are a homogeneous floating-point
1010 aggregate. */
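/* For example (assuming the usual 8-byte UNITS_PER_WORD): a plain
   struct of three 64-bit integers (24 bytes) is passed by reference,
   whereas a struct of four doubles is an HFA, is caught by the check
   above, and is passed in SIMD/FP registers despite being 32 bytes.  */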
1011 return size > 2 * UNITS_PER_WORD;
1014 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1015 static bool
1016 aarch64_return_in_msb (const_tree valtype)
1018 enum machine_mode dummy_mode;
1019 int dummy_int;
1021 /* Never happens in little-endian mode. */
1022 if (!BYTES_BIG_ENDIAN)
1023 return false;
1025 /* Only composite types smaller than or equal to 16 bytes can
1026 be potentially returned in registers. */
1027 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028 || int_size_in_bytes (valtype) <= 0
1029 || int_size_in_bytes (valtype) > 16)
1030 return false;
1032 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034 is always passed/returned in the least significant bits of fp/simd
1035 register(s). */
1036 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 &dummy_mode, &dummy_int, NULL))
1038 return false;
1040 return true;
1043 /* Implement TARGET_FUNCTION_VALUE.
1044 Define how to find the value returned by a function. */
1046 static rtx
1047 aarch64_function_value (const_tree type, const_tree func,
1048 bool outgoing ATTRIBUTE_UNUSED)
1050 enum machine_mode mode;
1051 int unsignedp;
1052 int count;
1053 enum machine_mode ag_mode;
1055 mode = TYPE_MODE (type);
1056 if (INTEGRAL_TYPE_P (type))
1057 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1059 if (aarch64_return_in_msb (type))
1061 HOST_WIDE_INT size = int_size_in_bytes (type);
1063 if (size % UNITS_PER_WORD != 0)
1065 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1070 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 &ag_mode, &count, NULL))
1073 if (!aarch64_composite_type_p (type, mode))
1075 gcc_assert (count == 1 && mode == ag_mode);
1076 return gen_rtx_REG (mode, V0_REGNUM);
1078 else
1080 int i;
1081 rtx par;
1083 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 for (i = 0; i < count; i++)
1086 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 XVECEXP (par, 0, i) = tmp;
1091 return par;
1094 else
1095 return gen_rtx_REG (mode, R0_REGNUM);
1098 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099 Return true if REGNO is the number of a hard register in which the values
 1100 of a called function may come back. */
1102 static bool
1103 aarch64_function_value_regno_p (const unsigned int regno)
1105 /* Maximum of 16 bytes can be returned in the general registers. Examples
1106 of 16-byte return values are: 128-bit integers and 16-byte small
1107 structures (excluding homogeneous floating-point aggregates). */
1108 if (regno == R0_REGNUM || regno == R1_REGNUM)
1109 return true;
1111 /* Up to four fp/simd registers can return a function value, e.g. a
1112 homogeneous floating-point aggregate having four members. */
1113 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114 return !TARGET_GENERAL_REGS_ONLY;
1116 return false;
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1121 If the type T of the result of a function is such that
1122 void func (T arg)
1123 would require that arg be passed as a value in a register (or set of
1124 registers) according to the parameter passing rules, then the result
1125 is returned in the same registers as would be used for such an
1126 argument. */
1128 static bool
1129 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1131 HOST_WIDE_INT size;
1132 enum machine_mode ag_mode;
1133 int count;
1135 if (!AGGREGATE_TYPE_P (type)
1136 && TREE_CODE (type) != COMPLEX_TYPE
1137 && TREE_CODE (type) != VECTOR_TYPE)
 1138 /* Simple scalar types are always returned in registers. */
1139 return false;
1141 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142 type,
1143 &ag_mode,
1144 &count,
1145 NULL))
1146 return false;
 1148 /* Types larger than 2 registers are returned in memory. */
1149 size = int_size_in_bytes (type);
1150 return (size < 0 || size > 2 * UNITS_PER_WORD);
1153 static bool
1154 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155 const_tree type, int *nregs)
1157 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158 return aarch64_vfp_is_call_or_return_candidate (mode,
1159 type,
1160 &pcum->aapcs_vfp_rmode,
1161 nregs,
1162 NULL);
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166 bits. The idea is to suppress any stronger alignment requested by
1167 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168 This is a helper function for local use only. */
1170 static unsigned int
1171 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1173 unsigned int alignment;
1175 if (type)
1177 if (!integer_zerop (TYPE_SIZE (type)))
1179 if (TYPE_MODE (type) == mode)
1180 alignment = TYPE_ALIGN (type);
1181 else
1182 alignment = GET_MODE_ALIGNMENT (mode);
1184 else
1185 alignment = 0;
1187 else
1188 alignment = GET_MODE_ALIGNMENT (mode);
1190 return alignment;
1193 /* Layout a function argument according to the AAPCS64 rules. The rule
1194 numbers refer to the rule numbers in the AAPCS64. */
1196 static void
1197 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198 const_tree type,
1199 bool named ATTRIBUTE_UNUSED)
1201 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202 int ncrn, nvrn, nregs;
1203 bool allocate_ncrn, allocate_nvrn;
1205 /* We need to do this once per argument. */
1206 if (pcum->aapcs_arg_processed)
1207 return;
1209 pcum->aapcs_arg_processed = true;
1211 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1213 mode,
1214 type,
1215 &nregs);
 1217 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1218 The following code thus handles passing by SIMD/FP registers first. */
1220 nvrn = pcum->aapcs_nvrn;
 1222 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1223 and homogeneous short-vector aggregates (HVA). */
1224 if (allocate_nvrn)
1226 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1228 pcum->aapcs_nextnvrn = nvrn + nregs;
1229 if (!aarch64_composite_type_p (type, mode))
1231 gcc_assert (nregs == 1);
1232 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1234 else
1236 rtx par;
1237 int i;
1238 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239 for (i = 0; i < nregs; i++)
1241 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242 V0_REGNUM + nvrn + i);
1243 tmp = gen_rtx_EXPR_LIST
1244 (VOIDmode, tmp,
1245 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246 XVECEXP (par, 0, i) = tmp;
1248 pcum->aapcs_reg = par;
1250 return;
1252 else
1254 /* C.3 NSRN is set to 8. */
1255 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1256 goto on_stack;
1260 ncrn = pcum->aapcs_ncrn;
1261 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 1265 /* C6 - C9, though the sign- and zero-extension semantics are
 1266 handled elsewhere. This is the case where the argument fits
 1267 entirely in general registers. */
1268 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1270 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1272 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1274 /* C.8 if the argument has an alignment of 16 then the NGRN is
1275 rounded up to the next even number. */
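/* For example, an __int128 argument (two registers, 16-byte alignment)
   arriving when NGRN is odd, say 1, is allocated to x2/x3 and x1 is
   left unused, as rule C.8 requires.  */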
1276 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1278 ++ncrn;
1279 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1281 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282 A reg is still generated for it, but the caller should be smart
1283 enough not to use it. */
1284 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1286 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1288 else
1290 rtx par;
1291 int i;
1293 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294 for (i = 0; i < nregs; i++)
1296 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298 GEN_INT (i * UNITS_PER_WORD));
1299 XVECEXP (par, 0, i) = tmp;
1301 pcum->aapcs_reg = par;
1304 pcum->aapcs_nextncrn = ncrn + nregs;
1305 return;
1308 /* C.11 */
1309 pcum->aapcs_nextncrn = NUM_ARG_REGS;
 1311 /* The argument is passed on the stack; record the needed number of words for
1312 this argument (we can re-use NREGS) and align the total size if
1313 necessary. */
1314 on_stack:
1315 pcum->aapcs_stack_words = nregs;
1316 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318 16 / UNITS_PER_WORD) + 1;
1319 return;
1322 /* Implement TARGET_FUNCTION_ARG. */
1324 static rtx
1325 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326 const_tree type, bool named)
1328 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1331 if (mode == VOIDmode)
1332 return NULL_RTX;
1334 aarch64_layout_arg (pcum_v, mode, type, named);
1335 return pcum->aapcs_reg;
1338 void
1339 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340 const_tree fntype ATTRIBUTE_UNUSED,
1341 rtx libname ATTRIBUTE_UNUSED,
1342 const_tree fndecl ATTRIBUTE_UNUSED,
1343 unsigned n_named ATTRIBUTE_UNUSED)
1345 pcum->aapcs_ncrn = 0;
1346 pcum->aapcs_nvrn = 0;
1347 pcum->aapcs_nextncrn = 0;
1348 pcum->aapcs_nextnvrn = 0;
1349 pcum->pcs_variant = ARM_PCS_AAPCS64;
1350 pcum->aapcs_reg = NULL_RTX;
1351 pcum->aapcs_arg_processed = false;
1352 pcum->aapcs_stack_words = 0;
1353 pcum->aapcs_stack_size = 0;
1355 return;
1358 static void
1359 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360 enum machine_mode mode,
1361 const_tree type,
1362 bool named)
1364 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1367 aarch64_layout_arg (pcum_v, mode, type, named);
1368 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369 != (pcum->aapcs_stack_words != 0));
1370 pcum->aapcs_arg_processed = false;
1371 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374 pcum->aapcs_stack_words = 0;
1375 pcum->aapcs_reg = NULL_RTX;
1379 bool
1380 aarch64_function_arg_regno_p (unsigned regno)
1382 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1386 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1387 PARM_BOUNDARY bits of alignment, but will be given anything up
1388 to STACK_BOUNDARY bits if the type requires it. This makes sure
1389 that both before and after the layout of each argument, the Next
1390 Stacked Argument Address (NSAA) will have a minimum alignment of
1391 8 bytes. */
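/* Illustration (assuming the usual AArch64 values PARM_BOUNDARY == 64
   and STACK_BOUNDARY == 128): a char argument is still given 64 bits of
   stack alignment, while a 16-byte-aligned vector type gets 128 bits,
   and any stronger user-requested alignment is clamped to 128 bits.  */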
1393 static unsigned int
1394 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1396 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1398 if (alignment < PARM_BOUNDARY)
1399 alignment = PARM_BOUNDARY;
1400 if (alignment > STACK_BOUNDARY)
1401 alignment = STACK_BOUNDARY;
1402 return alignment;
1405 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1407 Return true if an argument passed on the stack should be padded upwards,
1408 i.e. if the least-significant byte of the stack slot has useful data.
1410 Small aggregate types are placed in the lowest memory address.
1412 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
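/* For example, on a big-endian target a 32-bit int passed on the stack
   occupies the upper (highest-addressed) four bytes of its 8-byte slot,
   i.e. it is padded downward, while a 12-byte structure starts at the
   lowest byte address of its slot, i.e. it is padded upward.  */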
1414 bool
1415 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1417 /* On little-endian targets, the least significant byte of every stack
1418 argument is passed at the lowest byte address of the stack slot. */
1419 if (!BYTES_BIG_ENDIAN)
1420 return true;
1422 /* Otherwise, integral types and floating point types are padded downward:
1423 the least significant byte of a stack argument is passed at the highest
1424 byte address of the stack slot. */
1425 if (type
1426 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1428 return false;
1430 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1431 return true;
1434 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
 1436 It specifies padding for the last (and possibly the only)
 1437 element of a block move between registers and memory. If
 1438 the block is viewed as being in memory, padding upward means that
 1439 the last element is padded after its most significant byte,
 1440 while with downward padding the last element is padded on
 1441 its least significant byte side.
1443 Small aggregates and small complex types are always padded
1444 upwards.
1446 We don't need to worry about homogeneous floating-point or
1447 short-vector aggregates; their move is not affected by the
1448 padding direction determined here. Regardless of endianness,
1449 each element of such an aggregate is put in the least
1450 significant bits of a fp/simd register.
1452 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453 register has useful data, and return the opposite if the most
1454 significant byte does. */
1456 bool
1457 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458 bool first ATTRIBUTE_UNUSED)
1461 /* Small composite types are always padded upward. */
1462 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1464 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465 : GET_MODE_SIZE (mode));
1466 if (size < 2 * UNITS_PER_WORD)
1467 return true;
1470 /* Otherwise, use the default padding. */
1471 return !BYTES_BIG_ENDIAN;
1474 static enum machine_mode
1475 aarch64_libgcc_cmp_return_mode (void)
1477 return SImode;
1480 static bool
1481 aarch64_frame_pointer_required (void)
1483 /* If the function contains dynamic stack allocations, we need to
1484 use the frame pointer to access the static parts of the frame. */
1485 if (cfun->calls_alloca)
1486 return true;
1488 /* We may have turned flag_omit_frame_pointer on in order to have this
1489 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490 and we'll check it here.
1491 If we really did set flag_omit_frame_pointer normally, then we return false
1492 (no frame pointer required) in all cases. */
1494 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1495 return false;
1496 else if (flag_omit_leaf_frame_pointer)
1497 return !crtl->is_leaf;
1498 return true;
1501 /* Mark the registers that need to be saved by the callee and calculate
1502 the size of the callee-saved registers area and frame record (both FP
1503 and LR may be omitted). */
1504 static void
1505 aarch64_layout_frame (void)
1507 HOST_WIDE_INT offset = 0;
1508 int regno;
1510 if (reload_completed && cfun->machine->frame.laid_out)
1511 return;
1513 cfun->machine->frame.fp_lr_offset = 0;
1515 /* First mark all the registers that really need to be saved... */
1516 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517 cfun->machine->frame.reg_offset[regno] = -1;
1519 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520 cfun->machine->frame.reg_offset[regno] = -1;
1522 /* ... that includes the eh data registers (if needed)... */
1523 if (crtl->calls_eh_return)
1524 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1527 /* ... and any callee saved register that dataflow says is live. */
1528 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529 if (df_regs_ever_live_p (regno)
1530 && !call_used_regs[regno])
1531 cfun->machine->frame.reg_offset[regno] = 0;
1533 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534 if (df_regs_ever_live_p (regno)
1535 && !call_used_regs[regno])
1536 cfun->machine->frame.reg_offset[regno] = 0;
1538 if (frame_pointer_needed)
1540 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1545 /* Now assign stack slots for them. */
1546 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547 if (cfun->machine->frame.reg_offset[regno] != -1)
1549 cfun->machine->frame.reg_offset[regno] = offset;
1550 offset += UNITS_PER_WORD;
1553 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554 if (cfun->machine->frame.reg_offset[regno] != -1)
1556 cfun->machine->frame.reg_offset[regno] = offset;
1557 offset += UNITS_PER_WORD;
1560 if (frame_pointer_needed)
1562 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563 offset += UNITS_PER_WORD;
1564 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1567 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1569 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570 offset += UNITS_PER_WORD;
1571 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1574 cfun->machine->frame.padding0 =
1575 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1578 cfun->machine->frame.saved_regs_size = offset;
1579 cfun->machine->frame.laid_out = true;
1582 /* Make the last instruction frame-related and note that it performs
1583 the operation described by FRAME_PATTERN. */
1585 static void
1586 aarch64_set_frame_expr (rtx frame_pattern)
1588 rtx insn;
1590 insn = get_last_insn ();
1591 RTX_FRAME_RELATED_P (insn) = 1;
1592 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1594 frame_pattern,
1595 REG_NOTES (insn));
1598 static bool
1599 aarch64_register_saved_on_entry (int regno)
1601 return cfun->machine->frame.reg_offset[regno] != -1;
1605 static void
1606 aarch64_save_or_restore_fprs (int start_offset, int increment,
1607 bool restore, rtx base_rtx)
1610 unsigned regno;
1611 unsigned regno2;
1612 rtx insn;
1613 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1616 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1618 if (aarch64_register_saved_on_entry (regno))
1620 rtx mem;
1621 mem = gen_mem_ref (DFmode,
1622 plus_constant (Pmode,
1623 base_rtx,
1624 start_offset));
1626 for (regno2 = regno + 1;
1627 regno2 <= V31_REGNUM
1628 && !aarch64_register_saved_on_entry (regno2);
1629 regno2++)
1631 /* Empty loop. */
1633 if (regno2 <= V31_REGNUM &&
1634 aarch64_register_saved_on_entry (regno2))
1636 rtx mem2;
1637 /* Next highest register to be saved. */
1638 mem2 = gen_mem_ref (DFmode,
1639 plus_constant
1640 (Pmode,
1641 base_rtx,
1642 start_offset + increment));
1643 if (restore == false)
1645 insn = emit_insn
1646 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647 mem2, gen_rtx_REG (DFmode, regno2)));
1650 else
1652 insn = emit_insn
1653 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654 gen_rtx_REG (DFmode, regno2), mem2));
1656 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1660 /* The first part of a frame-related parallel insn
1661 is always assumed to be relevant to the frame
 1662 calculations; subsequent parts are only
1663 frame-related if explicitly marked. */
1664 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1665 1)) = 1;
1666 regno = regno2;
1667 start_offset += increment * 2;
1669 else
1671 if (restore == false)
1672 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1673 else
1675 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1676 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1678 start_offset += increment;
1680 RTX_FRAME_RELATED_P (insn) = 1;
 1687 /* Offset from the stack pointer at which the saves and
 1688 restores have to happen. */
1689 static void
1690 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1691 bool restore)
1693 rtx insn;
1694 rtx base_rtx = stack_pointer_rtx;
1695 HOST_WIDE_INT start_offset = offset;
1696 HOST_WIDE_INT increment = UNITS_PER_WORD;
1697 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1698 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1699 unsigned regno;
1700 unsigned regno2;
1702 for (regno = R0_REGNUM; regno <= limit; regno++)
1704 if (aarch64_register_saved_on_entry (regno))
1706 rtx mem;
1707 mem = gen_mem_ref (Pmode,
1708 plus_constant (Pmode,
1709 base_rtx,
1710 start_offset));
1712 for (regno2 = regno + 1;
1713 regno2 <= limit
1714 && !aarch64_register_saved_on_entry (regno2);
1715 regno2++)
1717 /* Empty loop. */
1719 if (regno2 <= limit &&
1720 aarch64_register_saved_on_entry (regno2))
1722 rtx mem2;
1723 /* Next highest register to be saved. */
1724 mem2 = gen_mem_ref (Pmode,
1725 plus_constant
1726 (Pmode,
1727 base_rtx,
1728 start_offset + increment));
1729 if (restore == false)
1731 insn = emit_insn
1732 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733 mem2, gen_rtx_REG (DImode, regno2)));
1736 else
1738 insn = emit_insn
1739 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740 gen_rtx_REG (DImode, regno2), mem2));
1742 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1746 /* The first part of a frame-related parallel insn
1747 is always assumed to be relevant to the frame
 1748 calculations; subsequent parts are only
1749 frame-related if explicitly marked. */
1750 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1751 1)) = 1;
1752 regno = regno2;
1753 start_offset += increment * 2;
1755 else
1757 if (restore == false)
1758 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1759 else
1761 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1764 start_offset += increment;
1766 RTX_FRAME_RELATED_P (insn) = 1;
1770 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1774 /* AArch64 stack frames generated by this compiler look like:
1776 +-------------------------------+
1778 | incoming stack arguments |
1780 +-------------------------------+ <-- arg_pointer_rtx
1782 | callee-allocated save area |
1783 | for register varargs |
1785 +-------------------------------+
1787 | local variables |
1789 +-------------------------------+ <-- frame_pointer_rtx
1791 | callee-saved registers |
1793 +-------------------------------+
1794 | LR' |
1795 +-------------------------------+
1796 | FP' |
1797 P +-------------------------------+ <-- hard_frame_pointer_rtx
1798 | dynamic allocation |
1799 +-------------------------------+
1801 | outgoing stack arguments |
1803 +-------------------------------+ <-- stack_pointer_rtx
1805 Dynamic stack allocations such as alloca insert data at point P.
1806 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807 hard_frame_pointer_rtx unchanged. */
1809 /* Generate the prologue instructions for entry into a function.
1810 Establish the stack frame by decreasing the stack pointer with a
1811 properly calculated size and, if necessary, create a frame record
1812 filled with the values of LR and previous frame pointer. The
 1813 current FP is also set up if it is in use. */
1815 void
1816 aarch64_expand_prologue (void)
1818 /* sub sp, sp, #<frame_size>
1819 stp {fp, lr}, [sp, #<frame_size> - 16]
1820 add fp, sp, #<frame_size> - hardfp_offset
1821 stp {cs_reg}, [fp, #-16] etc.
1823 sub sp, sp, <final_adjustment_if_any>
1825 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1826 HOST_WIDE_INT frame_size, offset;
1827 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1828 rtx insn;
1830 aarch64_layout_frame ();
1831 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835 + crtl->outgoing_args_size);
1836 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837 STACK_BOUNDARY / BITS_PER_UNIT);
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = frame_size;
1842 fp_offset = (offset
1843 - original_frame_size
1844 - cfun->machine->frame.saved_regs_size);
 1846 /* Store-pair and load-pair instructions have an offset range of only -512 to 504 (a signed 7-bit immediate scaled by the access size). */
1847 if (offset >= 512)
1849 /* When the frame has a large size, an initial decrease is done on
1850 the stack pointer to jump over the callee-allocated save area for
1851 register varargs, the local variable area and/or the callee-saved
1852 register area. This will allow the pre-index write-back
1853 store pair instructions to be used for setting up the stack frame
1854 efficiently. */
1855 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856 if (offset >= 512)
1857 offset = cfun->machine->frame.saved_regs_size;
1859 frame_size -= (offset + crtl->outgoing_args_size);
1860 fp_offset = 0;
1862 if (frame_size >= 0x1000000)
1864 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865 emit_move_insn (op0, GEN_INT (-frame_size));
1866 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867 aarch64_set_frame_expr (gen_rtx_SET
1868 (Pmode, stack_pointer_rtx,
1869 gen_rtx_PLUS (Pmode,
1870 stack_pointer_rtx,
1871 GEN_INT (-frame_size))));
1873 else if (frame_size > 0)
1875 if ((frame_size & 0xfff) != frame_size)
1877 insn = emit_insn (gen_add2_insn
1878 (stack_pointer_rtx,
1879 GEN_INT (-(frame_size
1880 & ~(HOST_WIDE_INT)0xfff))));
1881 RTX_FRAME_RELATED_P (insn) = 1;
1883 if ((frame_size & 0xfff) != 0)
1885 insn = emit_insn (gen_add2_insn
1886 (stack_pointer_rtx,
1887 GEN_INT (-(frame_size
1888 & (HOST_WIDE_INT)0xfff))));
1889 RTX_FRAME_RELATED_P (insn) = 1;
1893 else
1894 frame_size = -1;
1896 if (offset > 0)
1898 /* Save the frame pointer and lr if the frame pointer is needed
1899 first. Make the frame pointer point to the location of the
1900 old frame pointer on the stack. */
1901 if (frame_pointer_needed)
1903 rtx mem_fp, mem_lr;
1905 if (fp_offset)
1907 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908 GEN_INT (-offset)));
1909 RTX_FRAME_RELATED_P (insn) = 1;
1910 aarch64_set_frame_expr (gen_rtx_SET
1911 (Pmode, stack_pointer_rtx,
1912 gen_rtx_MINUS (Pmode,
1913 stack_pointer_rtx,
1914 GEN_INT (offset))));
1915 mem_fp = gen_frame_mem (DImode,
1916 plus_constant (Pmode,
1917 stack_pointer_rtx,
1918 fp_offset));
1919 mem_lr = gen_frame_mem (DImode,
1920 plus_constant (Pmode,
1921 stack_pointer_rtx,
1922 fp_offset
1923 + UNITS_PER_WORD));
1924 insn = emit_insn (gen_store_pairdi (mem_fp,
1925 hard_frame_pointer_rtx,
1926 mem_lr,
1927 gen_rtx_REG (DImode,
1928 LR_REGNUM)));
1930 else
1932 insn = emit_insn (gen_storewb_pairdi_di
1933 (stack_pointer_rtx, stack_pointer_rtx,
1934 hard_frame_pointer_rtx,
1935 gen_rtx_REG (DImode, LR_REGNUM),
1936 GEN_INT (-offset),
1937 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1941 /* The first part of a frame-related parallel insn is always
1942 assumed to be relevant to the frame calculations;
 1943 subsequent parts are only frame-related if explicitly
1944 marked. */
1945 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946 RTX_FRAME_RELATED_P (insn) = 1;
1948 /* Set up frame pointer to point to the location of the
1949 previous frame pointer on the stack. */
1950 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951 stack_pointer_rtx,
1952 GEN_INT (fp_offset)));
1953 aarch64_set_frame_expr (gen_rtx_SET
1954 (Pmode, hard_frame_pointer_rtx,
1955 gen_rtx_PLUS (Pmode,
1956 stack_pointer_rtx,
1957 GEN_INT (fp_offset))));
1958 RTX_FRAME_RELATED_P (insn) = 1;
1959 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960 hard_frame_pointer_rtx));
1962 else
1964 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965 GEN_INT (-offset)));
1966 RTX_FRAME_RELATED_P (insn) = 1;
1969 aarch64_save_or_restore_callee_save_registers
1970 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1973 /* when offset >= 512,
1974 sub sp, sp, #<outgoing_args_size> */
1975 if (frame_size > -1)
1977 if (crtl->outgoing_args_size > 0)
1979 insn = emit_insn (gen_add2_insn
1980 (stack_pointer_rtx,
1981 GEN_INT (- crtl->outgoing_args_size)));
1982 RTX_FRAME_RELATED_P (insn) = 1;
1987 /* Generate the epilogue instructions for returning from a function. */
1988 void
1989 aarch64_expand_epilogue (bool for_sibcall)
1991 HOST_WIDE_INT original_frame_size, frame_size, offset;
1992 HOST_WIDE_INT fp_offset;
1993 rtx insn;
1994 rtx cfa_reg;
1996 aarch64_layout_frame ();
1997 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999 + crtl->outgoing_args_size);
2000 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001 STACK_BOUNDARY / BITS_PER_UNIT);
2003 fp_offset = (offset
2004 - original_frame_size
2005 - cfun->machine->frame.saved_regs_size);
2007 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
 2009 /* Store-pair and load-pair instructions have an offset range of only -512 to 504 (a signed 7-bit immediate scaled by the access size). */
2010 if (offset >= 512)
2012 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013 if (offset >= 512)
2014 offset = cfun->machine->frame.saved_regs_size;
2016 frame_size -= (offset + crtl->outgoing_args_size);
2017 fp_offset = 0;
2018 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2020 insn = emit_insn (gen_add2_insn
2021 (stack_pointer_rtx,
2022 GEN_INT (crtl->outgoing_args_size)));
2023 RTX_FRAME_RELATED_P (insn) = 1;
2026 else
2027 frame_size = -1;
2029 /* If there were outgoing arguments or we've done dynamic stack
2030 allocation, then restore the stack pointer from the frame
2031 pointer. This is at most one insn and more efficient than using
2032 GCC's internal mechanism. */
2033 if (frame_pointer_needed
2034 && (crtl->outgoing_args_size || cfun->calls_alloca))
2036 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037 hard_frame_pointer_rtx,
2038 GEN_INT (- fp_offset)));
2039 RTX_FRAME_RELATED_P (insn) = 1;
2040 /* As SP is set to (FP - fp_offset), according to the rules in
2041 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042 from the value of SP from now on. */
2043 cfa_reg = stack_pointer_rtx;
2046 aarch64_save_or_restore_callee_save_registers
2047 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2049 /* Restore the frame pointer and lr if the frame pointer is needed. */
2050 if (offset > 0)
2052 if (frame_pointer_needed)
2054 rtx mem_fp, mem_lr;
2056 if (fp_offset)
2058 mem_fp = gen_frame_mem (DImode,
2059 plus_constant (Pmode,
2060 stack_pointer_rtx,
2061 fp_offset));
2062 mem_lr = gen_frame_mem (DImode,
2063 plus_constant (Pmode,
2064 stack_pointer_rtx,
2065 fp_offset
2066 + UNITS_PER_WORD));
2067 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068 mem_fp,
2069 gen_rtx_REG (DImode,
2070 LR_REGNUM),
2071 mem_lr));
2073 else
2075 insn = emit_insn (gen_loadwb_pairdi_di
2076 (stack_pointer_rtx,
2077 stack_pointer_rtx,
2078 hard_frame_pointer_rtx,
2079 gen_rtx_REG (DImode, LR_REGNUM),
2080 GEN_INT (offset),
2081 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085 plus_constant (Pmode, cfa_reg,
2086 offset))));
2089 /* The first part of a frame-related parallel insn
2090 is always assumed to be relevant to the frame
2091 calculations; subsequent parts are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096 add_reg_note (insn, REG_CFA_RESTORE,
2097 gen_rtx_REG (DImode, LR_REGNUM));
2099 if (fp_offset)
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 GEN_INT (offset)));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2106 else
2108 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109 GEN_INT (offset)));
2110 RTX_FRAME_RELATED_P (insn) = 1;
2114 /* Stack adjustment for exception handler. */
2115 if (crtl->calls_eh_return)
2117 /* We need to unwind the stack by the offset computed by
2118 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2119 based on SP. Ideally we would update the SP and define the
2120 CFA along the lines of:
2122 SP = SP + EH_RETURN_STACKADJ_RTX
2123 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2125 However the dwarf emitter only understands a constant
2126 register offset.
2128 The solution chosen here is to use the otherwise unused IP0
2129 as a temporary register to hold the current SP value.  The
2130 CFA is described using IP0, then SP is modified. */
2132 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2134 insn = emit_move_insn (ip0, stack_pointer_rtx);
2135 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136 RTX_FRAME_RELATED_P (insn) = 1;
2138 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2140 /* Ensure the assignment to IP0 does not get optimized away. */
2141 emit_use (ip0);
2144 if (frame_size > -1)
2146 if (frame_size >= 0x1000000)
2148 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149 emit_move_insn (op0, GEN_INT (frame_size));
2150 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151 aarch64_set_frame_expr (gen_rtx_SET
2152 (Pmode, stack_pointer_rtx,
2153 gen_rtx_PLUS (Pmode,
2154 stack_pointer_rtx,
2155 GEN_INT (frame_size))));
2157 else if (frame_size > 0)
2159 if ((frame_size & 0xfff) != 0)
2161 insn = emit_insn (gen_add2_insn
2162 (stack_pointer_rtx,
2163 GEN_INT ((frame_size
2164 & (HOST_WIDE_INT) 0xfff))));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2167 if ((frame_size & 0xfff) != frame_size)
2169 insn = emit_insn (gen_add2_insn
2170 (stack_pointer_rtx,
2171 GEN_INT ((frame_size
2172 & ~ (HOST_WIDE_INT) 0xfff))));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2177 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178 gen_rtx_PLUS (Pmode,
2179 stack_pointer_rtx,
2180 GEN_INT (offset))));
2183 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184 if (!for_sibcall)
2185 emit_jump_insn (ret_rtx);
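/* The matching small-frame epilogue emitted above is roughly the mirror
   image of the prologue (illustrative only):

	sub	sp, x29, #<fp_offset>		// only when SP was changed by
						// alloca or outgoing arguments
	... loads of the remaining callee-saved registers ...
	ldp	x29, x30, [sp, #<fp_offset>]
	add	sp, sp, #<offset>
	ret
*/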
2188 /* Return the place to copy the exception unwinding return address to.
2189 This will probably be a stack slot, but could (in theory) be the
2190 return register. */
2192 aarch64_final_eh_return_addr (void)
2194 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2201 fp_offset = offset
2202 - original_frame_size
2203 - cfun->machine->frame.saved_regs_size;
2205 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206 return gen_rtx_REG (DImode, LR_REGNUM);
2208 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2209 result in a store to save LR introduced by builtin_eh_return () being
2210 incorrectly deleted because the alias is not detected.
2211 So in the calculation of the address to copy the exception unwinding
2212 return address to, we distinguish two cases.
2213 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214 we return a SP-relative location since all the addresses are SP-relative
2215 in this case. This prevents the store from being optimized away.
2216 If the fp_offset is not 0, then the addresses will be FP-relative and
2217 therefore we return a FP-relative location. */
2219 if (frame_pointer_needed)
2221 if (fp_offset)
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224 else
2225 return gen_frame_mem (DImode,
2226 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2229 /* If FP is not needed, we calculate the location of LR, which would be
2230 at the top of the saved registers block. */
2232 return gen_frame_mem (DImode,
2233 plus_constant (Pmode,
2234 stack_pointer_rtx,
2235 fp_offset
2236 + cfun->machine->frame.saved_regs_size
2237 - 2 * UNITS_PER_WORD));
2240 /* Output code to build up a constant in a register. */
2241 static void
2242 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2244 if (aarch64_bitmask_imm (val, DImode))
2245 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2246 else
2248 int i;
2249 int ncount = 0;
2250 int zcount = 0;
2251 HOST_WIDE_INT valp = val >> 16;
2252 HOST_WIDE_INT valm;
2253 HOST_WIDE_INT tval;
2255 for (i = 16; i < 64; i += 16)
2257 valm = (valp & 0xffff);
2259 if (valm != 0)
2260 ++ zcount;
2262 if (valm != 0xffff)
2263 ++ ncount;
2265 valp >>= 16;
2268 /* zcount contains the number of additional MOVK instructions
2269 required if the constant is built up with an initial MOVZ instruction,
2270 while ncount is the number of MOVK instructions required if starting
2271 with a MOVN instruction. Choose the sequence that needs the
2272 fewer instructions, preferring MOVZ instructions when the two
2273 counts are equal. */
2274 if (ncount < zcount)
2276 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277 GEN_INT ((~val) & 0xffff));
2278 tval = 0xffff;
2280 else
2282 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283 GEN_INT (val & 0xffff));
2284 tval = 0;
2287 val >>= 16;
2289 for (i = 16; i < 64; i += 16)
2291 if ((val & 0xffff) != tval)
2292 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293 GEN_INT (i), GEN_INT (val & 0xffff)));
2294 val >>= 16;
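/* To illustrate the MOVZ/MOVN selection above: 0x1234000000005678 is not a
   bitmask immediate and has two non-zero 16-bit chunks, so it is built with
   a MOVZ-based sequence such as (register choice is arbitrary):

	mov	x0, #0x5678
	movk	x0, #0x1234, lsl #48
*/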
2299 static void
2300 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2302 HOST_WIDE_INT mdelta = delta;
2303 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2306 if (mdelta < 0)
2307 mdelta = -mdelta;
2309 if (mdelta >= 4096 * 4096)
2311 aarch64_build_constant (scratchreg, delta);
2312 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2314 else if (mdelta > 0)
2316 if (mdelta >= 4096)
2318 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320 if (delta < 0)
2321 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323 else
2324 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2327 if (mdelta % 4096 != 0)
2329 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
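/* For example, DELTA == 0x3456 is split by the code above into a multiple
   of 4096 plus a remainder, giving roughly (register numbers are
   illustrative; the scratch register is the SCRATCHREG argument):

	mov	x16, #3
	add	x0, x0, x16, lsl #12
	add	x0, x0, #0x456
*/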
2336 /* Output code to add DELTA to the first argument, and then jump
2337 to FUNCTION. Used for C++ multiple inheritance. */
2338 static void
2339 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340 HOST_WIDE_INT delta,
2341 HOST_WIDE_INT vcall_offset,
2342 tree function)
2344 /* The this pointer is always in x0. Note that this differs from
2345 Arm, where the this pointer may be bumped to r1 if r0 is required
2346 to return a pointer to an aggregate. On AArch64 a result value
2347 pointer will be in x8. */
2348 int this_regno = R0_REGNUM;
2349 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2351 reload_completed = 1;
2352 emit_note (NOTE_INSN_PROLOGUE_END);
2354 if (vcall_offset == 0)
2355 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2356 else
2358 gcc_assert ((vcall_offset & 0x7) == 0);
2360 this_rtx = gen_rtx_REG (Pmode, this_regno);
2361 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2364 addr = this_rtx;
2365 if (delta != 0)
2367 if (delta >= -256 && delta < 256)
2368 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369 plus_constant (Pmode, this_rtx, delta));
2370 else
2371 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2374 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2376 if (vcall_offset >= -256 && vcall_offset < 32768)
2377 addr = plus_constant (Pmode, temp0, vcall_offset);
2378 else
2380 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2381 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2384 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2385 emit_insn (gen_add2_insn (this_rtx, temp1));
2388 /* Generate a tail call to the target function. */
2389 if (!TREE_USED (function))
2391 assemble_external (function);
2392 TREE_USED (function) = 1;
2394 funexp = XEXP (DECL_RTL (function), 0);
2395 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397 SIBLING_CALL_P (insn) = 1;
2399 insn = get_insns ();
2400 shorten_branches (insn);
2401 final_start_function (insn, file, 1);
2402 final (insn, file, 1);
2403 final_end_function ();
2405 /* Stop pretending to be a post-reload pass. */
2406 reload_completed = 0;
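/* In the common case of VCALL_OFFSET == 0 and a small DELTA, the thunk
   emitted above reduces to something like (illustrative):

	add	x0, x0, #<delta>
	b	<function>
*/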
2409 static int
2410 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2412 if (GET_CODE (*x) == SYMBOL_REF)
2413 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2415 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416 TLS offsets, not real symbol references. */
2417 if (GET_CODE (*x) == UNSPEC
2418 && XINT (*x, 1) == UNSPEC_TLS)
2419 return -1;
2421 return 0;
2424 static bool
2425 aarch64_tls_referenced_p (rtx x)
2427 if (!TARGET_HAVE_TLS)
2428 return false;
2430 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2434 static int
2435 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2437 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2440 if (*imm1 < *imm2)
2441 return -1;
2442 if (*imm1 > *imm2)
2443 return +1;
2444 return 0;
2448 static void
2449 aarch64_build_bitmask_table (void)
2451 unsigned HOST_WIDE_INT mask, imm;
2452 unsigned int log_e, e, s, r;
2453 unsigned int nimms = 0;
2455 for (log_e = 1; log_e <= 6; log_e++)
2457 e = 1 << log_e;
2458 if (e == 64)
2459 mask = ~(HOST_WIDE_INT) 0;
2460 else
2461 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462 for (s = 1; s < e; s++)
2464 for (r = 0; r < e; r++)
2466 /* Set s consecutive bits to 1 (s < 64).  */
2467 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468 /* Rotate right by r.  */
2469 if (r != 0)
2470 imm = ((imm >> r) | (imm << (e - r))) & mask;
2471 /* Replicate the constant to 64 bits; the switch cases deliberately fall through.  */
2472 switch (log_e) {
2473 case 1: imm |= (imm << 2);
2474 case 2: imm |= (imm << 4);
2475 case 3: imm |= (imm << 8);
2476 case 4: imm |= (imm << 16);
2477 case 5: imm |= (imm << 32);
2478 case 6:
2479 break;
2480 default:
2481 gcc_unreachable ();
2483 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484 aarch64_bitmasks[nimms++] = imm;
2489 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491 aarch64_bitmasks_cmp);
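/* The table built above enumerates every value encodable as an AArch64
   logical immediate: S consecutive ones (1 <= S < E) rotated by R within an
   element of E bits and replicated across the register, which works out to
   2 + 12 + 56 + 240 + 992 + 4032 = 5334 entries.  For example,
   0x00ff00ff00ff00ff (eight ones replicated at a 16-bit element size) is in
   the table, while an arbitrary value such as 0x1234567812345678 is not.  */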
2495 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2496 a left shift of 0 or 12 bits. */
2497 bool
2498 aarch64_uimm12_shift (HOST_WIDE_INT val)
2500 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2506 /* Return true if val is an immediate that can be loaded into a
2507 register by a MOVZ instruction. */
2508 static bool
2509 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2511 if (GET_MODE_SIZE (mode) > 4)
2513 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515 return 1;
2517 else
2519 /* Ignore sign extension. */
2520 val &= (HOST_WIDE_INT) 0xffffffff;
2522 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2527 /* Return true if val is a valid bitmask immediate. */
2528 bool
2529 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2531 if (GET_MODE_SIZE (mode) < 8)
2533 /* Replicate bit pattern. */
2534 val &= (HOST_WIDE_INT) 0xffffffff;
2535 val |= val << 32;
2537 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2542 /* Return true if val is an immediate that can be loaded into a
2543 register in a single instruction. */
2544 bool
2545 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2547 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548 return 1;
2549 return aarch64_bitmask_imm (val, mode);
2552 static bool
2553 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2555 rtx base, offset;
2556 if (GET_CODE (x) == HIGH)
2557 return true;
2559 split_const (x, &base, &offset);
2560 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2563 return aarch64_tls_referenced_p (x);
2566 /* Return true if register REGNO is a valid index register.
2567 STRICT_P is true if REG_OK_STRICT is in effect. */
2569 bool
2570 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2572 if (!HARD_REGISTER_NUM_P (regno))
2574 if (!strict_p)
2575 return true;
2577 if (!reg_renumber)
2578 return false;
2580 regno = reg_renumber[regno];
2582 return GP_REGNUM_P (regno);
2585 /* Return true if register REGNO is a valid base register.
2586 STRICT_P is true if REG_OK_STRICT is in effect. */
2588 bool
2589 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2591 if (!HARD_REGISTER_NUM_P (regno))
2593 if (!strict_p)
2594 return true;
2596 if (!reg_renumber)
2597 return false;
2599 regno = reg_renumber[regno];
2602 /* The fake registers will be eliminated to either the stack or
2603 hard frame pointer, both of which are usually valid base registers.
2604 Reload deals with the cases where the eliminated form isn't valid. */
2605 return (GP_REGNUM_P (regno)
2606 || regno == SP_REGNUM
2607 || regno == FRAME_POINTER_REGNUM
2608 || regno == ARG_POINTER_REGNUM);
2611 /* Return true if X is a valid base register.
2612 STRICT_P is true if REG_OK_STRICT is in effect. */
2614 static bool
2615 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2617 if (!strict_p && GET_CODE (x) == SUBREG)
2618 x = SUBREG_REG (x);
2620 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2623 /* Return true if address offset is a valid index. If it is, fill in INFO
2624 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2626 static bool
2627 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628 enum machine_mode mode, bool strict_p)
2630 enum aarch64_address_type type;
2631 rtx index;
2632 int shift;
2634 /* (reg:P) */
2635 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636 && GET_MODE (x) == Pmode)
2638 type = ADDRESS_REG_REG;
2639 index = x;
2640 shift = 0;
2642 /* (sign_extend:DI (reg:SI)) */
2643 else if ((GET_CODE (x) == SIGN_EXTEND
2644 || GET_CODE (x) == ZERO_EXTEND)
2645 && GET_MODE (x) == DImode
2646 && GET_MODE (XEXP (x, 0)) == SImode)
2648 type = (GET_CODE (x) == SIGN_EXTEND)
2649 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650 index = XEXP (x, 0);
2651 shift = 0;
2653 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654 else if (GET_CODE (x) == MULT
2655 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657 && GET_MODE (XEXP (x, 0)) == DImode
2658 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659 && CONST_INT_P (XEXP (x, 1)))
2661 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663 index = XEXP (XEXP (x, 0), 0);
2664 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2666 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667 else if (GET_CODE (x) == ASHIFT
2668 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670 && GET_MODE (XEXP (x, 0)) == DImode
2671 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672 && CONST_INT_P (XEXP (x, 1)))
2674 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676 index = XEXP (XEXP (x, 0), 0);
2677 shift = INTVAL (XEXP (x, 1));
2679 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680 else if ((GET_CODE (x) == SIGN_EXTRACT
2681 || GET_CODE (x) == ZERO_EXTRACT)
2682 && GET_MODE (x) == DImode
2683 && GET_CODE (XEXP (x, 0)) == MULT
2684 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2687 type = (GET_CODE (x) == SIGN_EXTRACT)
2688 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689 index = XEXP (XEXP (x, 0), 0);
2690 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691 if (INTVAL (XEXP (x, 1)) != 32 + shift
2692 || INTVAL (XEXP (x, 2)) != 0)
2693 shift = -1;
2695 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696 (const_int 0xffffffff<<shift)) */
2697 else if (GET_CODE (x) == AND
2698 && GET_MODE (x) == DImode
2699 && GET_CODE (XEXP (x, 0)) == MULT
2700 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702 && CONST_INT_P (XEXP (x, 1)))
2704 type = ADDRESS_REG_UXTW;
2705 index = XEXP (XEXP (x, 0), 0);
2706 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708 shift = -1;
2710 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711 else if ((GET_CODE (x) == SIGN_EXTRACT
2712 || GET_CODE (x) == ZERO_EXTRACT)
2713 && GET_MODE (x) == DImode
2714 && GET_CODE (XEXP (x, 0)) == ASHIFT
2715 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2718 type = (GET_CODE (x) == SIGN_EXTRACT)
2719 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720 index = XEXP (XEXP (x, 0), 0);
2721 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722 if (INTVAL (XEXP (x, 1)) != 32 + shift
2723 || INTVAL (XEXP (x, 2)) != 0)
2724 shift = -1;
2726 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727 (const_int 0xffffffff<<shift)) */
2728 else if (GET_CODE (x) == AND
2729 && GET_MODE (x) == DImode
2730 && GET_CODE (XEXP (x, 0)) == ASHIFT
2731 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733 && CONST_INT_P (XEXP (x, 1)))
2735 type = ADDRESS_REG_UXTW;
2736 index = XEXP (XEXP (x, 0), 0);
2737 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739 shift = -1;
2741 /* (mult:P (reg:P) (const_int scale)) */
2742 else if (GET_CODE (x) == MULT
2743 && GET_MODE (x) == Pmode
2744 && GET_MODE (XEXP (x, 0)) == Pmode
2745 && CONST_INT_P (XEXP (x, 1)))
2747 type = ADDRESS_REG_REG;
2748 index = XEXP (x, 0);
2749 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2751 /* (ashift:P (reg:P) (const_int shift)) */
2752 else if (GET_CODE (x) == ASHIFT
2753 && GET_MODE (x) == Pmode
2754 && GET_MODE (XEXP (x, 0)) == Pmode
2755 && CONST_INT_P (XEXP (x, 1)))
2757 type = ADDRESS_REG_REG;
2758 index = XEXP (x, 0);
2759 shift = INTVAL (XEXP (x, 1));
2761 else
2762 return false;
2764 if (GET_CODE (index) == SUBREG)
2765 index = SUBREG_REG (index);
2767 if ((shift == 0 ||
2768 (shift > 0 && shift <= 3
2769 && (1 << shift) == GET_MODE_SIZE (mode)))
2770 && REG_P (index)
2771 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2773 info->type = type;
2774 info->offset = index;
2775 info->shift = shift;
2776 return true;
2779 return false;
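/* As an example of the classification above, an index expression of the form
   (mult:DI (sign_extend:DI (reg:SI)) (const_int 4)) used with an SImode
   access is accepted as ADDRESS_REG_SXTW with shift 2, i.e. the
   "[<base>, w<n>, sxtw 2]" addressing form.  */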
2782 static inline bool
2783 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2785 return (offset >= -64 * GET_MODE_SIZE (mode)
2786 && offset < 64 * GET_MODE_SIZE (mode)
2787 && offset % GET_MODE_SIZE (mode) == 0);
2790 static inline bool
2791 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792 HOST_WIDE_INT offset)
2794 return offset >= -256 && offset < 256;
2797 static inline bool
2798 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2800 return (offset >= 0
2801 && offset < 4096 * GET_MODE_SIZE (mode)
2802 && offset % GET_MODE_SIZE (mode) == 0);
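/* For a DImode (8-byte) access the three helpers above correspond to:
     7-bit signed scaled     -512 .. 504 in steps of 8 (LDP/STP),
     9-bit signed unscaled   -256 .. 255 (LDUR/STUR),
     12-bit unsigned scaled  0 .. 32760 in steps of 8 (LDR/STR).  */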
2805 /* Return true if X is a valid address for machine mode MODE. If it is,
2806 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2807 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2809 static bool
2810 aarch64_classify_address (struct aarch64_address_info *info,
2811 rtx x, enum machine_mode mode,
2812 RTX_CODE outer_code, bool strict_p)
2814 enum rtx_code code = GET_CODE (x);
2815 rtx op0, op1;
2816 bool allow_reg_index_p =
2817 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2819 /* Don't support anything other than POST_INC or REG addressing for
2820 AdvSIMD. */
2821 if (aarch64_vector_mode_p (mode)
2822 && (code != POST_INC && code != REG))
2823 return false;
2825 switch (code)
2827 case REG:
2828 case SUBREG:
2829 info->type = ADDRESS_REG_IMM;
2830 info->base = x;
2831 info->offset = const0_rtx;
2832 return aarch64_base_register_rtx_p (x, strict_p);
2834 case PLUS:
2835 op0 = XEXP (x, 0);
2836 op1 = XEXP (x, 1);
2837 if (GET_MODE_SIZE (mode) != 0
2838 && CONST_INT_P (op1)
2839 && aarch64_base_register_rtx_p (op0, strict_p))
2841 HOST_WIDE_INT offset = INTVAL (op1);
2843 info->type = ADDRESS_REG_IMM;
2844 info->base = op0;
2845 info->offset = op1;
2847 /* TImode and TFmode values are allowed in both pairs of X
2848 registers and individual Q registers. The available
2849 address modes are:
2850 X,X: 7-bit signed scaled offset
2851 Q: 9-bit signed offset
2852 We conservatively require an offset representable in both forms. */
2854 if (mode == TImode || mode == TFmode)
2855 return (offset_7bit_signed_scaled_p (mode, offset)
2856 && offset_9bit_signed_unscaled_p (mode, offset));
2858 if (outer_code == PARALLEL)
2859 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860 && offset_7bit_signed_scaled_p (mode, offset));
2861 else
2862 return (offset_9bit_signed_unscaled_p (mode, offset)
2863 || offset_12bit_unsigned_scaled_p (mode, offset));
2866 if (allow_reg_index_p)
2868 /* Look for base + (scaled/extended) index register. */
2869 if (aarch64_base_register_rtx_p (op0, strict_p)
2870 && aarch64_classify_index (info, op1, mode, strict_p))
2872 info->base = op0;
2873 return true;
2875 if (aarch64_base_register_rtx_p (op1, strict_p)
2876 && aarch64_classify_index (info, op0, mode, strict_p))
2878 info->base = op1;
2879 return true;
2883 return false;
2885 case POST_INC:
2886 case POST_DEC:
2887 case PRE_INC:
2888 case PRE_DEC:
2889 info->type = ADDRESS_REG_WB;
2890 info->base = XEXP (x, 0);
2891 info->offset = NULL_RTX;
2892 return aarch64_base_register_rtx_p (info->base, strict_p);
2894 case POST_MODIFY:
2895 case PRE_MODIFY:
2896 info->type = ADDRESS_REG_WB;
2897 info->base = XEXP (x, 0);
2898 if (GET_CODE (XEXP (x, 1)) == PLUS
2899 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901 && aarch64_base_register_rtx_p (info->base, strict_p))
2903 HOST_WIDE_INT offset;
2904 info->offset = XEXP (XEXP (x, 1), 1);
2905 offset = INTVAL (info->offset);
2907 /* TImode and TFmode values are allowed in both pairs of X
2908 registers and individual Q registers. The available
2909 address modes are:
2910 X,X: 7-bit signed scaled offset
2911 Q: 9-bit signed offset
2912 We conservatively require an offset representable in both forms. */
2914 if (mode == TImode || mode == TFmode)
2915 return (offset_7bit_signed_scaled_p (mode, offset)
2916 && offset_9bit_signed_unscaled_p (mode, offset));
2918 if (outer_code == PARALLEL)
2919 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920 && offset_7bit_signed_scaled_p (mode, offset));
2921 else
2922 return offset_9bit_signed_unscaled_p (mode, offset);
2924 return false;
2926 case CONST:
2927 case SYMBOL_REF:
2928 case LABEL_REF:
2929 /* Load literal: pc-relative constant pool entry. Only supported
2930 for SI mode or larger. */
2931 info->type = ADDRESS_SYMBOLIC;
2932 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2934 rtx sym, addend;
2936 split_const (x, &sym, &addend);
2937 return (GET_CODE (sym) == LABEL_REF
2938 || (GET_CODE (sym) == SYMBOL_REF
2939 && CONSTANT_POOL_ADDRESS_P (sym)));
2941 return false;
2943 case LO_SUM:
2944 info->type = ADDRESS_LO_SUM;
2945 info->base = XEXP (x, 0);
2946 info->offset = XEXP (x, 1);
2947 if (allow_reg_index_p
2948 && aarch64_base_register_rtx_p (info->base, strict_p))
2950 rtx sym, offs;
2951 split_const (info->offset, &sym, &offs);
2952 if (GET_CODE (sym) == SYMBOL_REF
2953 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2954 == SYMBOL_SMALL_ABSOLUTE))
2956 /* The symbol and offset must be aligned to the access size. */
2957 unsigned int align;
2958 unsigned int ref_size;
2960 if (CONSTANT_POOL_ADDRESS_P (sym))
2961 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2962 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2964 tree exp = SYMBOL_REF_DECL (sym);
2965 align = TYPE_ALIGN (TREE_TYPE (exp));
2966 align = CONSTANT_ALIGNMENT (exp, align);
2968 else if (SYMBOL_REF_DECL (sym))
2969 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2970 else
2971 align = BITS_PER_UNIT;
2973 ref_size = GET_MODE_SIZE (mode);
2974 if (ref_size == 0)
2975 ref_size = GET_MODE_SIZE (DImode);
2977 return ((INTVAL (offs) & (ref_size - 1)) == 0
2978 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2981 return false;
2983 default:
2984 return false;
2988 bool
2989 aarch64_symbolic_address_p (rtx x)
2991 rtx offset;
2993 split_const (x, &x, &offset);
2994 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2997 /* Classify the base of symbolic expression X, given that X appears in
2998 context CONTEXT. */
2999 static enum aarch64_symbol_type
3000 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3002 rtx offset;
3003 split_const (x, &x, &offset);
3004 return aarch64_classify_symbol (x, context);
3008 /* Return TRUE if X is a legitimate address for accessing memory in
3009 mode MODE. */
3010 static bool
3011 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3013 struct aarch64_address_info addr;
3015 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3018 /* Return TRUE if X is a legitimate address for accessing memory in
3019 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3020 pair operation. */
3021 bool
3022 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3023 RTX_CODE outer_code, bool strict_p)
3025 struct aarch64_address_info addr;
3027 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3030 /* Return TRUE if rtx X is immediate constant 0.0 */
3031 bool
3032 aarch64_float_const_zero_rtx_p (rtx x)
3034 REAL_VALUE_TYPE r;
3036 if (GET_MODE (x) == VOIDmode)
3037 return false;
3039 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3040 if (REAL_VALUE_MINUS_ZERO (r))
3041 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3042 return REAL_VALUES_EQUAL (r, dconst0);
3045 /* Return the fixed registers used for condition codes. */
3047 static bool
3048 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3050 *p1 = CC_REGNUM;
3051 *p2 = INVALID_REGNUM;
3052 return true;
3055 enum machine_mode
3056 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3058 /* All floating point compares return CCFP if it is an equality
3059 comparison, and CCFPE otherwise. */
3060 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3062 switch (code)
3064 case EQ:
3065 case NE:
3066 case UNORDERED:
3067 case ORDERED:
3068 case UNLT:
3069 case UNLE:
3070 case UNGT:
3071 case UNGE:
3072 case UNEQ:
3073 case LTGT:
3074 return CCFPmode;
3076 case LT:
3077 case LE:
3078 case GT:
3079 case GE:
3080 return CCFPEmode;
3082 default:
3083 gcc_unreachable ();
3087 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3088 && y == const0_rtx
3089 && (code == EQ || code == NE || code == LT || code == GE)
3090 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3091 || GET_CODE (x) == NEG))
3092 return CC_NZmode;
3094 /* A compare with a shifted operand. Because of canonicalization,
3095 the comparison will have to be swapped when we emit the assembly
3096 code. */
3097 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3098 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3099 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3100 || GET_CODE (x) == LSHIFTRT
3101 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3102 return CC_SWPmode;
3104 /* A compare of a mode narrower than SI mode against zero can be done
3105 by extending the value in the comparison. */
3106 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3107 && y == const0_rtx)
3108 /* Only use sign-extension if we really need it. */
3109 return ((code == GT || code == GE || code == LE || code == LT)
3110 ? CC_SESWPmode : CC_ZESWPmode);
3112 /* For everything else, return CCmode. */
3113 return CCmode;
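/* For instance, (compare (ashift (reg:DI x1) (const_int 3)) (reg:DI x0))
   selects CC_SWPmode: only the second source operand of the emitted
   "cmp x0, x1, lsl 3" can be shifted, so the condition is swapped again in
   aarch64_get_condition_code below.  */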
3116 static unsigned
3117 aarch64_get_condition_code (rtx x)
3119 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3120 enum rtx_code comp_code = GET_CODE (x);
3122 if (GET_MODE_CLASS (mode) != MODE_CC)
3123 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3125 switch (mode)
3127 case CCFPmode:
3128 case CCFPEmode:
3129 switch (comp_code)
3131 case GE: return AARCH64_GE;
3132 case GT: return AARCH64_GT;
3133 case LE: return AARCH64_LS;
3134 case LT: return AARCH64_MI;
3135 case NE: return AARCH64_NE;
3136 case EQ: return AARCH64_EQ;
3137 case ORDERED: return AARCH64_VC;
3138 case UNORDERED: return AARCH64_VS;
3139 case UNLT: return AARCH64_LT;
3140 case UNLE: return AARCH64_LE;
3141 case UNGT: return AARCH64_HI;
3142 case UNGE: return AARCH64_PL;
3143 default: gcc_unreachable ();
3145 break;
3147 case CCmode:
3148 switch (comp_code)
3150 case NE: return AARCH64_NE;
3151 case EQ: return AARCH64_EQ;
3152 case GE: return AARCH64_GE;
3153 case GT: return AARCH64_GT;
3154 case LE: return AARCH64_LE;
3155 case LT: return AARCH64_LT;
3156 case GEU: return AARCH64_CS;
3157 case GTU: return AARCH64_HI;
3158 case LEU: return AARCH64_LS;
3159 case LTU: return AARCH64_CC;
3160 default: gcc_unreachable ();
3162 break;
3164 case CC_SWPmode:
3165 case CC_ZESWPmode:
3166 case CC_SESWPmode:
3167 switch (comp_code)
3169 case NE: return AARCH64_NE;
3170 case EQ: return AARCH64_EQ;
3171 case GE: return AARCH64_LE;
3172 case GT: return AARCH64_LT;
3173 case LE: return AARCH64_GE;
3174 case LT: return AARCH64_GT;
3175 case GEU: return AARCH64_LS;
3176 case GTU: return AARCH64_CC;
3177 case LEU: return AARCH64_CS;
3178 case LTU: return AARCH64_HI;
3179 default: gcc_unreachable ();
3181 break;
3183 case CC_NZmode:
3184 switch (comp_code)
3186 case NE: return AARCH64_NE;
3187 case EQ: return AARCH64_EQ;
3188 case GE: return AARCH64_PL;
3189 case LT: return AARCH64_MI;
3190 default: gcc_unreachable ();
3192 break;
3194 default:
3195 gcc_unreachable ();
3196 break;
3200 static unsigned
3201 bit_count (unsigned HOST_WIDE_INT value)
3203 unsigned count = 0;
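  /* Clear the least significant set bit on each iteration
     (value &= value - 1), so the loop runs once per set bit.  */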
3205 while (value)
3207 count++;
3208 value &= value - 1;
3211 return count;
3214 void
3215 aarch64_print_operand (FILE *f, rtx x, char code)
3217 switch (code)
3219 case 'e':
3220 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3222 int n;
3224 if (GET_CODE (x) != CONST_INT
3225 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3227 output_operand_lossage ("invalid operand for '%%%c'", code);
3228 return;
3231 switch (n)
3233 case 3:
3234 fputc ('b', f);
3235 break;
3236 case 4:
3237 fputc ('h', f);
3238 break;
3239 case 5:
3240 fputc ('w', f);
3241 break;
3242 default:
3243 output_operand_lossage ("invalid operand for '%%%c'", code);
3244 return;
3247 break;
3249 case 'p':
3251 int n;
3253 /* Print N such that 2^N == X. */
3254 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3256 output_operand_lossage ("invalid operand for '%%%c'", code);
3257 return;
3260 asm_fprintf (f, "%d", n);
3262 break;
3264 case 'P':
3265 /* Print the number of non-zero bits in X (a const_int). */
3266 if (GET_CODE (x) != CONST_INT)
3268 output_operand_lossage ("invalid operand for '%%%c'", code);
3269 return;
3272 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3273 break;
3275 case 'H':
3276 /* Print the higher numbered register of a pair (TImode) of regs. */
3277 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3279 output_operand_lossage ("invalid operand for '%%%c'", code);
3280 return;
3283 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3284 break;
3286 case 'Q':
3287 /* Print the least significant register of a pair (TImode) of regs. */
3288 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3290 output_operand_lossage ("invalid operand for '%%%c'", code);
3291 return;
3293 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3294 break;
3296 case 'R':
3297 /* Print the most significant register of a pair (TImode) of regs. */
3298 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3300 output_operand_lossage ("invalid operand for '%%%c'", code);
3301 return;
3303 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3304 break;
3306 case 'm':
3307 /* Print a condition (eq, ne, etc). */
3309 /* CONST_TRUE_RTX means always -- that's the default. */
3310 if (x == const_true_rtx)
3311 return;
3313 if (!COMPARISON_P (x))
3315 output_operand_lossage ("invalid operand for '%%%c'", code);
3316 return;
3319 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3320 break;
3322 case 'M':
3323 /* Print the inverse of a condition (eq <-> ne, etc). */
3325 /* CONST_TRUE_RTX means never -- that's the default. */
3326 if (x == const_true_rtx)
3328 fputs ("nv", f);
3329 return;
3332 if (!COMPARISON_P (x))
3334 output_operand_lossage ("invalid operand for '%%%c'", code);
3335 return;
3338 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3339 (aarch64_get_condition_code (x))], f);
3340 break;
3342 case 'b':
3343 case 'h':
3344 case 's':
3345 case 'd':
3346 case 'q':
3347 /* Print a scalar FP/SIMD register name. */
3348 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3350 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3351 return;
3353 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3354 break;
3356 case 'S':
3357 case 'T':
3358 case 'U':
3359 case 'V':
3360 /* Print the first FP/SIMD register name in a list. */
3361 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3363 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3364 return;
3366 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3367 break;
3369 case 'X':
3370 /* Print integer constant in hex. */
3371 if (GET_CODE (x) != CONST_INT)
3373 output_operand_lossage ("invalid operand for '%%%c'", code);
3374 return;
3376 asm_fprintf (f, "0x%wx", UINTVAL (x));
3377 break;
3379 case 'w':
3380 case 'x':
3381 /* Print a general register name or the zero register (32-bit or
3382 64-bit). */
3383 if (x == const0_rtx
3384 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3386 asm_fprintf (f, "%czr", code);
3387 break;
3390 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3392 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3393 break;
3396 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3398 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3399 break;
3402 /* Fall through */
3404 case 0:
3405 /* Print a normal operand.  If it's a general register, then we
3406 assume DImode. */
3407 if (x == NULL)
3409 output_operand_lossage ("missing operand");
3410 return;
3413 switch (GET_CODE (x))
3415 case REG:
3416 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3417 break;
3419 case MEM:
3420 aarch64_memory_reference_mode = GET_MODE (x);
3421 output_address (XEXP (x, 0));
3422 break;
3424 case LABEL_REF:
3425 case SYMBOL_REF:
3426 output_addr_const (asm_out_file, x);
3427 break;
3429 case CONST_INT:
3430 asm_fprintf (f, "%wd", INTVAL (x));
3431 break;
3433 case CONST_VECTOR:
3434 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3436 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3437 HOST_WIDE_INT_MIN,
3438 HOST_WIDE_INT_MAX));
3439 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3441 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3443 fputc ('0', f);
3445 else
3446 gcc_unreachable ();
3447 break;
3449 case CONST_DOUBLE:
3450 /* CONST_DOUBLE can represent a double-width integer.
3451 In this case, the mode of x is VOIDmode. */
3452 if (GET_MODE (x) == VOIDmode)
3453 ; /* Do Nothing. */
3454 else if (aarch64_float_const_zero_rtx_p (x))
3456 fputc ('0', f);
3457 break;
3459 else if (aarch64_float_const_representable_p (x))
3461 #define buf_size 20
3462 char float_buf[buf_size] = {'\0'};
3463 REAL_VALUE_TYPE r;
3464 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3465 real_to_decimal_for_mode (float_buf, &r,
3466 buf_size, buf_size,
3467 1, GET_MODE (x));
3468 asm_fprintf (asm_out_file, "%s", float_buf);
3469 break;
3470 #undef buf_size
3472 output_operand_lossage ("invalid constant");
3473 return;
3474 default:
3475 output_operand_lossage ("invalid operand");
3476 return;
3478 break;
3480 case 'A':
3481 if (GET_CODE (x) == HIGH)
3482 x = XEXP (x, 0);
3484 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3486 case SYMBOL_SMALL_GOT:
3487 asm_fprintf (asm_out_file, ":got:");
3488 break;
3490 case SYMBOL_SMALL_TLSGD:
3491 asm_fprintf (asm_out_file, ":tlsgd:");
3492 break;
3494 case SYMBOL_SMALL_TLSDESC:
3495 asm_fprintf (asm_out_file, ":tlsdesc:");
3496 break;
3498 case SYMBOL_SMALL_GOTTPREL:
3499 asm_fprintf (asm_out_file, ":gottprel:");
3500 break;
3502 case SYMBOL_SMALL_TPREL:
3503 asm_fprintf (asm_out_file, ":tprel:");
3504 break;
3506 default:
3507 break;
3509 output_addr_const (asm_out_file, x);
3510 break;
3512 case 'L':
3513 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3515 case SYMBOL_SMALL_GOT:
3516 asm_fprintf (asm_out_file, ":lo12:");
3517 break;
3519 case SYMBOL_SMALL_TLSGD:
3520 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3521 break;
3523 case SYMBOL_SMALL_TLSDESC:
3524 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3525 break;
3527 case SYMBOL_SMALL_GOTTPREL:
3528 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3529 break;
3531 case SYMBOL_SMALL_TPREL:
3532 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3533 break;
3535 default:
3536 break;
3538 output_addr_const (asm_out_file, x);
3539 break;
3541 case 'G':
3543 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3545 case SYMBOL_SMALL_TPREL:
3546 asm_fprintf (asm_out_file, ":tprel_hi12:");
3547 break;
3548 default:
3549 break;
3551 output_addr_const (asm_out_file, x);
3552 break;
3554 default:
3555 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3556 return;
3560 void
3561 aarch64_print_operand_address (FILE *f, rtx x)
3563 struct aarch64_address_info addr;
3565 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3566 MEM, true))
3567 switch (addr.type)
3569 case ADDRESS_REG_IMM:
3570 if (addr.offset == const0_rtx)
3571 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3572 else
3573 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3574 INTVAL (addr.offset));
3575 return;
3577 case ADDRESS_REG_REG:
3578 if (addr.shift == 0)
3579 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3580 reg_names [REGNO (addr.offset)]);
3581 else
3582 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3583 reg_names [REGNO (addr.offset)], addr.shift);
3584 return;
3586 case ADDRESS_REG_UXTW:
3587 if (addr.shift == 0)
3588 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3589 REGNO (addr.offset) - R0_REGNUM);
3590 else
3591 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3592 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3593 return;
3595 case ADDRESS_REG_SXTW:
3596 if (addr.shift == 0)
3597 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3598 REGNO (addr.offset) - R0_REGNUM);
3599 else
3600 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3601 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3602 return;
3604 case ADDRESS_REG_WB:
3605 switch (GET_CODE (x))
3607 case PRE_INC:
3608 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3609 GET_MODE_SIZE (aarch64_memory_reference_mode));
3610 return;
3611 case POST_INC:
3612 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3613 GET_MODE_SIZE (aarch64_memory_reference_mode));
3614 return;
3615 case PRE_DEC:
3616 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3617 GET_MODE_SIZE (aarch64_memory_reference_mode));
3618 return;
3619 case POST_DEC:
3620 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3621 GET_MODE_SIZE (aarch64_memory_reference_mode));
3622 return;
3623 case PRE_MODIFY:
3624 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3625 INTVAL (addr.offset));
3626 return;
3627 case POST_MODIFY:
3628 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3629 INTVAL (addr.offset));
3630 return;
3631 default:
3632 break;
3634 break;
3636 case ADDRESS_LO_SUM:
3637 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3638 output_addr_const (f, addr.offset);
3639 asm_fprintf (f, "]");
3640 return;
3642 case ADDRESS_SYMBOLIC:
3643 break;
3646 output_addr_const (f, x);
3649 void
3650 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3651 int labelno ATTRIBUTE_UNUSED)
3653 sorry ("function profiling");
3656 bool
3657 aarch64_label_mentioned_p (rtx x)
3659 const char *fmt;
3660 int i;
3662 if (GET_CODE (x) == LABEL_REF)
3663 return true;
3665 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3666 referencing instruction, but they are constant offsets, not
3667 symbols. */
3668 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3669 return false;
3671 fmt = GET_RTX_FORMAT (GET_CODE (x));
3672 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3674 if (fmt[i] == 'E')
3676 int j;
3678 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3679 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3680 return 1;
3682 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3683 return 1;
3686 return 0;
3689 /* Implement REGNO_REG_CLASS. */
3691 enum reg_class
3692 aarch64_regno_regclass (unsigned regno)
3694 if (GP_REGNUM_P (regno))
3695 return CORE_REGS;
3697 if (regno == SP_REGNUM)
3698 return STACK_REG;
3700 if (regno == FRAME_POINTER_REGNUM
3701 || regno == ARG_POINTER_REGNUM)
3702 return CORE_REGS;
3704 if (FP_REGNUM_P (regno))
3705 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3707 return NO_REGS;
3710 /* Try a machine-dependent way of reloading an illegitimate address
3711 operand. If we find one, push the reload and return the new rtx. */
3714 aarch64_legitimize_reload_address (rtx *x_p,
3715 enum machine_mode mode,
3716 int opnum, int type,
3717 int ind_levels ATTRIBUTE_UNUSED)
3719 rtx x = *x_p;
3721 /* Do not allow mem (plus (reg, const)) if vector mode. */
3722 if (aarch64_vector_mode_p (mode)
3723 && GET_CODE (x) == PLUS
3724 && REG_P (XEXP (x, 0))
3725 && CONST_INT_P (XEXP (x, 1)))
3727 rtx orig_rtx = x;
3728 x = copy_rtx (x);
3729 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3730 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3731 opnum, (enum reload_type) type);
3732 return x;
3735 /* We must recognize output that we have already generated ourselves. */
3736 if (GET_CODE (x) == PLUS
3737 && GET_CODE (XEXP (x, 0)) == PLUS
3738 && REG_P (XEXP (XEXP (x, 0), 0))
3739 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3740 && CONST_INT_P (XEXP (x, 1)))
3742 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3743 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3744 opnum, (enum reload_type) type);
3745 return x;
3748 /* We wish to handle large displacements off a base register by splitting
3749 the addend across an add and the mem insn. This can cut the number of
3750 extra insns needed from 3 to 1. It is only useful for load/store of a
3751 single register with a 12-bit offset field. */
3752 if (GET_CODE (x) == PLUS
3753 && REG_P (XEXP (x, 0))
3754 && CONST_INT_P (XEXP (x, 1))
3755 && HARD_REGISTER_P (XEXP (x, 0))
3756 && mode != TImode
3757 && mode != TFmode
3758 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3760 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3761 HOST_WIDE_INT low = val & 0xfff;
3762 HOST_WIDE_INT high = val - low;
3763 HOST_WIDE_INT offs;
3764 rtx cst;
3766 /* Punt on BLKmode (zero-size) offsets: we cannot ascertain BLKmode
3767 alignment, so leave such addresses to the generic reload code. */
3768 if (GET_MODE_SIZE (mode) == 0)
3769 return NULL_RTX;
3771 offs = low % GET_MODE_SIZE (mode);
3773 /* Align misaligned offset by adjusting high part to compensate. */
3774 if (offs != 0)
3776 if (aarch64_uimm12_shift (high + offs))
3778 /* Align down. */
3779 low = low - offs;
3780 high = high + offs;
3782 else
3784 /* Align up. */
3785 offs = GET_MODE_SIZE (mode) - offs;
3786 low = low + offs;
3787 high = high + (low & 0x1000) - offs;
3788 low &= 0xfff;
3792 /* Check for overflow. */
3793 if (high + low != val)
3794 return NULL_RTX;
3796 cst = GEN_INT (high);
3797 if (!aarch64_uimm12_shift (high))
3798 cst = force_const_mem (Pmode, cst);
3800 /* Reload high part into base reg, leaving the low part
3801 in the mem instruction. */
3802 x = gen_rtx_PLUS (Pmode,
3803 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3804 GEN_INT (low));
3806 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3807 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3808 opnum, (enum reload_type) type);
3809 return x;
3812 return NULL_RTX;
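/* To illustrate the splitting above, an SImode access at
   (plus (reg) (const_int 0x13008)) reloads the high part 0x13000 into the
   base register and keeps the low part in the memory reference, giving
   roughly (register numbers are illustrative):

	add	x16, x1, #0x13, lsl #12
	ldr	w0, [x16, #8]
*/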
3816 static reg_class_t
3817 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3818 reg_class_t rclass,
3819 enum machine_mode mode,
3820 secondary_reload_info *sri)
3822 /* Address expressions of the form PLUS (SP, large_offset) need two
3823 scratch registers, one for the constant, and one for holding a
3824 copy of SP, since SP cannot be used on the RHS of an add-reg
3825 instruction. */
3826 if (mode == DImode
3827 && GET_CODE (x) == PLUS
3828 && XEXP (x, 0) == stack_pointer_rtx
3829 && CONST_INT_P (XEXP (x, 1))
3830 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3832 sri->icode = CODE_FOR_reload_sp_immediate;
3833 return NO_REGS;
3836 /* Without the TARGET_SIMD instructions we cannot move a Q register
3837 to a Q register directly. We need a scratch. */
3838 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3839 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3840 && reg_class_subset_p (rclass, FP_REGS))
3842 if (mode == TFmode)
3843 sri->icode = CODE_FOR_aarch64_reload_movtf;
3844 else if (mode == TImode)
3845 sri->icode = CODE_FOR_aarch64_reload_movti;
3846 return NO_REGS;
3849 /* A TFmode or TImode memory access should be handled via FP_REGS
3850 because AArch64 has richer addressing modes for LDR/STR instructions
3851 than LDP/STP instructions. */
3852 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3853 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3854 return FP_REGS;
3856 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3857 return CORE_REGS;
3859 return NO_REGS;
3862 static bool
3863 aarch64_can_eliminate (const int from, const int to)
3865 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3866 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3868 if (frame_pointer_needed)
3870 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3871 return true;
3872 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3873 return false;
3874 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3875 && !cfun->calls_alloca)
3876 return true;
3877 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3878 return true;
3879 return false;
3881 else
3883 /* If we decided that we didn't need a leaf frame pointer but then used
3884 LR in the function, then we'll want a frame pointer after all, so
3885 prevent this elimination to ensure a frame pointer is used.
3887 NOTE: the original value of flag_omit_frame_pointer gets trashed
3888 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3889 of faked_omit_frame_pointer here (which is true when we always
3890 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3891 pointers when LR is clobbered). */
3892 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3893 && df_regs_ever_live_p (LR_REGNUM)
3894 && faked_omit_frame_pointer)
3895 return false;
3898 return true;
3901 HOST_WIDE_INT
3902 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3904 HOST_WIDE_INT frame_size;
3905 HOST_WIDE_INT offset;
3907 aarch64_layout_frame ();
3908 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3909 + crtl->outgoing_args_size
3910 + cfun->machine->saved_varargs_size);
3912 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3913 offset = frame_size;
3915 if (to == HARD_FRAME_POINTER_REGNUM)
3917 if (from == ARG_POINTER_REGNUM)
3918 return offset - crtl->outgoing_args_size;
3920 if (from == FRAME_POINTER_REGNUM)
3921 return cfun->machine->frame.saved_regs_size;
3924 if (to == STACK_POINTER_REGNUM)
3926 if (from == FRAME_POINTER_REGNUM)
3928 HOST_WIDE_INT elim = crtl->outgoing_args_size
3929 + cfun->machine->frame.saved_regs_size
3930 - cfun->machine->frame.fp_lr_offset;
3931 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3932 return elim;
3936 return offset;
3940 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
3941 previous frame. */
3944 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3946 if (count != 0)
3947 return const0_rtx;
3948 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3952 static void
3953 aarch64_asm_trampoline_template (FILE *f)
3955 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3956 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3957 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3958 assemble_aligned_integer (4, const0_rtx);
3959 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3960 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3963 unsigned
3964 aarch64_trampoline_size (void)
3966 return 32; /* 3 insns + padding + 2 dwords. */
3969 static void
3970 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3972 rtx fnaddr, mem, a_tramp;
3974 /* Don't need to copy the trailing D-words; we fill those in below. */
3975 emit_block_move (m_tramp, assemble_trampoline_template (),
3976 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3977 mem = adjust_address (m_tramp, DImode, 16);
3978 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3979 emit_move_insn (mem, fnaddr);
3981 mem = adjust_address (m_tramp, DImode, 24);
3982 emit_move_insn (mem, chain_value);
3984 /* XXX We should really define a "clear_cache" pattern and use
3985 gen_clear_cache(). */
3986 a_tramp = XEXP (m_tramp, 0);
3987 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3988 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3989 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
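/* The 32-byte trampoline assembled by the two functions above is laid out
   as follows (offsets in bytes; the register names depend on IP1_REGNUM and
   STATIC_CHAIN_REGNUM):

	 0:	ldr	<ip1>, .+16		// load the target address
	 4:	ldr	<chain>, .+20		// load the static chain value
	 8:	br	<ip1>
	12:	.word	0			// padding
	16:	.dword	<function address>	// filled in by aarch64_trampoline_init
	24:	.dword	<static chain value>
*/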
3992 static unsigned char
3993 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3995 switch (regclass)
3997 case CORE_REGS:
3998 case POINTER_REGS:
3999 case GENERAL_REGS:
4000 case ALL_REGS:
4001 case FP_REGS:
4002 case FP_LO_REGS:
4003 return
4004 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4005 (GET_MODE_SIZE (mode) + 7) / 8;
4006 case STACK_REG:
4007 return 1;
4009 case NO_REGS:
4010 return 0;
4012 default:
4013 break;
4015 gcc_unreachable ();
4018 static reg_class_t
4019 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4021 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4022 ? GENERAL_REGS : regclass);
4025 void
4026 aarch64_asm_output_labelref (FILE* f, const char *name)
4028 asm_fprintf (f, "%U%s", name);
4031 static void
4032 aarch64_elf_asm_constructor (rtx symbol, int priority)
4034 if (priority == DEFAULT_INIT_PRIORITY)
4035 default_ctor_section_asm_out_constructor (symbol, priority);
4036 else
4038 section *s;
4039 char buf[18];
4040 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4041 s = get_section (buf, SECTION_WRITE, NULL);
4042 switch_to_section (s);
4043 assemble_align (POINTER_SIZE);
4044 fputs ("\t.dword\t", asm_out_file);
4045 output_addr_const (asm_out_file, symbol);
4046 fputc ('\n', asm_out_file);
4050 static void
4051 aarch64_elf_asm_destructor (rtx symbol, int priority)
4053 if (priority == DEFAULT_INIT_PRIORITY)
4054 default_dtor_section_asm_out_destructor (symbol, priority);
4055 else
4057 section *s;
4058 char buf[18];
4059 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4060 s = get_section (buf, SECTION_WRITE, NULL);
4061 switch_to_section (s);
4062 assemble_align (POINTER_SIZE);
4063 fputs ("\t.dword\t", asm_out_file);
4064 output_addr_const (asm_out_file, symbol);
4065 fputc ('\n', asm_out_file);
4069 const char*
4070 aarch64_output_casesi (rtx *operands)
4072 char buf[100];
4073 char label[100];
4074 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4075 int index;
4076 static const char *const patterns[4][2] =
4079 "ldrb\t%w3, [%0,%w1,uxtw]",
4080 "add\t%3, %4, %w3, sxtb #2"
4083 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4084 "add\t%3, %4, %w3, sxth #2"
4087 "ldr\t%w3, [%0,%w1,uxtw #2]",
4088 "add\t%3, %4, %w3, sxtw #2"
4090 /* We assume that DImode is only generated when not optimizing and
4091 that we don't really need 64-bit address offsets. That would
4092 imply an object file with 8GB of code in a single function! */
4094 "ldr\t%w3, [%0,%w1,uxtw #2]",
4095 "add\t%3, %4, %w3, sxtw #2"
4099 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4101 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4103 gcc_assert (index >= 0 && index <= 3);
4105 /* Need to implement table size reduction, by changing the code below. */
4106 output_asm_insn (patterns[index][0], operands);
4107 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4108 snprintf (buf, sizeof (buf),
4109 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4110 output_asm_insn (buf, operands);
4111 output_asm_insn (patterns[index][1], operands);
4112 output_asm_insn ("br\t%3", operands);
4113 assemble_label (asm_out_file, label);
4114 return "";
4118 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4119 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4120 operator. */
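/* For illustration: with a shift of 0, a mask of 0xff yields 8 (UXTB);
   with a shift of 2, a mask of 0x3fc (0xff << 2) also yields 8; with a
   shift of 1, a mask of 0x1fffe (0xffff << 1) yields 16 (UXTH).  Any
   mask that is not a contiguous 8-, 16- or 32-bit field starting at
   SHIFT gives 0.  */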
4123 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4125 if (shift >= 0 && shift <= 3)
4127 int size;
4128 for (size = 8; size <= 32; size *= 2)
4130 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4131 if (mask == bits << shift)
4132 return size;
4135 return 0;
4138 static bool
4139 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4140 const_rtx x ATTRIBUTE_UNUSED)
4142 /* We can't use blocks for constants when we're using a per-function
4143 constant pool. */
4144 return false;
4147 static section *
4148 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4149 rtx x ATTRIBUTE_UNUSED,
4150 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4152 /* Force all constant pool entries into the current function section. */
4153 return function_section (current_function_decl);
4157 /* Costs. */
4159 /* Helper function for rtx cost calculation. Strip a shift expression
4160 from X. Returns the inner operand if successful, or the original
4161 expression on failure. */
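/* For example, (ashift (reg) (const_int 3)) and (mult (reg) (const_int 8))
   both strip down to (reg); a MULT by a constant that is not a power of
   two is left untouched.  */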
4162 static rtx
4163 aarch64_strip_shift (rtx x)
4165 rtx op = x;
4167 if ((GET_CODE (op) == ASHIFT
4168 || GET_CODE (op) == ASHIFTRT
4169 || GET_CODE (op) == LSHIFTRT)
4170 && CONST_INT_P (XEXP (op, 1)))
4171 return XEXP (op, 0);
4173 if (GET_CODE (op) == MULT
4174 && CONST_INT_P (XEXP (op, 1))
4175 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4176 return XEXP (op, 0);
4178 return x;
4181 /* Helper function for rtx cost calculation. Strip a shift or extend
4182 expression from X. Returns the inner operand if successful, or the
4183 original expression on failure. We deal with a number of possible
4184 canonicalization variations here. */
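/* For example, (ashift (sign_extend (reg)) (const_int 2)), as generated
   for an SXTW #2 extended-register operand, strips down to (reg); the
   AND-of-MULT form that canonicalization uses for zero-extends is
   handled the same way.  */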
4185 static rtx
4186 aarch64_strip_shift_or_extend (rtx x)
4188 rtx op = x;
4190 /* Zero and sign extraction of a widened value. */
4191 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4192 && XEXP (op, 2) == const0_rtx
4193 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4194 XEXP (op, 1)))
4195 return XEXP (XEXP (op, 0), 0);
4197 /* It can also be represented (for zero-extend) as an AND with an
4198 immediate. */
4199 if (GET_CODE (op) == AND
4200 && GET_CODE (XEXP (op, 0)) == MULT
4201 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4202 && CONST_INT_P (XEXP (op, 1))
4203 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4204 INTVAL (XEXP (op, 1))) != 0)
4205 return XEXP (XEXP (op, 0), 0);
4207 /* Now handle extended register, as this may also have an optional
4208 left shift by 1..4. */
4209 if (GET_CODE (op) == ASHIFT
4210 && CONST_INT_P (XEXP (op, 1))
4211 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4212 op = XEXP (op, 0);
4214 if (GET_CODE (op) == ZERO_EXTEND
4215 || GET_CODE (op) == SIGN_EXTEND)
4216 op = XEXP (op, 0);
4218 if (op != x)
4219 return op;
4221 return aarch64_strip_shift (x);
4224 /* Calculate the cost of calculating X, storing it in *COST. Result
4225 is true if the total cost of the operation has now been calculated. */
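/* A false return tells the generic rtx_cost machinery to recurse into
   the operands of X itself; the "all arguments need to be in registers"
   cases below rely on this and only add the per-insn extra cost.  */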
4226 static bool
4227 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4228 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4230 rtx op0, op1;
4231 const struct cpu_rtx_cost_table *extra_cost
4232 = aarch64_tune_params->insn_extra_cost;
4234 switch (code)
4236 case SET:
4237 op0 = SET_DEST (x);
4238 op1 = SET_SRC (x);
4240 switch (GET_CODE (op0))
4242 case MEM:
4243 if (speed)
4244 *cost += extra_cost->memory_store;
4246 if (op1 != const0_rtx)
4247 *cost += rtx_cost (op1, SET, 1, speed);
4248 return true;
4250 case SUBREG:
4251 if (! REG_P (SUBREG_REG (op0)))
4252 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4253 /* Fall through. */
4254 case REG:
4255 /* Cost is just the cost of the RHS of the set. */
4256 *cost += rtx_cost (op1, SET, 1, true);
4257 return true;
4259 case ZERO_EXTRACT: /* Bit-field insertion. */
4260 case SIGN_EXTRACT:
4261 /* Strip any redundant widening of the RHS to meet the width of
4262 the target. */
4263 if (GET_CODE (op1) == SUBREG)
4264 op1 = SUBREG_REG (op1);
4265 if ((GET_CODE (op1) == ZERO_EXTEND
4266 || GET_CODE (op1) == SIGN_EXTEND)
4267 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4268 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4269 >= INTVAL (XEXP (op0, 1))))
4270 op1 = XEXP (op1, 0);
4271 *cost += rtx_cost (op1, SET, 1, speed);
4272 return true;
4274 default:
4275 break;
4277 return false;
4279 case MEM:
4280 if (speed)
4281 *cost += extra_cost->memory_load;
4283 return true;
4285 case NEG:
4286 op0 = CONST0_RTX (GET_MODE (x));
4287 op1 = XEXP (x, 0);
4288 goto cost_minus;
4290 case COMPARE:
4291 op0 = XEXP (x, 0);
4292 op1 = XEXP (x, 1);
4294 if (op1 == const0_rtx
4295 && GET_CODE (op0) == AND)
4297 x = op0;
4298 goto cost_logic;
4301 /* Comparisons can work if the order is swapped.
4302 Canonicalization puts the more complex operation first, but
4303 we want it in op1. */
4304 if (! (REG_P (op0)
4305 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4307 op0 = XEXP (x, 1);
4308 op1 = XEXP (x, 0);
4310 goto cost_minus;
4312 case MINUS:
4313 op0 = XEXP (x, 0);
4314 op1 = XEXP (x, 1);
4316 cost_minus:
4317 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4318 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4319 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4321 if (op0 != const0_rtx)
4322 *cost += rtx_cost (op0, MINUS, 0, speed);
4324 if (CONST_INT_P (op1))
4326 if (!aarch64_uimm12_shift (INTVAL (op1)))
4327 *cost += rtx_cost (op1, MINUS, 1, speed);
4329 else
4331 op1 = aarch64_strip_shift_or_extend (op1);
4332 *cost += rtx_cost (op1, MINUS, 1, speed);
4334 return true;
4337 return false;
4339 case PLUS:
4340 op0 = XEXP (x, 0);
4341 op1 = XEXP (x, 1);
4343 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4345 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4347 *cost += rtx_cost (op0, PLUS, 0, speed);
4349 else
4351 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4353 if (new_op0 == op0
4354 && GET_CODE (op0) == MULT)
4356 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4357 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4358 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4359 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4361 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4362 speed)
4363 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4364 speed)
4365 + rtx_cost (op1, PLUS, 1, speed));
4366 if (speed)
4367 *cost += extra_cost->int_multiply_extend_add;
4368 return true;
4370 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4371 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4372 + rtx_cost (op1, PLUS, 1, speed));
4374 if (speed)
4375 *cost += extra_cost->int_multiply_add;
4378 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4379 + rtx_cost (op1, PLUS, 1, speed));
4381 return true;
4384 return false;
4386 case IOR:
4387 case XOR:
4388 case AND:
4389 cost_logic:
4390 op0 = XEXP (x, 0);
4391 op1 = XEXP (x, 1);
4393 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4395 if (CONST_INT_P (op1)
4396 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4398 *cost += rtx_cost (op0, AND, 0, speed);
4400 else
4402 if (GET_CODE (op0) == NOT)
4403 op0 = XEXP (op0, 0);
4404 op0 = aarch64_strip_shift (op0);
4405 *cost += (rtx_cost (op0, AND, 0, speed)
4406 + rtx_cost (op1, AND, 1, speed));
4408 return true;
4410 return false;
4412 case ZERO_EXTEND:
4413 if ((GET_MODE (x) == DImode
4414 && GET_MODE (XEXP (x, 0)) == SImode)
4415 || GET_CODE (XEXP (x, 0)) == MEM)
4417 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4418 return true;
4420 return false;
4422 case SIGN_EXTEND:
4423 if (GET_CODE (XEXP (x, 0)) == MEM)
4425 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4426 return true;
4428 return false;
4430 case ROTATE:
4431 if (!CONST_INT_P (XEXP (x, 1)))
4432 *cost += COSTS_N_INSNS (2);
4433 /* Fall through. */
4434 case ROTATERT:
4435 case LSHIFTRT:
4436 case ASHIFT:
4437 case ASHIFTRT:
4439 /* Shifting by a register often takes an extra cycle. */
4440 if (speed && !CONST_INT_P (XEXP (x, 1)))
4441 *cost += extra_cost->register_shift;
4443 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4444 return true;
4446 case HIGH:
4447 if (!CONSTANT_P (XEXP (x, 0)))
4448 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4449 return true;
4451 case LO_SUM:
4452 if (!CONSTANT_P (XEXP (x, 1)))
4453 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4454 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4455 return true;
4457 case ZERO_EXTRACT:
4458 case SIGN_EXTRACT:
4459 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4460 return true;
4462 case MULT:
4463 op0 = XEXP (x, 0);
4464 op1 = XEXP (x, 1);
4466 *cost = COSTS_N_INSNS (1);
4467 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4469 if (CONST_INT_P (op1)
4470 && exact_log2 (INTVAL (op1)) > 0)
4472 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4473 return true;
4476 if ((GET_CODE (op0) == ZERO_EXTEND
4477 && GET_CODE (op1) == ZERO_EXTEND)
4478 || (GET_CODE (op0) == SIGN_EXTEND
4479 && GET_CODE (op1) == SIGN_EXTEND))
4481 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4482 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4483 if (speed)
4484 *cost += extra_cost->int_multiply_extend;
4485 return true;
4488 if (speed)
4489 *cost += extra_cost->int_multiply;
4491 else if (speed)
4493 if (GET_MODE (x) == DFmode)
4494 *cost += extra_cost->double_multiply;
4495 else if (GET_MODE (x) == SFmode)
4496 *cost += extra_cost->float_multiply;
4499 return false; /* All arguments need to be in registers. */
4501 case MOD:
4502 case UMOD:
4503 *cost = COSTS_N_INSNS (2);
4504 if (speed)
4506 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4507 *cost += (extra_cost->int_multiply_add
4508 + extra_cost->int_divide);
4509 else if (GET_MODE (x) == DFmode)
4510 *cost += (extra_cost->double_multiply
4511 + extra_cost->double_divide);
4512 else if (GET_MODE (x) == SFmode)
4513 *cost += (extra_cost->float_multiply
4514 + extra_cost->float_divide);
4516 return false; /* All arguments need to be in registers. */
4518 case DIV:
4519 case UDIV:
4520 *cost = COSTS_N_INSNS (1);
4521 if (speed)
4523 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4524 *cost += extra_cost->int_divide;
4525 else if (GET_MODE (x) == DFmode)
4526 *cost += extra_cost->double_divide;
4527 else if (GET_MODE (x) == SFmode)
4528 *cost += extra_cost->float_divide;
4530 return false; /* All arguments need to be in registers. */
4532 default:
4533 break;
4535 return false;
4538 static int
4539 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4540 enum machine_mode mode ATTRIBUTE_UNUSED,
4541 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4543 enum rtx_code c = GET_CODE (x);
4544 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4546 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4547 return addr_cost->pre_modify;
4549 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4550 return addr_cost->post_modify;
4552 if (c == PLUS)
4554 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4555 return addr_cost->imm_offset;
4556 else if (GET_CODE (XEXP (x, 0)) == MULT
4557 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4558 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4559 return addr_cost->register_extend;
4561 return addr_cost->register_offset;
4563 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4564 return addr_cost->imm_offset;
4566 return 0;
4569 static int
4570 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4571 reg_class_t from, reg_class_t to)
4573 const struct cpu_regmove_cost *regmove_cost
4574 = aarch64_tune_params->regmove_cost;
4576 if (from == GENERAL_REGS && to == GENERAL_REGS)
4577 return regmove_cost->GP2GP;
4578 else if (from == GENERAL_REGS)
4579 return regmove_cost->GP2FP;
4580 else if (to == GENERAL_REGS)
4581 return regmove_cost->FP2GP;
4583 /* When AdvSIMD instructions are disabled it is not possible to move
4584 a 128-bit value directly between Q registers. This is handled in
4585 secondary reload. A general register is used as a scratch to move
4586 the upper DI value and the lower DI value is moved directly,
4587 hence the cost is the sum of three moves. */
4589 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4590 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4592 return regmove_cost->FP2FP;
4595 static int
4596 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4597 reg_class_t rclass ATTRIBUTE_UNUSED,
4598 bool in ATTRIBUTE_UNUSED)
4600 return aarch64_tune_params->memmov_cost;
4603 static void initialize_aarch64_code_model (void);
4605 /* Parse the architecture extension string. */
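/* For example, given "+fp+nocrypto" (assuming the usual extension names),
   the loop below first matches "fp" and ORs in its flags_on bits, then
   matches "crypto" with the "no" prefix stripped and clears its
   flags_off bits.  A bare "+no" is rejected with "missing feature
   modifier".  */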
4607 static void
4608 aarch64_parse_extension (char *str)
4610 /* The extension string is parsed left to right. */
4611 const struct aarch64_option_extension *opt = NULL;
4613 /* Flag to say whether we are adding or removing an extension. */
4614 int adding_ext = -1;
4616 while (str != NULL && *str != 0)
4618 char *ext;
4619 size_t len;
4621 str++;
4622 ext = strchr (str, '+');
4624 if (ext != NULL)
4625 len = ext - str;
4626 else
4627 len = strlen (str);
4629 if (len >= 2 && strncmp (str, "no", 2) == 0)
4631 adding_ext = 0;
4632 len -= 2;
4633 str += 2;
4635 else if (len > 0)
4636 adding_ext = 1;
4638 if (len == 0)
4640 error ("missing feature modifier after %qs", "+no");
4641 return;
4644 /* Scan over the extensions table trying to find an exact match. */
4645 for (opt = all_extensions; opt->name != NULL; opt++)
4647 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4649 /* Add or remove the extension. */
4650 if (adding_ext)
4651 aarch64_isa_flags |= opt->flags_on;
4652 else
4653 aarch64_isa_flags &= ~(opt->flags_off);
4654 break;
4658 if (opt->name == NULL)
4660 /* Extension not found in list. */
4661 error ("unknown feature modifier %qs", str);
4662 return;
4665 str = ext;
4668 return;
4671 /* Parse the ARCH string. */
4673 static void
4674 aarch64_parse_arch (void)
4676 char *ext;
4677 const struct processor *arch;
4678 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4679 size_t len;
4681 strcpy (str, aarch64_arch_string);
4683 ext = strchr (str, '+');
4685 if (ext != NULL)
4686 len = ext - str;
4687 else
4688 len = strlen (str);
4690 if (len == 0)
4692 error ("missing arch name in -march=%qs", str);
4693 return;
4696 /* Loop through the list of supported ARCHs to find a match. */
4697 for (arch = all_architectures; arch->name != NULL; arch++)
4699 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4701 selected_arch = arch;
4702 aarch64_isa_flags = selected_arch->flags;
4703 selected_cpu = &all_cores[selected_arch->core];
4705 if (ext != NULL)
4707 /* ARCH string contains at least one extension. */
4708 aarch64_parse_extension (ext);
4711 return;
4715 /* ARCH name not found in list. */
4716 error ("unknown value %qs for -march", str);
4717 return;
4720 /* Parse the CPU string. */
4722 static void
4723 aarch64_parse_cpu (void)
4725 char *ext;
4726 const struct processor *cpu;
4727 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4728 size_t len;
4730 strcpy (str, aarch64_cpu_string);
4732 ext = strchr (str, '+');
4734 if (ext != NULL)
4735 len = ext - str;
4736 else
4737 len = strlen (str);
4739 if (len == 0)
4741 error ("missing cpu name in -mcpu=%qs", str);
4742 return;
4745 /* Loop through the list of supported CPUs to find a match. */
4746 for (cpu = all_cores; cpu->name != NULL; cpu++)
4748 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4750 selected_cpu = cpu;
4751 aarch64_isa_flags = selected_cpu->flags;
4753 if (ext != NULL)
4755 /* CPU string contains at least one extension. */
4756 aarch64_parse_extension (ext);
4759 return;
4763 /* CPU name not found in list. */
4764 error ("unknown value %qs for -mcpu", str);
4765 return;
4768 /* Parse the TUNE string. */
4770 static void
4771 aarch64_parse_tune (void)
4773 const struct processor *cpu;
4774 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4775 strcpy (str, aarch64_tune_string);
4777 /* Loop through the list of supported CPUs to find a match. */
4778 for (cpu = all_cores; cpu->name != NULL; cpu++)
4780 if (strcmp (cpu->name, str) == 0)
4782 selected_tune = cpu;
4783 return;
4787 /* CPU name not found in list. */
4788 error ("unknown value %qs for -mtune", str);
4789 return;
4793 /* Implement TARGET_OPTION_OVERRIDE. */
4795 static void
4796 aarch64_override_options (void)
4798 /* -march wins over -mcpu, so when -march is defined, -mcpu takes the
4799 same value; otherwise -march remains undefined. -mtune can be used
4800 with either -march or -mcpu. */
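/* For example, with "-march=armv8-a+nofp -mcpu=cortex-a53" (assuming
   that core is listed in all_cores), the -mcpu value is discarded when
   aarch64_cpu_string is cleared below, the core is taken from the
   architecture table instead, and only an explicit -mtune would change
   the tuning target.  */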
4802 if (aarch64_arch_string)
4804 aarch64_parse_arch ();
4805 aarch64_cpu_string = NULL;
4808 if (aarch64_cpu_string)
4810 aarch64_parse_cpu ();
4811 selected_arch = NULL;
4814 if (aarch64_tune_string)
4816 aarch64_parse_tune ();
4819 initialize_aarch64_code_model ();
4821 aarch64_build_bitmask_table ();
4823 /* This target defaults to strict volatile bitfields. */
4824 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4825 flag_strict_volatile_bitfields = 1;
4827 /* If the user did not specify a processor, choose the default
4828 one for them. This will be the CPU set during configuration using
4829 --with-cpu, otherwise it is "generic". */
4830 if (!selected_cpu)
4832 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4833 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4836 gcc_assert (selected_cpu);
4838 /* The selected cpu may be an architecture, so look up tuning by core ID. */
4839 if (!selected_tune)
4840 selected_tune = &all_cores[selected_cpu->core];
4842 aarch64_tune_flags = selected_tune->flags;
4843 aarch64_tune = selected_tune->core;
4844 aarch64_tune_params = selected_tune->tune;
4846 aarch64_override_options_after_change ();
4849 /* Implement targetm.override_options_after_change. */
4851 static void
4852 aarch64_override_options_after_change (void)
4854 faked_omit_frame_pointer = false;
4856 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4857 that aarch64_frame_pointer_required will be called. We need to remember
4858 whether flag_omit_frame_pointer was turned on normally or just faked. */
4860 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4862 flag_omit_frame_pointer = true;
4863 faked_omit_frame_pointer = true;
4867 static struct machine_function *
4868 aarch64_init_machine_status (void)
4870 struct machine_function *machine;
4871 machine = ggc_alloc_cleared_machine_function ();
4872 return machine;
4875 void
4876 aarch64_init_expanders (void)
4878 init_machine_status = aarch64_init_machine_status;
4881 /* Select the code model: switch to the PIC variant of the chosen model when -fpic/-fPIC is in effect, and reject unsupported combinations. */
4882 static void
4883 initialize_aarch64_code_model (void)
4885 if (flag_pic)
4887 switch (aarch64_cmodel_var)
4889 case AARCH64_CMODEL_TINY:
4890 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4891 break;
4892 case AARCH64_CMODEL_SMALL:
4893 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4894 break;
4895 case AARCH64_CMODEL_LARGE:
4896 sorry ("code model %qs with -f%s", "large",
4897 flag_pic > 1 ? "PIC" : "pic");
4898 default:
4899 gcc_unreachable ();
4902 else
4903 aarch64_cmodel = aarch64_cmodel_var;
4906 /* Return true if SYMBOL_REF X binds locally. */
4908 static bool
4909 aarch64_symbol_binds_local_p (const_rtx x)
4911 return (SYMBOL_REF_DECL (x)
4912 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4913 : SYMBOL_REF_LOCAL_P (x));
4916 /* Return true if SYMBOL_REF X is thread local. */
4917 static bool
4918 aarch64_tls_symbol_p (rtx x)
4920 if (! TARGET_HAVE_TLS)
4921 return false;
4923 if (GET_CODE (x) != SYMBOL_REF)
4924 return false;
4926 return SYMBOL_REF_TLS_MODEL (x) != 0;
4929 /* Classify a TLS symbol into one of the TLS kinds. */
4930 enum aarch64_symbol_type
4931 aarch64_classify_tls_symbol (rtx x)
4933 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4935 switch (tls_kind)
4937 case TLS_MODEL_GLOBAL_DYNAMIC:
4938 case TLS_MODEL_LOCAL_DYNAMIC:
4939 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4941 case TLS_MODEL_INITIAL_EXEC:
4942 return SYMBOL_SMALL_GOTTPREL;
4944 case TLS_MODEL_LOCAL_EXEC:
4945 return SYMBOL_SMALL_TPREL;
4947 case TLS_MODEL_EMULATED:
4948 case TLS_MODEL_NONE:
4949 return SYMBOL_FORCE_TO_MEM;
4951 default:
4952 gcc_unreachable ();
4956 /* Return the method that should be used to access SYMBOL_REF or
4957 LABEL_REF X in context CONTEXT. */
4958 enum aarch64_symbol_type
4959 aarch64_classify_symbol (rtx x,
4960 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4962 if (GET_CODE (x) == LABEL_REF)
4964 switch (aarch64_cmodel)
4966 case AARCH64_CMODEL_LARGE:
4967 return SYMBOL_FORCE_TO_MEM;
4969 case AARCH64_CMODEL_TINY_PIC:
4970 case AARCH64_CMODEL_TINY:
4971 case AARCH64_CMODEL_SMALL_PIC:
4972 case AARCH64_CMODEL_SMALL:
4973 return SYMBOL_SMALL_ABSOLUTE;
4975 default:
4976 gcc_unreachable ();
4980 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4982 switch (aarch64_cmodel)
4984 case AARCH64_CMODEL_LARGE:
4985 return SYMBOL_FORCE_TO_MEM;
4987 case AARCH64_CMODEL_TINY:
4988 case AARCH64_CMODEL_SMALL:
4990 /* This is needed to get DFmode and TImode constants loaded from
4991 the constant pool. It is necessary to dump TImode values into
4992 the constant pool because we don't handle TImode constant loads
4993 properly yet. */
4994 if (CONSTANT_POOL_ADDRESS_P (x))
4995 return SYMBOL_FORCE_TO_MEM;
4997 if (aarch64_tls_symbol_p (x))
4998 return aarch64_classify_tls_symbol (x);
5000 if (SYMBOL_REF_WEAK (x))
5001 return SYMBOL_FORCE_TO_MEM;
5003 return SYMBOL_SMALL_ABSOLUTE;
5005 case AARCH64_CMODEL_TINY_PIC:
5006 case AARCH64_CMODEL_SMALL_PIC:
5008 if (CONSTANT_POOL_ADDRESS_P (x))
5009 return SYMBOL_FORCE_TO_MEM;
5011 if (aarch64_tls_symbol_p (x))
5012 return aarch64_classify_tls_symbol (x);
5014 if (!aarch64_symbol_binds_local_p (x))
5015 return SYMBOL_SMALL_GOT;
5017 return SYMBOL_SMALL_ABSOLUTE;
5019 default:
5020 gcc_unreachable ();
5022 /* By default push everything into the constant pool. */
5023 return SYMBOL_FORCE_TO_MEM;
5026 /* Return true if X is a symbolic constant that can be used in context
5027 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
5029 bool
5030 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5031 enum aarch64_symbol_type *symbol_type)
5033 rtx offset;
5034 split_const (x, &x, &offset);
5035 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5036 *symbol_type = aarch64_classify_symbol (x, context);
5037 else
5038 return false;
5040 /* No checking of offset at this point. */
5041 return true;
5044 bool
5045 aarch64_constant_address_p (rtx x)
5047 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5050 bool
5051 aarch64_legitimate_pic_operand_p (rtx x)
5053 if (GET_CODE (x) == SYMBOL_REF
5054 || (GET_CODE (x) == CONST
5055 && GET_CODE (XEXP (x, 0)) == PLUS
5056 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5057 return false;
5059 return true;
5062 /* Return true if X holds either a quarter-precision or
5063 floating-point +0.0 constant. */
5064 static bool
5065 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5067 if (!CONST_DOUBLE_P (x))
5068 return false;
5070 /* TODO: We could handle moving 0.0 to a TFmode register,
5071 but first we would like to refactor the movtf_aarch64
5072 pattern to be more amenable to splitting moves properly and
5073 to gating correctly on TARGET_SIMD. For now, reject all
5074 constants which are not destined for SFmode or DFmode registers. */
5075 if (!(mode == SFmode || mode == DFmode))
5076 return false;
5078 if (aarch64_float_const_zero_rtx_p (x))
5079 return true;
5080 return aarch64_float_const_representable_p (x);
5083 static bool
5084 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5086 /* Do not allow vector struct mode constants. We could support
5087 0 and -1 easily, but they need support in aarch64-simd.md. */
5088 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5089 return false;
5091 /* This could probably go away because
5092 we now decompose CONST_INTs according to expand_mov_immediate. */
5093 if ((GET_CODE (x) == CONST_VECTOR
5094 && aarch64_simd_valid_immediate (x, mode, false,
5095 NULL, NULL, NULL, NULL, NULL) != -1)
5096 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5097 return !targetm.cannot_force_const_mem (mode, x);
5099 if (GET_CODE (x) == HIGH
5100 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5101 return true;
5103 return aarch64_constant_address_p (x);
5107 aarch64_load_tp (rtx target)
5109 if (!target
5110 || GET_MODE (target) != Pmode
5111 || !register_operand (target, Pmode))
5112 target = gen_reg_rtx (Pmode);
5114 /* Can return in any reg. */
5115 emit_insn (gen_aarch64_load_tp_hard (target));
5116 return target;
5119 /* On AAPCS systems, this is the "struct __va_list". */
5120 static GTY(()) tree va_list_type;
5122 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5123 Return the type to use as __builtin_va_list.
5125 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5127 struct __va_list
5129 void *__stack;
5130 void *__gr_top;
5131 void *__vr_top;
5132 int __gr_offs;
5133 int __vr_offs;
5134 }; */
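/* For illustration, assuming the usual NUM_ARG_REGS == 8,
   NUM_FP_ARG_REGS == 8, UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:
   for "void f (int a, double b, ...)" one GPR and one VR are consumed
   by the named arguments, so va_start sets __gr_offs to -(8 - 1) * 8
   == -56 and __vr_offs to -(8 - 1) * 16 == -112, with __gr_top and
   __vr_top pointing just past the respective register save areas.  */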
5136 static tree
5137 aarch64_build_builtin_va_list (void)
5139 tree va_list_name;
5140 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5142 /* Create the type. */
5143 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5144 /* Give it the required name. */
5145 va_list_name = build_decl (BUILTINS_LOCATION,
5146 TYPE_DECL,
5147 get_identifier ("__va_list"),
5148 va_list_type);
5149 DECL_ARTIFICIAL (va_list_name) = 1;
5150 TYPE_NAME (va_list_type) = va_list_name;
5151 TYPE_STUB_DECL (va_list_type) = va_list_name;
5153 /* Create the fields. */
5154 f_stack = build_decl (BUILTINS_LOCATION,
5155 FIELD_DECL, get_identifier ("__stack"),
5156 ptr_type_node);
5157 f_grtop = build_decl (BUILTINS_LOCATION,
5158 FIELD_DECL, get_identifier ("__gr_top"),
5159 ptr_type_node);
5160 f_vrtop = build_decl (BUILTINS_LOCATION,
5161 FIELD_DECL, get_identifier ("__vr_top"),
5162 ptr_type_node);
5163 f_groff = build_decl (BUILTINS_LOCATION,
5164 FIELD_DECL, get_identifier ("__gr_offs"),
5165 integer_type_node);
5166 f_vroff = build_decl (BUILTINS_LOCATION,
5167 FIELD_DECL, get_identifier ("__vr_offs"),
5168 integer_type_node);
5170 DECL_ARTIFICIAL (f_stack) = 1;
5171 DECL_ARTIFICIAL (f_grtop) = 1;
5172 DECL_ARTIFICIAL (f_vrtop) = 1;
5173 DECL_ARTIFICIAL (f_groff) = 1;
5174 DECL_ARTIFICIAL (f_vroff) = 1;
5176 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5177 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5178 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5179 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5180 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5182 TYPE_FIELDS (va_list_type) = f_stack;
5183 DECL_CHAIN (f_stack) = f_grtop;
5184 DECL_CHAIN (f_grtop) = f_vrtop;
5185 DECL_CHAIN (f_vrtop) = f_groff;
5186 DECL_CHAIN (f_groff) = f_vroff;
5188 /* Compute its layout. */
5189 layout_type (va_list_type);
5191 return va_list_type;
5194 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5195 static void
5196 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5198 const CUMULATIVE_ARGS *cum;
5199 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5200 tree stack, grtop, vrtop, groff, vroff;
5201 tree t;
5202 int gr_save_area_size;
5203 int vr_save_area_size;
5204 int vr_offset;
5206 cum = &crtl->args.info;
5207 gr_save_area_size
5208 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5209 vr_save_area_size
5210 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5212 if (TARGET_GENERAL_REGS_ONLY)
5214 if (cum->aapcs_nvrn > 0)
5215 sorry ("%qs and floating point or vector arguments",
5216 "-mgeneral-regs-only");
5217 vr_save_area_size = 0;
5220 f_stack = TYPE_FIELDS (va_list_type_node);
5221 f_grtop = DECL_CHAIN (f_stack);
5222 f_vrtop = DECL_CHAIN (f_grtop);
5223 f_groff = DECL_CHAIN (f_vrtop);
5224 f_vroff = DECL_CHAIN (f_groff);
5226 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5227 NULL_TREE);
5228 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5229 NULL_TREE);
5230 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5231 NULL_TREE);
5232 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5233 NULL_TREE);
5234 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5235 NULL_TREE);
5237 /* Emit code to initialize STACK, which points to the next varargs stack
5238 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5239 by named arguments. STACK is 8-byte aligned. */
5240 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5241 if (cum->aapcs_stack_size > 0)
5242 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5243 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5244 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246 /* Emit code to initialize GRTOP, the top of the GR save area.
5247 virtual_incoming_args_rtx should have been 16 byte aligned. */
5248 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5249 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5250 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5252 /* Emit code to initialize VRTOP, the top of the VR save area.
5253 This address is gr_save_area_bytes below GRTOP, rounded
5254 down to the next 16-byte boundary. */
5255 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5256 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5257 STACK_BOUNDARY / BITS_PER_UNIT);
5259 if (vr_offset)
5260 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5261 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5262 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5264 /* Emit code to initialize GROFF, the offset from GRTOP of the
5265 next GPR argument. */
5266 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5267 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5270 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5271 of the next VR argument. */
5272 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5273 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5274 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5277 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5279 static tree
5280 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5281 gimple_seq *post_p ATTRIBUTE_UNUSED)
5283 tree addr;
5284 bool indirect_p;
5285 bool is_ha; /* is HFA or HVA. */
5286 bool dw_align; /* double-word align. */
5287 enum machine_mode ag_mode = VOIDmode;
5288 int nregs;
5289 enum machine_mode mode;
5291 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5292 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5293 HOST_WIDE_INT size, rsize, adjust, align;
5294 tree t, u, cond1, cond2;
5296 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5297 if (indirect_p)
5298 type = build_pointer_type (type);
5300 mode = TYPE_MODE (type);
5302 f_stack = TYPE_FIELDS (va_list_type_node);
5303 f_grtop = DECL_CHAIN (f_stack);
5304 f_vrtop = DECL_CHAIN (f_grtop);
5305 f_groff = DECL_CHAIN (f_vrtop);
5306 f_vroff = DECL_CHAIN (f_groff);
5308 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5309 f_stack, NULL_TREE);
5310 size = int_size_in_bytes (type);
5311 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5313 dw_align = false;
5314 adjust = 0;
5315 if (aarch64_vfp_is_call_or_return_candidate (mode,
5316 type,
5317 &ag_mode,
5318 &nregs,
5319 &is_ha))
5321 /* TYPE passed in fp/simd registers. */
5322 if (TARGET_GENERAL_REGS_ONLY)
5323 sorry ("%qs and floating point or vector arguments",
5324 "-mgeneral-regs-only");
5326 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5327 unshare_expr (valist), f_vrtop, NULL_TREE);
5328 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5329 unshare_expr (valist), f_vroff, NULL_TREE);
5331 rsize = nregs * UNITS_PER_VREG;
5333 if (is_ha)
5335 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5336 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5338 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5339 && size < UNITS_PER_VREG)
5341 adjust = UNITS_PER_VREG - size;
5344 else
5346 /* TYPE passed in general registers. */
5347 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5348 unshare_expr (valist), f_grtop, NULL_TREE);
5349 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5350 unshare_expr (valist), f_groff, NULL_TREE);
5351 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5352 nregs = rsize / UNITS_PER_WORD;
5354 if (align > 8)
5355 dw_align = true;
5357 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5358 && size < UNITS_PER_WORD)
5360 adjust = UNITS_PER_WORD - size;
5364 /* Get a local temporary for the field value. */
5365 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5367 /* Emit code to branch if off >= 0. */
5368 t = build2 (GE_EXPR, boolean_type_node, off,
5369 build_int_cst (TREE_TYPE (off), 0));
5370 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5372 if (dw_align)
5374 /* Emit: offs = (offs + 15) & -16. */
5375 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5376 build_int_cst (TREE_TYPE (off), 15));
5377 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5378 build_int_cst (TREE_TYPE (off), -16));
5379 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5381 else
5382 roundup = NULL;
5384 /* Update ap.__[g|v]r_offs */
5385 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5386 build_int_cst (TREE_TYPE (off), rsize));
5387 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5389 /* String up. */
5390 if (roundup)
5391 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5393 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5394 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5395 build_int_cst (TREE_TYPE (f_off), 0));
5396 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5398 /* String up: make sure the assignment happens before the use. */
5399 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5400 COND_EXPR_ELSE (cond1) = t;
5402 /* Prepare the trees handling the argument that is passed on the stack;
5403 the top level node will store in ON_STACK. */
5404 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5405 if (align > 8)
5407 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5408 t = fold_convert (intDI_type_node, arg);
5409 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5410 build_int_cst (TREE_TYPE (t), 15));
5411 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5412 build_int_cst (TREE_TYPE (t), -16));
5413 t = fold_convert (TREE_TYPE (arg), t);
5414 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5416 else
5417 roundup = NULL;
5418 /* Advance ap.__stack */
5419 t = fold_convert (intDI_type_node, arg);
5420 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5421 build_int_cst (TREE_TYPE (t), size + 7));
5422 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5423 build_int_cst (TREE_TYPE (t), -8));
5424 t = fold_convert (TREE_TYPE (arg), t);
5425 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5426 /* String up roundup and advance. */
5427 if (roundup)
5428 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5429 /* String up with arg */
5430 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5431 /* Big-endianness related address adjustment. */
5432 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5433 && size < UNITS_PER_WORD)
5435 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5436 size_int (UNITS_PER_WORD - size));
5437 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5440 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5441 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5443 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5444 t = off;
5445 if (adjust)
5446 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5447 build_int_cst (TREE_TYPE (off), adjust));
5449 t = fold_convert (sizetype, t);
5450 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5452 if (is_ha)
5454 /* type ha; // treat as "struct {ftype field[n];}"
5455 ... [computing offs]
5456 for (i = 0; i <nregs; ++i, offs += 16)
5457 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5458 return ha; */
5459 int i;
5460 tree tmp_ha, field_t, field_ptr_t;
5462 /* Declare a local variable. */
5463 tmp_ha = create_tmp_var_raw (type, "ha");
5464 gimple_add_tmp_var (tmp_ha);
5466 /* Establish the base type. */
5467 switch (ag_mode)
5469 case SFmode:
5470 field_t = float_type_node;
5471 field_ptr_t = float_ptr_type_node;
5472 break;
5473 case DFmode:
5474 field_t = double_type_node;
5475 field_ptr_t = double_ptr_type_node;
5476 break;
5477 case TFmode:
5478 field_t = long_double_type_node;
5479 field_ptr_t = long_double_ptr_type_node;
5480 break;
5481 /* Half precision and quad precision are not fully supported yet.
5482 Enable the following code once that support is complete; we still
5483 need to find the correct type node for __fp16 *. */
5484 #if 0
5485 case HFmode:
5486 field_t = float_type_node;
5487 field_ptr_t = float_ptr_type_node;
5488 break;
5489 #endif
5490 case V2SImode:
5491 case V4SImode:
5493 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5494 field_t = build_vector_type_for_mode (innertype, ag_mode);
5495 field_ptr_t = build_pointer_type (field_t);
5497 break;
5498 default:
5499 gcc_assert (0);
5502 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5503 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5504 addr = t;
5505 t = fold_convert (field_ptr_t, addr);
5506 t = build2 (MODIFY_EXPR, field_t,
5507 build1 (INDIRECT_REF, field_t, tmp_ha),
5508 build1 (INDIRECT_REF, field_t, t));
5510 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5511 for (i = 1; i < nregs; ++i)
5513 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5514 u = fold_convert (field_ptr_t, addr);
5515 u = build2 (MODIFY_EXPR, field_t,
5516 build2 (MEM_REF, field_t, tmp_ha,
5517 build_int_cst (field_ptr_t,
5518 (i *
5519 int_size_in_bytes (field_t)))),
5520 build1 (INDIRECT_REF, field_t, u));
5521 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5524 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5525 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5528 COND_EXPR_ELSE (cond2) = t;
5529 addr = fold_convert (build_pointer_type (type), cond1);
5530 addr = build_va_arg_indirect_ref (addr);
5532 if (indirect_p)
5533 addr = build_va_arg_indirect_ref (addr);
5535 return addr;
5538 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5540 static void
5541 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5542 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5543 int no_rtl)
5545 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5546 CUMULATIVE_ARGS local_cum;
5547 int gr_saved, vr_saved;
5549 /* The caller has advanced CUM up to, but not beyond, the last named
5550 argument. Advance a local copy of CUM past the last "real" named
5551 argument, to find out how many registers are left over. */
5552 local_cum = *cum;
5553 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5555 /* Find out how many registers we need to save. */
5556 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5557 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5559 if (TARGET_GENERAL_REGS_ONLY)
5561 if (local_cum.aapcs_nvrn > 0)
5562 sorry ("%qs and floating point or vector arguments",
5563 "-mgeneral-regs-only");
5564 vr_saved = 0;
5567 if (!no_rtl)
5569 if (gr_saved > 0)
5571 rtx ptr, mem;
5573 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5574 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5575 - gr_saved * UNITS_PER_WORD);
5576 mem = gen_frame_mem (BLKmode, ptr);
5577 set_mem_alias_set (mem, get_varargs_alias_set ());
5579 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5580 mem, gr_saved);
5582 if (vr_saved > 0)
5584 /* We can't use move_block_from_reg, because it will use
5585 the wrong mode, storing D regs only. */
5586 enum machine_mode mode = TImode;
5587 int off, i;
5589 /* Set OFF to the offset from virtual_incoming_args_rtx of
5590 the first vector register. The VR save area lies below
5591 the GR one, and is aligned to 16 bytes. */
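/* For example, with gr_saved == 3 and vr_saved == 2 (and the usual
   16-byte stack boundary), OFF becomes
   -AARCH64_ROUND_UP (24, 16) - 2 * 16 == -32 - 32 == -64.  */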
5592 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5593 STACK_BOUNDARY / BITS_PER_UNIT);
5594 off -= vr_saved * UNITS_PER_VREG;
5596 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5598 rtx ptr, mem;
5600 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5601 mem = gen_frame_mem (mode, ptr);
5602 set_mem_alias_set (mem, get_varargs_alias_set ());
5603 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5604 off += UNITS_PER_VREG;
5609 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5610 any complication of having crtl->args.pretend_args_size changed. */
5611 cfun->machine->saved_varargs_size
5612 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5613 STACK_BOUNDARY / BITS_PER_UNIT)
5614 + vr_saved * UNITS_PER_VREG);
5617 static void
5618 aarch64_conditional_register_usage (void)
5620 int i;
5621 if (!TARGET_FLOAT)
5623 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5625 fixed_regs[i] = 1;
5626 call_used_regs[i] = 1;
5631 /* Walk down the type tree of TYPE counting consecutive base elements.
5632 If *MODEP is VOIDmode, then set it to the first valid floating point
5633 type. If a non-floating point type is found, or if a floating point
5634 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5635 otherwise return the count in the sub-tree. */
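/* For example, "struct { double d[2]; }" returns 2 with *MODEP set to
   DFmode, while "struct { double d; float f; }" returns -1 because the
   second field does not match the DFmode already recorded in *MODEP.  */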
5636 static int
5637 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5639 enum machine_mode mode;
5640 HOST_WIDE_INT size;
5642 switch (TREE_CODE (type))
5644 case REAL_TYPE:
5645 mode = TYPE_MODE (type);
5646 if (mode != DFmode && mode != SFmode && mode != TFmode)
5647 return -1;
5649 if (*modep == VOIDmode)
5650 *modep = mode;
5652 if (*modep == mode)
5653 return 1;
5655 break;
5657 case COMPLEX_TYPE:
5658 mode = TYPE_MODE (TREE_TYPE (type));
5659 if (mode != DFmode && mode != SFmode && mode != TFmode)
5660 return -1;
5662 if (*modep == VOIDmode)
5663 *modep = mode;
5665 if (*modep == mode)
5666 return 2;
5668 break;
5670 case VECTOR_TYPE:
5671 /* Use V2SImode and V4SImode as representatives of all 64-bit
5672 and 128-bit vector types. */
5673 size = int_size_in_bytes (type);
5674 switch (size)
5676 case 8:
5677 mode = V2SImode;
5678 break;
5679 case 16:
5680 mode = V4SImode;
5681 break;
5682 default:
5683 return -1;
5686 if (*modep == VOIDmode)
5687 *modep = mode;
5689 /* Vector modes are considered to be opaque: two vectors are
5690 equivalent for the purposes of being homogeneous aggregates
5691 if they are the same size. */
5692 if (*modep == mode)
5693 return 1;
5695 break;
5697 case ARRAY_TYPE:
5699 int count;
5700 tree index = TYPE_DOMAIN (type);
5702 /* Can't handle incomplete types. */
5703 if (!COMPLETE_TYPE_P (type))
5704 return -1;
5706 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5707 if (count == -1
5708 || !index
5709 || !TYPE_MAX_VALUE (index)
5710 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5711 || !TYPE_MIN_VALUE (index)
5712 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5713 || count < 0)
5714 return -1;
5716 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5717 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5719 /* There must be no padding. */
5720 if (!host_integerp (TYPE_SIZE (type), 1)
5721 || (tree_low_cst (TYPE_SIZE (type), 1)
5722 != count * GET_MODE_BITSIZE (*modep)))
5723 return -1;
5725 return count;
5728 case RECORD_TYPE:
5730 int count = 0;
5731 int sub_count;
5732 tree field;
5734 /* Can't handle incomplete types. */
5735 if (!COMPLETE_TYPE_P (type))
5736 return -1;
5738 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5740 if (TREE_CODE (field) != FIELD_DECL)
5741 continue;
5743 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5744 if (sub_count < 0)
5745 return -1;
5746 count += sub_count;
5749 /* There must be no padding. */
5750 if (!host_integerp (TYPE_SIZE (type), 1)
5751 || (tree_low_cst (TYPE_SIZE (type), 1)
5752 != count * GET_MODE_BITSIZE (*modep)))
5753 return -1;
5755 return count;
5758 case UNION_TYPE:
5759 case QUAL_UNION_TYPE:
5761 /* These aren't very interesting except in a degenerate case. */
5762 int count = 0;
5763 int sub_count;
5764 tree field;
5766 /* Can't handle incomplete types. */
5767 if (!COMPLETE_TYPE_P (type))
5768 return -1;
5770 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5772 if (TREE_CODE (field) != FIELD_DECL)
5773 continue;
5775 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5776 if (sub_count < 0)
5777 return -1;
5778 count = count > sub_count ? count : sub_count;
5781 /* There must be no padding. */
5782 if (!host_integerp (TYPE_SIZE (type), 1)
5783 || (tree_low_cst (TYPE_SIZE (type), 1)
5784 != count * GET_MODE_BITSIZE (*modep)))
5785 return -1;
5787 return count;
5790 default:
5791 break;
5794 return -1;
5797 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5798 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5799 array types. The C99 floating-point complex types are also considered
5800 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5801 types, which are GCC extensions and out of the scope of AAPCS64, are
5802 treated as composite types here as well.
5804 Note that MODE itself is not sufficient in determining whether a type
5805 is such a composite type or not. This is because
5806 stor-layout.c:compute_record_mode may have already changed the MODE
5807 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5808 structure with only one field may have its MODE set to the mode of the
5809 field. Also an integer mode whose size matches the size of the
5810 RECORD_TYPE type may be used to substitute the original mode
5811 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5812 solely relied on. */
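/* For example, "struct { float x; }" may be given SFmode by
   compute_record_mode, yet it is still a composite type for AAPCS64
   purposes; a plain "float" in SFmode is not.  */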
5814 static bool
5815 aarch64_composite_type_p (const_tree type,
5816 enum machine_mode mode)
5818 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5819 return true;
5821 if (mode == BLKmode
5822 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5823 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5824 return true;
5826 return false;
5829 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5830 type as described in AAPCS64 \S 4.1.2.
5832 See the comment above aarch64_composite_type_p for the notes on MODE. */
5834 static bool
5835 aarch64_short_vector_p (const_tree type,
5836 enum machine_mode mode)
5838 HOST_WIDE_INT size = -1;
5840 if (type && TREE_CODE (type) == VECTOR_TYPE)
5841 size = int_size_in_bytes (type);
5842 else if (!aarch64_composite_type_p (type, mode)
5843 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5844 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5845 size = GET_MODE_SIZE (mode);
5847 return (size == 8 || size == 16) ? true : false;
5850 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5851 shall be passed or returned in simd/fp register(s) (providing these
5852 parameter passing registers are available).
5854 Upon successful return, *COUNT returns the number of needed registers,
5855 *BASE_MODE returns the mode of the individual register and when IS_HA
5856 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5857 floating-point aggregate or a homogeneous short-vector aggregate. */
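/* For example, a plain "double" gives *COUNT == 1 and *BASE_MODE ==
   DFmode; "struct { double a, b; }" gives *COUNT == 2, *BASE_MODE ==
   DFmode and *IS_HA == true; "_Complex double" likewise gives
   *COUNT == 2 with *IS_HA == true.  */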
5859 static bool
5860 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5861 const_tree type,
5862 enum machine_mode *base_mode,
5863 int *count,
5864 bool *is_ha)
5866 enum machine_mode new_mode = VOIDmode;
5867 bool composite_p = aarch64_composite_type_p (type, mode);
5869 if (is_ha != NULL) *is_ha = false;
5871 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5872 || aarch64_short_vector_p (type, mode))
5874 *count = 1;
5875 new_mode = mode;
5877 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5879 if (is_ha != NULL) *is_ha = true;
5880 *count = 2;
5881 new_mode = GET_MODE_INNER (mode);
5883 else if (type && composite_p)
5885 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5887 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5889 if (is_ha != NULL) *is_ha = true;
5890 *count = ag_count;
5892 else
5893 return false;
5895 else
5896 return false;
5898 *base_mode = new_mode;
5899 return true;
5902 /* Implement TARGET_STRUCT_VALUE_RTX. */
5904 static rtx
5905 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5906 int incoming ATTRIBUTE_UNUSED)
5908 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5911 /* Implements target hook vector_mode_supported_p. */
5912 static bool
5913 aarch64_vector_mode_supported_p (enum machine_mode mode)
5915 if (TARGET_SIMD
5916 && (mode == V4SImode || mode == V8HImode
5917 || mode == V16QImode || mode == V2DImode
5918 || mode == V2SImode || mode == V4HImode
5919 || mode == V8QImode || mode == V2SFmode
5920 || mode == V4SFmode || mode == V2DFmode))
5921 return true;
5923 return false;
5926 /* Return quad mode as the preferred SIMD mode. */
5927 static enum machine_mode
5928 aarch64_preferred_simd_mode (enum machine_mode mode)
5930 if (TARGET_SIMD)
5931 switch (mode)
5933 case DFmode:
5934 return V2DFmode;
5935 case SFmode:
5936 return V4SFmode;
5937 case SImode:
5938 return V4SImode;
5939 case HImode:
5940 return V8HImode;
5941 case QImode:
5942 return V16QImode;
5943 case DImode:
5944 return V2DImode;
5945 break;
5947 default:;
5949 return word_mode;
5952 /* Return the bitmask of possible vector sizes for the vectorizer
5953 to iterate over. */
5954 static unsigned int
5955 aarch64_autovectorize_vector_sizes (void)
5957 return (16 | 8);
5960 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5961 vector types in order to conform to the AAPCS64 (see "Procedure
5962 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5963 qualify for emission with the mangled names defined in that document,
5964 a vector type must not only be of the correct mode but also be
5965 composed of AdvSIMD vector element types (e.g.
5966 __builtin_aarch64_simd_qi); these types are registered by
5967 aarch64_init_simd_builtins (). In other words, vector types defined
5968 in other ways, e.g. via the vector_size attribute, will get default
5969 mangled names. */
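/* For example, int8x8_t from <arm_neon.h> is built on
   __builtin_aarch64_simd_qi in V8QImode, so by the table below a C++
   declaration such as "void f (int8x8_t)" mangles as
   _Z1f10__Int8x8_t rather than with the default vector_size
   mangling.  */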
5970 typedef struct
5972 enum machine_mode mode;
5973 const char *element_type_name;
5974 const char *mangled_name;
5975 } aarch64_simd_mangle_map_entry;
5977 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5978 /* 64-bit containerized types. */
5979 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5980 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5981 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5982 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5983 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5984 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5985 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
5986 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
5987 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5988 /* 128-bit containerized types. */
5989 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
5990 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
5991 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
5992 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
5993 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
5994 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
5995 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
5996 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
5997 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
5998 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
5999 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6000 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6001 { VOIDmode, NULL, NULL }
6004 /* Implement TARGET_MANGLE_TYPE. */
6006 static const char *
6007 aarch64_mangle_type (const_tree type)
6009 /* The AArch64 ABI documents say that "__va_list" has to be
6010 mangled as if it is in the "std" namespace. */
6011 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6012 return "St9__va_list";
6014 /* Check the mode of the vector type, and the name of the vector
6015 element type, against the table. */
6016 if (TREE_CODE (type) == VECTOR_TYPE)
6018 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6020 while (pos->mode != VOIDmode)
6022 tree elt_type = TREE_TYPE (type);
6024 if (pos->mode == TYPE_MODE (type)
6025 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6026 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6027 pos->element_type_name))
6028 return pos->mangled_name;
6030 pos++;
6034 /* Use the default mangling. */
6035 return NULL;
6038 /* Return the equivalent letter for size. */
6039 static unsigned char
6040 sizetochar (int size)
6042 switch (size)
6044 case 64: return 'd';
6045 case 32: return 's';
6046 case 16: return 'h';
6047 case 8 : return 'b';
6048 default: gcc_unreachable ();
6052 /* Return true iff X is a uniform vector of floating-point
6053 constants, and the constant can be represented in
6054 quarter-precision form. Note that, as aarch64_float_const_representable_p
6055 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
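/* For example, a V2SF vector of { 1.0, 1.0 } is accepted (1.0 is a
   valid quarter-precision immediate), { 1.0, 2.0 } is rejected for not
   being uniform, and { 0.0, 0.0 } is rejected because of the +0.0/-0.0
   restriction noted above.  */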
6056 static bool
6057 aarch64_vect_float_const_representable_p (rtx x)
6059 int i = 0;
6060 REAL_VALUE_TYPE r0, ri;
6061 rtx x0, xi;
6063 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6064 return false;
6066 x0 = CONST_VECTOR_ELT (x, 0);
6067 if (!CONST_DOUBLE_P (x0))
6068 return false;
6070 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6072 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6074 xi = CONST_VECTOR_ELT (x, i);
6075 if (!CONST_DOUBLE_P (xi))
6076 return false;
6078 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6079 if (!REAL_VALUES_EQUAL (r0, ri))
6080 return false;
6083 return aarch64_float_const_representable_p (x0);
6086 /* TODO: This function returns values similar to those
6087 returned by neon_valid_immediate in gcc/config/arm/arm.c
6088 but the API here is different enough that these magic numbers
6089 are not used. It should be sufficient to return true or false. */
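/* In the current scheme, -1 means "not a valid immediate", 19 is used
   for an all-zero floating-point vector, 18 for any other valid
   floating-point constant, and 0..17 identify the integer encodings
   matched by the CHECK cases below.  */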
6090 static int
6091 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6092 rtx *modconst, int *elementwidth,
6093 unsigned char *elementchar,
6094 int *mvn, int *shift)
6096 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6097 matches = 1; \
6098 for (i = 0; i < idx; i += (STRIDE)) \
6099 if (!(TEST)) \
6100 matches = 0; \
6101 if (matches) \
6103 immtype = (CLASS); \
6104 elsize = (ELSIZE); \
6105 elchar = sizetochar (elsize); \
6106 eshift = (SHIFT); \
6107 emvn = (NEG); \
6108 break; \
6111 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6112 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6113 unsigned char bytes[16];
6114 unsigned char elchar = 0;
6115 int immtype = -1, matches;
6116 unsigned int invmask = inverse ? 0xff : 0;
6117 int eshift, emvn;
6119 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6121 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6122 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6124 if (!(simd_imm_zero
6125 || aarch64_vect_float_const_representable_p (op)))
6126 return -1;
6128 if (modconst)
6129 *modconst = CONST_VECTOR_ELT (op, 0);
6131 if (elementwidth)
6132 *elementwidth = elem_width;
6134 if (elementchar)
6135 *elementchar = sizetochar (elem_width);
6137 if (shift)
6138 *shift = 0;
6140 if (simd_imm_zero)
6141 return 19;
6142 else
6143 return 18;
6146 /* Splat vector constant out into a byte vector. */
6147 for (i = 0; i < n_elts; i++)
6149 rtx el = CONST_VECTOR_ELT (op, i);
6150 unsigned HOST_WIDE_INT elpart;
6151 unsigned int part, parts;
6153 if (GET_CODE (el) == CONST_INT)
6155 elpart = INTVAL (el);
6156 parts = 1;
6158 else if (GET_CODE (el) == CONST_DOUBLE)
6160 elpart = CONST_DOUBLE_LOW (el);
6161 parts = 2;
6163 else
6164 gcc_unreachable ();
6166 for (part = 0; part < parts; part++)
6168 unsigned int byte;
6169 for (byte = 0; byte < innersize; byte++)
6171 bytes[idx++] = (elpart & 0xff) ^ invmask;
6172 elpart >>= BITS_PER_UNIT;
6174 if (GET_CODE (el) == CONST_DOUBLE)
6175 elpart = CONST_DOUBLE_HIGH (el);
6179 /* Sanity check. */
6180 gcc_assert (idx == GET_MODE_SIZE (mode));
6184 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6185 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6187 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6188 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6190 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6191 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6193 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6194 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6196 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6198 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6200 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6201 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6203 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6204 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6206 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6207 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6209 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6210 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6212 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6214 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6216 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6217 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6219 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6220 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6222 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6223 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6225 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6226 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6228 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6230 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6231 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6233 while (0);
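/* As a worked example of the classification above: a V4HI constant with
   every element equal to 0x00ff splats to the little-endian byte pattern
   { 0xff, 0x00, 0xff, 0x00, ... }, which fails the 32-bit tests (types
   0-3) but matches type 4 (bytes[i] == bytes[0] and bytes[i + 1] == 0),
   so it is classified with elsize 16, shift 0 and no MVN -- i.e. a
   16-bit MOVI of 0xff.  */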
6235 /* TODO: Currently the assembler cannot handle types 12 to 15, and
6236 there is no way to specify cmode through the compiler.
6237 Disable them until there is support in the assembler. */
6238 if (immtype == -1
6239 || (immtype >= 12 && immtype <= 15)
6240 || immtype == 18)
6241 return -1;
6244 if (elementwidth)
6245 *elementwidth = elsize;
6247 if (elementchar)
6248 *elementchar = elchar;
6250 if (mvn)
6251 *mvn = emvn;
6253 if (shift)
6254 *shift = eshift;
6256 if (modconst)
6258 unsigned HOST_WIDE_INT imm = 0;
6260 /* Un-invert bytes of recognized vector, if necessary. */
6261 if (invmask != 0)
6262 for (i = 0; i < idx; i++)
6263 bytes[i] ^= invmask;
6265 if (immtype == 17)
6267 /* FIXME: Broken on 32-bit H_W_I hosts. */
6268 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6270 for (i = 0; i < 8; i++)
6271 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6272 << (i * BITS_PER_UNIT);
6274 *modconst = GEN_INT (imm);
6276 else
6278 unsigned HOST_WIDE_INT imm = 0;
6280 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6281 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6283 /* Construct 'abcdefgh' because the assembler cannot handle
6284 generic constants. */
6285 gcc_assert (shift != NULL && mvn != NULL);
6286 if (*mvn)
6287 imm = ~imm;
6288 imm = (imm >> *shift) & 0xff;
6289 *modconst = GEN_INT (imm);
6293 return immtype;
6294 #undef CHECK
6297 /* Return TRUE if rtx X is legal for use as either an AdvSIMD MOVI instruction
6298 (or, implicitly, MVNI) immediate. Write back width per element
6299 to *ELEMENTWIDTH, and a modified constant (whatever should be output
6300 for a MOVI instruction) in *MODCONST. */
6302 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6303 rtx *modconst, int *elementwidth,
6304 unsigned char *elementchar,
6305 int *mvn, int *shift)
6307 rtx tmpconst;
6308 int tmpwidth;
6309 unsigned char tmpwidthc;
6310 int tmpmvn = 0, tmpshift = 0;
6311 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6312 &tmpwidth, &tmpwidthc,
6313 &tmpmvn, &tmpshift);
6315 if (retval == -1)
6316 return 0;
6318 if (modconst)
6319 *modconst = tmpconst;
6321 if (elementwidth)
6322 *elementwidth = tmpwidth;
6324 if (elementchar)
6325 *elementchar = tmpwidthc;
6327 if (mvn)
6328 *mvn = tmpmvn;
6330 if (shift)
6331 *shift = tmpshift;
6333 return 1;
6336 static bool
6337 aarch64_const_vec_all_same_int_p (rtx x,
6338 HOST_WIDE_INT minval,
6339 HOST_WIDE_INT maxval)
6341 HOST_WIDE_INT firstval;
6342 int count, i;
6344 if (GET_CODE (x) != CONST_VECTOR
6345 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6346 return false;
6348 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6349 if (firstval < minval || firstval > maxval)
6350 return false;
6352 count = CONST_VECTOR_NUNITS (x);
6353 for (i = 1; i < count; i++)
6354 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6355 return false;
6357 return true;
6360 /* Check that immediate shift constants are within range. */
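/* For example, for a V4SI shift the permitted immediate range is 0-31
   for a left shift and 1-32 for a right shift, matching the AdvSIMD
   SHL and SSHR/USHR immediate encodings.  */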
6361 bool
6362 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6364 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6365 if (left)
6366 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6367 else
6368 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6371 /* Return true if X is a uniform vector where all elements
6372 are either the floating-point constant 0.0 or the
6373 integer constant 0. */
6374 bool
6375 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6377 return x == CONST0_RTX (mode);
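/* Return true if X is a valid scalar immediate in the 64-bit byte-mask
   form accepted by MOVI, i.e. every byte of the value is either 0x00 or
   0xff (for instance 0x00ff00ffff0000ff qualifies, 0x0102030405060708
   does not); the loop below simply inspects each of the eight bytes.  */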
6380 bool
6381 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6383 HOST_WIDE_INT imm = INTVAL (x);
6384 int i;
6386 for (i = 0; i < 8; i++)
6388 unsigned int byte = imm & 0xff;
6389 if (byte != 0xff && byte != 0)
6390 return false;
6391 imm >>= 8;
6394 return true;
6397 /* Return a const_int vector of VAL. */
6399 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6401 int nunits = GET_MODE_NUNITS (mode);
6402 rtvec v = rtvec_alloc (nunits);
6403 int i;
6405 for (i=0; i < nunits; i++)
6406 RTVEC_ELT (v, i) = GEN_INT (val);
6408 return gen_rtx_CONST_VECTOR (mode, v);
6411 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6413 bool
6414 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6416 enum machine_mode vmode;
6418 gcc_assert (!VECTOR_MODE_P (mode));
6419 vmode = aarch64_preferred_simd_mode (mode);
6420 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6421 int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
6422 NULL, NULL, NULL, NULL);
6423 return retval;
6426 /* Construct and return a PARALLEL RTX vector. */
6428 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6430 int nunits = GET_MODE_NUNITS (mode);
6431 rtvec v = rtvec_alloc (nunits / 2);
6432 int base = high ? nunits / 2 : 0;
6433 rtx t1;
6434 int i;
6436 for (i=0; i < nunits / 2; i++)
6437 RTVEC_ELT (v, i) = GEN_INT (base + i);
6439 t1 = gen_rtx_PARALLEL (mode, v);
6440 return t1;
6443 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6444 HIGH (exclusive). */
6445 void
6446 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6448 HOST_WIDE_INT lane;
6449 gcc_assert (GET_CODE (operand) == CONST_INT);
6450 lane = INTVAL (operand);
6452 if (lane < low || lane >= high)
6453 error ("lane out of range");
6456 void
6457 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6459 gcc_assert (GET_CODE (operand) == CONST_INT);
6460 HOST_WIDE_INT lane = INTVAL (operand);
6462 if (lane < low || lane >= high)
6463 error ("constant out of range");
6466 /* Emit code to reinterpret one AdvSIMD type as another,
6467 without altering bits. */
6468 void
6469 aarch64_simd_reinterpret (rtx dest, rtx src)
6471 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6474 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6475 registers). */
6476 void
6477 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6478 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6479 rtx op1)
6481 rtx mem = gen_rtx_MEM (mode, destaddr);
6482 rtx tmp1 = gen_reg_rtx (mode);
6483 rtx tmp2 = gen_reg_rtx (mode);
6485 emit_insn (intfn (tmp1, op1, tmp2));
6487 emit_move_insn (mem, tmp1);
6488 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6489 emit_move_insn (mem, tmp2);
6492 /* Return TRUE if OP is a valid vector addressing mode. */
6493 bool
6494 aarch64_simd_mem_operand_p (rtx op)
6496 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6497 || GET_CODE (XEXP (op, 0)) == REG);
6500 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6501 not to early-clobber SRC registers in the process.
6503 We assume that the operands described by SRC and DEST represent a
6504 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6505 number of components into which the copy has been decomposed. */
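/* For instance, copying the register pair { V0, V1 } into { V1, V2 }
   must be done high-to-low (V2 <- V1, then V1 <- V0); a low-to-high copy
   would overwrite V1 before it is read.  The REGNO comparison below
   selects the reversed order exactly in that overlapping case.  */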
6506 void
6507 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6508 rtx *src, unsigned int count)
6510 unsigned int i;
6512 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6513 || REGNO (operands[0]) < REGNO (operands[1]))
6515 for (i = 0; i < count; i++)
6517 operands[2 * i] = dest[i];
6518 operands[2 * i + 1] = src[i];
6521 else
6523 for (i = 0; i < count; i++)
6525 operands[2 * i] = dest[count - i - 1];
6526 operands[2 * i + 1] = src[count - i - 1];
6531 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6532 one of VSTRUCT modes: OI, CI or XI. */
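/* These structure modes occupy two (OI), three (CI) or four (XI) vector
   registers; a register-to-register copy splits into one 4-byte move per
   register, giving the 8/12/16-byte lengths returned below, while a copy
   involving memory is a single load/store structure instruction, hence 4.  */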
6534 aarch64_simd_attr_length_move (rtx insn)
6536 enum machine_mode mode;
6538 extract_insn_cached (insn);
6540 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6542 mode = GET_MODE (recog_data.operand[0]);
6543 switch (mode)
6545 case OImode:
6546 return 8;
6547 case CImode:
6548 return 12;
6549 case XImode:
6550 return 16;
6551 default:
6552 gcc_unreachable ();
6555 return 4;
6558 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6559 alignment of a vector to 128 bits. */
6560 static HOST_WIDE_INT
6561 aarch64_simd_vector_alignment (const_tree type)
6563 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6564 return MIN (align, 128);
6567 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6568 static bool
6569 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6571 if (is_packed)
6572 return false;
6574 /* We guarantee alignment for vectors up to 128 bits. */
6575 if (tree_int_cst_compare (TYPE_SIZE (type),
6576 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6577 return false;
6579 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6580 return true;
6583 /* If VALS is a vector constant that can be loaded into a register
6584 using DUP, generate instructions to do so and return an RTX to
6585 assign to the register. Otherwise return NULL_RTX. */
6586 static rtx
6587 aarch64_simd_dup_constant (rtx vals)
6589 enum machine_mode mode = GET_MODE (vals);
6590 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6591 int n_elts = GET_MODE_NUNITS (mode);
6592 bool all_same = true;
6593 rtx x;
6594 int i;
6596 if (GET_CODE (vals) != CONST_VECTOR)
6597 return NULL_RTX;
6599 for (i = 1; i < n_elts; ++i)
6601 x = CONST_VECTOR_ELT (vals, i);
6602 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6603 all_same = false;
6606 if (!all_same)
6607 return NULL_RTX;
6609 /* We can load this constant by using DUP and a constant in a
6610 single ARM register. This will be cheaper than a vector
6611 load. */
6612 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6613 return gen_rtx_VEC_DUPLICATE (mode, x);
6617 /* Generate code to load VALS, which is a PARALLEL containing only
6618 constants (for vec_init) or CONST_VECTOR, efficiently into a
6619 register. Returns an RTX to copy into the register, or NULL_RTX
6620 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6621 static rtx
6622 aarch64_simd_make_constant (rtx vals)
6624 enum machine_mode mode = GET_MODE (vals);
6625 rtx const_dup;
6626 rtx const_vec = NULL_RTX;
6627 int n_elts = GET_MODE_NUNITS (mode);
6628 int n_const = 0;
6629 int i;
6631 if (GET_CODE (vals) == CONST_VECTOR)
6632 const_vec = vals;
6633 else if (GET_CODE (vals) == PARALLEL)
6635 /* A CONST_VECTOR must contain only CONST_INTs and
6636 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6637 Only store valid constants in a CONST_VECTOR. */
6638 for (i = 0; i < n_elts; ++i)
6640 rtx x = XVECEXP (vals, 0, i);
6641 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6642 n_const++;
6644 if (n_const == n_elts)
6645 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6647 else
6648 gcc_unreachable ();
6650 if (const_vec != NULL_RTX
6651 && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6652 NULL, NULL, NULL))
6653 /* Load using MOVI/MVNI. */
6654 return const_vec;
6655 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6656 /* Loaded using DUP. */
6657 return const_dup;
6658 else if (const_vec != NULL_RTX)
6659 /* Load from constant pool. We cannot take advantage of single-cycle
6660 LD1 because we need a PC-relative addressing mode. */
6661 return const_vec;
6662 else
6663 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6664 We cannot construct an initializer. */
6665 return NULL_RTX;
6668 void
6669 aarch64_expand_vector_init (rtx target, rtx vals)
6671 enum machine_mode mode = GET_MODE (target);
6672 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6673 int n_elts = GET_MODE_NUNITS (mode);
6674 int n_var = 0, one_var = -1;
6675 bool all_same = true;
6676 rtx x, mem;
6677 int i;
6679 x = XVECEXP (vals, 0, 0);
6680 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6681 n_var = 1, one_var = 0;
6683 for (i = 1; i < n_elts; ++i)
6685 x = XVECEXP (vals, 0, i);
6686 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6687 ++n_var, one_var = i;
6689 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6690 all_same = false;
6693 if (n_var == 0)
6695 rtx constant = aarch64_simd_make_constant (vals);
6696 if (constant != NULL_RTX)
6698 emit_move_insn (target, constant);
6699 return;
6703 /* Splat a single non-constant element if we can. */
6704 if (all_same)
6706 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6707 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6708 return;
6711 /* One field is non-constant. Load constant then overwrite varying
6712 field. This is more efficient than using the stack. */
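/* For example, initialising a V4SI with { 1, 2, x, 4 } first materialises
   the constant vector { 1, 2, 4, 4 } (the varying element is replaced by
   its neighbour) and then inserts x into lane 2 with a single vec_set.  */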
6713 if (n_var == 1)
6715 rtx copy = copy_rtx (vals);
6716 rtx index = GEN_INT (one_var);
6717 enum insn_code icode;
6719 /* Load constant part of vector, substitute neighboring value for
6720 varying element. */
6721 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6722 aarch64_expand_vector_init (target, copy);
6724 /* Insert variable. */
6725 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6726 icode = optab_handler (vec_set_optab, mode);
6727 gcc_assert (icode != CODE_FOR_nothing);
6728 emit_insn (GEN_FCN (icode) (target, x, index));
6729 return;
6732 /* Construct the vector in memory one field at a time
6733 and load the whole vector. */
6734 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6735 for (i = 0; i < n_elts; i++)
6736 emit_move_insn (adjust_address_nv (mem, inner_mode,
6737 i * GET_MODE_SIZE (inner_mode)),
6738 XVECEXP (vals, 0, i));
6739 emit_move_insn (target, mem);
6743 static unsigned HOST_WIDE_INT
6744 aarch64_shift_truncation_mask (enum machine_mode mode)
6746 return
6747 (aarch64_vector_mode_supported_p (mode)
6748 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6751 #ifndef TLS_SECTION_ASM_FLAG
6752 #define TLS_SECTION_ASM_FLAG 'T'
6753 #endif
6755 void
6756 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6757 tree decl ATTRIBUTE_UNUSED)
6759 char flagchars[10], *f = flagchars;
6761 /* If we have already declared this section, we can use an
6762 abbreviated form to switch back to it -- unless this section is
6763 part of a COMDAT group, in which case GAS requires the full
6764 declaration every time. */
6765 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6766 && (flags & SECTION_DECLARED))
6768 fprintf (asm_out_file, "\t.section\t%s\n", name);
6769 return;
6772 if (!(flags & SECTION_DEBUG))
6773 *f++ = 'a';
6774 if (flags & SECTION_WRITE)
6775 *f++ = 'w';
6776 if (flags & SECTION_CODE)
6777 *f++ = 'x';
6778 if (flags & SECTION_SMALL)
6779 *f++ = 's';
6780 if (flags & SECTION_MERGE)
6781 *f++ = 'M';
6782 if (flags & SECTION_STRINGS)
6783 *f++ = 'S';
6784 if (flags & SECTION_TLS)
6785 *f++ = TLS_SECTION_ASM_FLAG;
6786 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6787 *f++ = 'G';
6788 *f = '\0';
6790 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6792 if (!(flags & SECTION_NOTYPE))
6794 const char *type;
6795 const char *format;
6797 if (flags & SECTION_BSS)
6798 type = "nobits";
6799 else
6800 type = "progbits";
6802 #ifdef TYPE_OPERAND_FMT
6803 format = "," TYPE_OPERAND_FMT;
6804 #else
6805 format = ",@%s";
6806 #endif
6808 fprintf (asm_out_file, format, type);
6810 if (flags & SECTION_ENTSIZE)
6811 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6812 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6814 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6815 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6816 else
6817 fprintf (asm_out_file, ",%s,comdat",
6818 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6822 putc ('\n', asm_out_file);
6825 /* Select a format to encode pointers in exception handling data. */
6827 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6829 int type;
6830 switch (aarch64_cmodel)
6832 case AARCH64_CMODEL_TINY:
6833 case AARCH64_CMODEL_TINY_PIC:
6834 case AARCH64_CMODEL_SMALL:
6835 case AARCH64_CMODEL_SMALL_PIC:
6836 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6837 for everything. */
6838 type = DW_EH_PE_sdata4;
6839 break;
6840 default:
6841 /* No assumptions here. 8-byte relocs required. */
6842 type = DW_EH_PE_sdata8;
6843 break;
6845 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6848 /* Emit load exclusive. */
6850 static void
6851 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6852 rtx mem, rtx model_rtx)
6854 rtx (*gen) (rtx, rtx, rtx);
6856 switch (mode)
6858 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6859 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6860 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6861 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6862 default:
6863 gcc_unreachable ();
6866 emit_insn (gen (rval, mem, model_rtx));
6869 /* Emit store exclusive. */
6871 static void
6872 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6873 rtx rval, rtx mem, rtx model_rtx)
6875 rtx (*gen) (rtx, rtx, rtx, rtx);
6877 switch (mode)
6879 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6880 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6881 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6882 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6883 default:
6884 gcc_unreachable ();
6887 emit_insn (gen (bval, rval, mem, model_rtx));
6890 /* Mark the previous jump instruction as unlikely. */
6892 static void
6893 aarch64_emit_unlikely_jump (rtx insn)
6895 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6897 insn = emit_jump_insn (insn);
6898 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6901 /* Expand a compare and swap pattern. */
6903 void
6904 aarch64_expand_compare_and_swap (rtx operands[])
6906 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6907 enum machine_mode mode, cmp_mode;
6908 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6910 bval = operands[0];
6911 rval = operands[1];
6912 mem = operands[2];
6913 oldval = operands[3];
6914 newval = operands[4];
6915 is_weak = operands[5];
6916 mod_s = operands[6];
6917 mod_f = operands[7];
6918 mode = GET_MODE (mem);
6919 cmp_mode = mode;
6921 /* Normally the succ memory model must be stronger than fail, but in the
6922 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6923 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6925 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6926 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6927 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
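/* This situation can arise from a call such as
   __atomic_compare_exchange_n (ptr, &expected, desired, 0,
                                __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);
   the failure path must still acquire, so the success ordering is
   strengthened to ACQ_REL rather than weakening the failure ordering.  */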
6929 switch (mode)
6931 case QImode:
6932 case HImode:
6933 /* For short modes, we're going to perform the comparison in SImode,
6934 so do the zero-extension now. */
6935 cmp_mode = SImode;
6936 rval = gen_reg_rtx (SImode);
6937 oldval = convert_modes (SImode, mode, oldval, true);
6938 /* Fall through. */
6940 case SImode:
6941 case DImode:
6942 /* Force the value into a register if needed. */
6943 if (!aarch64_plus_operand (oldval, mode))
6944 oldval = force_reg (cmp_mode, oldval);
6945 break;
6947 default:
6948 gcc_unreachable ();
6951 switch (mode)
6953 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6954 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6955 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6956 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6957 default:
6958 gcc_unreachable ();
6961 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6963 if (mode == QImode || mode == HImode)
6964 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6966 x = gen_rtx_REG (CCmode, CC_REGNUM);
6967 x = gen_rtx_EQ (SImode, x, const0_rtx);
6968 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6971 /* Split a compare and swap pattern. */
6973 void
6974 aarch64_split_compare_and_swap (rtx operands[])
6976 rtx rval, mem, oldval, newval, scratch;
6977 enum machine_mode mode;
6978 bool is_weak;
6979 rtx label1, label2, x, cond;
6981 rval = operands[0];
6982 mem = operands[1];
6983 oldval = operands[2];
6984 newval = operands[3];
6985 is_weak = (operands[4] != const0_rtx);
6986 scratch = operands[7];
6987 mode = GET_MODE (mem);
6989 label1 = NULL_RTX;
6990 if (!is_weak)
6992 label1 = gen_label_rtx ();
6993 emit_label (label1);
6995 label2 = gen_label_rtx ();
6997 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6999 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7000 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7001 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7002 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7003 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7005 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7007 if (!is_weak)
7009 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7010 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7011 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7012 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7014 else
7016 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7017 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7018 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7021 emit_label (label2);
7024 /* Split an atomic operation. */
7026 void
7027 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7028 rtx value, rtx model_rtx, rtx cond)
7030 enum machine_mode mode = GET_MODE (mem);
7031 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7032 rtx label, x;
7034 label = gen_label_rtx ();
7035 emit_label (label);
7037 if (new_out)
7038 new_out = gen_lowpart (wmode, new_out);
7039 if (old_out)
7040 old_out = gen_lowpart (wmode, old_out);
7041 else
7042 old_out = new_out;
7043 value = simplify_gen_subreg (wmode, value, mode, 0);
7045 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7047 switch (code)
7049 case SET:
7050 new_out = value;
7051 break;
7053 case NOT:
7054 x = gen_rtx_AND (wmode, old_out, value);
7055 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7056 x = gen_rtx_NOT (wmode, new_out);
7057 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7058 break;
7060 case MINUS:
7061 if (CONST_INT_P (value))
7063 value = GEN_INT (-INTVAL (value));
7064 code = PLUS;
7066 /* Fall through. */
7068 default:
7069 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7070 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7071 break;
7074 aarch64_emit_store_exclusive (mode, cond, mem,
7075 gen_lowpart (mode, new_out), model_rtx);
7077 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7078 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7079 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7080 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7083 static void
7084 aarch64_print_extension (void)
7086 const struct aarch64_option_extension *opt = NULL;
7088 for (opt = all_extensions; opt->name != NULL; opt++)
7089 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7090 asm_fprintf (asm_out_file, "+%s", opt->name);
7092 asm_fprintf (asm_out_file, "\n");
7095 static void
7096 aarch64_start_file (void)
7098 if (selected_arch)
7100 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7101 aarch64_print_extension ();
7103 else if (selected_cpu)
7105 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7106 aarch64_print_extension ();
7108 default_file_start();
7111 /* Target hook for c_mode_for_suffix. */
7112 static enum machine_mode
7113 aarch64_c_mode_for_suffix (char suffix)
7115 if (suffix == 'q')
7116 return TFmode;
7118 return VOIDmode;
7121 /* We can only represent floating point constants which will fit in
7122 "quarter-precision" values. These values are characterised by
7123 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7126 (-1)^s * (n/16) * 2^r
7128 Where:
7129 's' is the sign bit.
7130 'n' is an integer in the range 16 <= n <= 31.
7131 'r' is an integer in the range -3 <= r <= 4. */
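/* A few illustrative values: 0.125 = (16/16) * 2^-3 is the smallest
   positive encodable value and 31.0 = (31/16) * 2^4 the largest;
   0.5 = (16/16) * 2^-1 and 2.5 = (20/16) * 2^1 are also representable,
   while 1.0/3.0 and 0.0 are not.  */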
7133 /* Return true iff X can be represented as a quarter-precision
7134 floating point immediate operand. Note, we cannot represent 0.0. */
7135 bool
7136 aarch64_float_const_representable_p (rtx x)
7138 /* This represents our current view of how many bits
7139 make up the mantissa. */
7140 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7141 int exponent;
7142 unsigned HOST_WIDE_INT mantissa, mask;
7143 HOST_WIDE_INT m1, m2;
7144 REAL_VALUE_TYPE r, m;
7146 if (!CONST_DOUBLE_P (x))
7147 return false;
7149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7151 /* We cannot represent infinities, NaNs or +/-zero. We won't
7152 know if we have +zero until we analyse the mantissa, but we
7153 can reject the other invalid values. */
7154 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7155 || REAL_VALUE_MINUS_ZERO (r))
7156 return false;
7158 /* Extract exponent. */
7159 r = real_value_abs (&r);
7160 exponent = REAL_EXP (&r);
7162 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7163 highest (sign) bit, with a fixed binary point at bit point_pos.
7164 m1 holds the low part of the mantissa, m2 the high part.
7165 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7166 bits for the mantissa, this can fail (low bits will be lost). */
7167 real_ldexp (&m, &r, point_pos - exponent);
7168 REAL_VALUE_TO_INT (&m1, &m2, m);
7170 /* If the low part of the mantissa has bits set we cannot represent
7171 the value. */
7172 if (m1 != 0)
7173 return false;
7174 /* We have rejected the lower HOST_WIDE_INT, so update our
7175 understanding of how many bits lie in the mantissa and
7176 look only at the high HOST_WIDE_INT. */
7177 mantissa = m2;
7178 point_pos -= HOST_BITS_PER_WIDE_INT;
7180 /* We can only represent values with a mantissa of the form 1.xxxx. */
7181 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7182 if ((mantissa & mask) != 0)
7183 return false;
7185 /* Having filtered unrepresentable values, we may now remove all
7186 but the highest 5 bits. */
7187 mantissa >>= point_pos - 5;
7189 /* We cannot represent the value 0.0, so reject it. This is handled
7190 elsewhere. */
7191 if (mantissa == 0)
7192 return false;
7194 /* Then, as bit 4 is always set, we can mask it off, leaving
7195 the mantissa in the range [0, 15]. */
7196 mantissa &= ~(1 << 4);
7197 gcc_assert (mantissa <= 15);
7199 /* GCC internally does not use IEEE754-like encoding (where normalized
7200 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
7201 Our mantissa values are shifted 4 places to the left relative to
7202 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7203 by 5 places to correct for GCC's representation. */
7204 exponent = 5 - exponent;
7206 return (exponent >= 0 && exponent <= 7);
7209 char*
7210 aarch64_output_simd_mov_immediate (rtx *const_vector,
7211 enum machine_mode mode,
7212 unsigned width)
7214 int is_valid;
7215 unsigned char widthc;
7216 int lane_width_bits;
7217 static char templ[40];
7218 int shift = 0, mvn = 0;
7219 const char *mnemonic;
7220 unsigned int lane_count = 0;
7222 is_valid =
7223 aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7224 const_vector, &lane_width_bits,
7225 &widthc, &mvn, &shift);
7226 gcc_assert (is_valid);
7228 mode = GET_MODE_INNER (mode);
7229 if (mode == SFmode || mode == DFmode)
7231 bool zero_p =
7232 aarch64_float_const_zero_rtx_p (*const_vector);
7233 gcc_assert (shift == 0);
7234 mnemonic = zero_p ? "movi" : "fmov";
7236 else
7237 mnemonic = mvn ? "mvni" : "movi";
7239 gcc_assert (lane_width_bits != 0);
7240 lane_count = width / lane_width_bits;
7242 if (lane_count == 1)
7243 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7244 else if (shift)
7245 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7246 mnemonic, lane_count, widthc, shift);
7247 else
7248 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7249 mnemonic, lane_count, widthc);
7250 return templ;
7253 /* Split operands into moves from op[1] + op[2] into op[0]. */
7255 void
7256 aarch64_split_combinev16qi (rtx operands[3])
7258 unsigned int dest = REGNO (operands[0]);
7259 unsigned int src1 = REGNO (operands[1]);
7260 unsigned int src2 = REGNO (operands[2]);
7261 enum machine_mode halfmode = GET_MODE (operands[1]);
7262 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7263 rtx destlo, desthi;
7265 gcc_assert (halfmode == V16QImode);
7267 if (src1 == dest && src2 == dest + halfregs)
7269 /* No-op move. Can't split to nothing; emit something. */
7270 emit_note (NOTE_INSN_DELETED);
7271 return;
7274 /* Preserve register attributes for variable tracking. */
7275 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7276 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7277 GET_MODE_SIZE (halfmode));
7279 /* Special case of reversed high/low parts. */
7280 if (reg_overlap_mentioned_p (operands[2], destlo)
7281 && reg_overlap_mentioned_p (operands[1], desthi))
7283 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7284 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7285 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7287 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7289 /* Try to avoid unnecessary moves if part of the result
7290 is in the right place already. */
7291 if (src1 != dest)
7292 emit_move_insn (destlo, operands[1]);
7293 if (src2 != dest + halfregs)
7294 emit_move_insn (desthi, operands[2]);
7296 else
7298 if (src2 != dest + halfregs)
7299 emit_move_insn (desthi, operands[2]);
7300 if (src1 != dest)
7301 emit_move_insn (destlo, operands[1]);
7305 /* vec_perm support. */
7307 #define MAX_VECT_LEN 16
7309 struct expand_vec_perm_d
7311 rtx target, op0, op1;
7312 unsigned char perm[MAX_VECT_LEN];
7313 enum machine_mode vmode;
7314 unsigned char nelt;
7315 bool one_vector_p;
7316 bool testing_p;
7319 /* Generate a variable permutation. */
7321 static void
7322 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7324 enum machine_mode vmode = GET_MODE (target);
7325 bool one_vector_p = rtx_equal_p (op0, op1);
7327 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7328 gcc_checking_assert (GET_MODE (op0) == vmode);
7329 gcc_checking_assert (GET_MODE (op1) == vmode);
7330 gcc_checking_assert (GET_MODE (sel) == vmode);
7331 gcc_checking_assert (TARGET_SIMD);
7333 if (one_vector_p)
7335 if (vmode == V8QImode)
7337 /* Expand the argument to a V16QI mode by duplicating it. */
7338 rtx pair = gen_reg_rtx (V16QImode);
7339 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7340 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7342 else
7344 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7347 else
7349 rtx pair;
7351 if (vmode == V8QImode)
7353 pair = gen_reg_rtx (V16QImode);
7354 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7355 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7357 else
7359 pair = gen_reg_rtx (OImode);
7360 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7361 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7366 void
7367 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7369 enum machine_mode vmode = GET_MODE (target);
7370 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7371 bool one_vector_p = rtx_equal_p (op0, op1);
7372 rtx rmask[MAX_VECT_LEN], mask;
7374 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7376 /* The TBL instruction does not use a modulo index, so we must take care
7377 of that ourselves. */
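/* E.g. for a single-vector V16QI permute an index of 17 must select lane
   17 & 15 = 1, whereas an unmasked TBL on a 16-byte table would return
   zero for any out-of-range index, so the selector is ANDed with the
   mask first.  */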
7378 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7379 for (i = 0; i < nelt; ++i)
7380 rmask[i] = mask;
7381 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7382 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7384 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7387 /* Recognize patterns suitable for the TRN instructions. */
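/* For V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}, TRN1 corresponds to
   the permutation {0,4,2,6} (result {a0,b0,a2,b2}) and TRN2 to
   {1,5,3,7} (result {a1,b1,a3,b3}); the tests below match exactly these
   index patterns.  */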
7388 static bool
7389 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7391 unsigned int i, odd, mask, nelt = d->nelt;
7392 rtx out, in0, in1, x;
7393 rtx (*gen) (rtx, rtx, rtx);
7394 enum machine_mode vmode = d->vmode;
7396 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7397 return false;
7399 /* Note that these are little-endian tests.
7400 We correct for big-endian later. */
7401 if (d->perm[0] == 0)
7402 odd = 0;
7403 else if (d->perm[0] == 1)
7404 odd = 1;
7405 else
7406 return false;
7407 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7409 for (i = 0; i < nelt; i += 2)
7411 if (d->perm[i] != i + odd)
7412 return false;
7413 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7414 return false;
7417 /* Success! */
7418 if (d->testing_p)
7419 return true;
7421 in0 = d->op0;
7422 in1 = d->op1;
7423 if (BYTES_BIG_ENDIAN)
7425 x = in0, in0 = in1, in1 = x;
7426 odd = !odd;
7428 out = d->target;
7430 if (odd)
7432 switch (vmode)
7434 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7435 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7436 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7437 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7438 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7439 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7440 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7441 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7442 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7443 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7444 default:
7445 return false;
7448 else
7450 switch (vmode)
7452 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7453 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7454 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7455 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7456 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7457 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7458 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7459 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7460 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7461 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7462 default:
7463 return false;
7467 emit_insn (gen (out, in0, in1));
7468 return true;
7471 /* Recognize patterns suitable for the UZP instructions. */
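/* For V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}, UZP1 corresponds to
   the permutation {0,2,4,6} (result {a0,a2,b0,b2}) and UZP2 to
   {1,3,5,7} (result {a1,a3,b1,b3}).  */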
7472 static bool
7473 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7475 unsigned int i, odd, mask, nelt = d->nelt;
7476 rtx out, in0, in1, x;
7477 rtx (*gen) (rtx, rtx, rtx);
7478 enum machine_mode vmode = d->vmode;
7480 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7481 return false;
7483 /* Note that these are little-endian tests.
7484 We correct for big-endian later. */
7485 if (d->perm[0] == 0)
7486 odd = 0;
7487 else if (d->perm[0] == 1)
7488 odd = 1;
7489 else
7490 return false;
7491 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7493 for (i = 0; i < nelt; i++)
7495 unsigned elt = (i * 2 + odd) & mask;
7496 if (d->perm[i] != elt)
7497 return false;
7500 /* Success! */
7501 if (d->testing_p)
7502 return true;
7504 in0 = d->op0;
7505 in1 = d->op1;
7506 if (BYTES_BIG_ENDIAN)
7508 x = in0, in0 = in1, in1 = x;
7509 odd = !odd;
7511 out = d->target;
7513 if (odd)
7515 switch (vmode)
7517 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7518 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7519 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7520 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7521 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7522 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7523 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7524 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7525 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7526 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7527 default:
7528 return false;
7531 else
7533 switch (vmode)
7535 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7536 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7537 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7538 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7539 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7540 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7541 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7542 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7543 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7544 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7545 default:
7546 return false;
7550 emit_insn (gen (out, in0, in1));
7551 return true;
7554 /* Recognize patterns suitable for the ZIP instructions. */
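/* For V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}, ZIP1 corresponds to
   the permutation {0,4,1,5} (result {a0,b0,a1,b1}) and ZIP2 to
   {2,6,3,7} (result {a2,b2,a3,b3}).  */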
7555 static bool
7556 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7558 unsigned int i, high, mask, nelt = d->nelt;
7559 rtx out, in0, in1, x;
7560 rtx (*gen) (rtx, rtx, rtx);
7561 enum machine_mode vmode = d->vmode;
7563 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7564 return false;
7566 /* Note that these are little-endian tests.
7567 We correct for big-endian later. */
7568 high = nelt / 2;
7569 if (d->perm[0] == high)
7570 /* Do Nothing. */
7572 else if (d->perm[0] == 0)
7573 high = 0;
7574 else
7575 return false;
7576 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7578 for (i = 0; i < nelt / 2; i++)
7580 unsigned elt = (i + high) & mask;
7581 if (d->perm[i * 2] != elt)
7582 return false;
7583 elt = (elt + nelt) & mask;
7584 if (d->perm[i * 2 + 1] != elt)
7585 return false;
7588 /* Success! */
7589 if (d->testing_p)
7590 return true;
7592 in0 = d->op0;
7593 in1 = d->op1;
7594 if (BYTES_BIG_ENDIAN)
7596 x = in0, in0 = in1, in1 = x;
7597 high = !high;
7599 out = d->target;
7601 if (high)
7603 switch (vmode)
7605 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7606 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7607 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7608 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7609 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7610 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7611 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7612 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7613 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7614 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7615 default:
7616 return false;
7619 else
7621 switch (vmode)
7623 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7624 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7625 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7626 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7627 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7628 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7629 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7630 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7631 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7632 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7633 default:
7634 return false;
7638 emit_insn (gen (out, in0, in1));
7639 return true;
7642 static bool
7643 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7645 rtx rperm[MAX_VECT_LEN], sel;
7646 enum machine_mode vmode = d->vmode;
7647 unsigned int i, nelt = d->nelt;
7649 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7650 numbering of elements for big-endian, we must reverse the order. */
7651 if (BYTES_BIG_ENDIAN)
7652 return false;
7654 if (d->testing_p)
7655 return true;
7657 /* Generic code will try constant permutation twice: once with the
7658 original mode and again with the elements lowered to QImode.
7659 So wait and don't do the selector expansion ourselves. */
7660 if (vmode != V8QImode && vmode != V16QImode)
7661 return false;
7663 for (i = 0; i < nelt; ++i)
7664 rperm[i] = GEN_INT (d->perm[i]);
7665 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7666 sel = force_reg (vmode, sel);
7668 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7669 return true;
7672 static bool
7673 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7675 /* The pattern matching functions above are written to look for a small
7676 number to begin the sequence (0, 1, N/2). If we begin with an index
7677 from the second operand, we can swap the operands. */
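/* For instance, a V2DI permutation {2,3} selects only from the second
   operand; the remapping below turns it into {0,1} applied to the
   swapped operands, so the sequence again begins with a small index as
   the matchers expect.  */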
7678 if (d->perm[0] >= d->nelt)
7680 unsigned i, nelt = d->nelt;
7681 rtx x;
7683 for (i = 0; i < nelt; ++i)
7684 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7686 x = d->op0;
7687 d->op0 = d->op1;
7688 d->op1 = x;
7691 if (TARGET_SIMD)
7693 if (aarch64_evpc_zip (d))
7694 return true;
7695 else if (aarch64_evpc_uzp (d))
7696 return true;
7697 else if (aarch64_evpc_trn (d))
7698 return true;
7699 return aarch64_evpc_tbl (d);
7701 return false;
7704 /* Expand a vec_perm_const pattern. */
7706 bool
7707 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7709 struct expand_vec_perm_d d;
7710 int i, nelt, which;
7712 d.target = target;
7713 d.op0 = op0;
7714 d.op1 = op1;
7716 d.vmode = GET_MODE (target);
7717 gcc_assert (VECTOR_MODE_P (d.vmode));
7718 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7719 d.testing_p = false;
7721 for (i = which = 0; i < nelt; ++i)
7723 rtx e = XVECEXP (sel, 0, i);
7724 int ei = INTVAL (e) & (2 * nelt - 1);
7725 which |= (ei < nelt ? 1 : 2);
7726 d.perm[i] = ei;
7729 switch (which)
7731 default:
7732 gcc_unreachable ();
7734 case 3:
7735 d.one_vector_p = false;
7736 if (!rtx_equal_p (op0, op1))
7737 break;
7739 /* The elements of PERM do not suggest that only the first operand
7740 is used, but both operands are identical. Allow easier matching
7741 of the permutation by folding the permutation into the single
7742 input vector. */
7743 /* Fall Through. */
7744 case 2:
7745 for (i = 0; i < nelt; ++i)
7746 d.perm[i] &= nelt - 1;
7747 d.op0 = op1;
7748 d.one_vector_p = true;
7749 break;
7751 case 1:
7752 d.op1 = op0;
7753 d.one_vector_p = true;
7754 break;
7757 return aarch64_expand_vec_perm_const_1 (&d);
7760 static bool
7761 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7762 const unsigned char *sel)
7764 struct expand_vec_perm_d d;
7765 unsigned int i, nelt, which;
7766 bool ret;
7768 d.vmode = vmode;
7769 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7770 d.testing_p = true;
7771 memcpy (d.perm, sel, nelt);
7773 /* Calculate whether all elements are in one vector. */
7774 for (i = which = 0; i < nelt; ++i)
7776 unsigned char e = d.perm[i];
7777 gcc_assert (e < 2 * nelt);
7778 which |= (e < nelt ? 1 : 2);
7781 /* If all elements are from the second vector, reindex as if from the
7782 first vector. */
7783 if (which == 2)
7784 for (i = 0; i < nelt; ++i)
7785 d.perm[i] -= nelt;
7787 /* Check whether the mask can be applied to a single vector. */
7788 d.one_vector_p = (which != 3);
7790 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7791 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7792 if (!d.one_vector_p)
7793 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7795 start_sequence ();
7796 ret = aarch64_expand_vec_perm_const_1 (&d);
7797 end_sequence ();
7799 return ret;
7802 #undef TARGET_ADDRESS_COST
7803 #define TARGET_ADDRESS_COST aarch64_address_cost
7805 /* This hook determines whether unnamed bitfields affect the alignment
7806 of the containing structure. The hook returns true if the structure
7807 should inherit the alignment requirements of an unnamed bitfield's
7808 type. */
7809 #undef TARGET_ALIGN_ANON_BITFIELD
7810 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7812 #undef TARGET_ASM_ALIGNED_DI_OP
7813 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7815 #undef TARGET_ASM_ALIGNED_HI_OP
7816 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7818 #undef TARGET_ASM_ALIGNED_SI_OP
7819 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7821 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7822 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7823 hook_bool_const_tree_hwi_hwi_const_tree_true
7825 #undef TARGET_ASM_FILE_START
7826 #define TARGET_ASM_FILE_START aarch64_start_file
7828 #undef TARGET_ASM_OUTPUT_MI_THUNK
7829 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7831 #undef TARGET_ASM_SELECT_RTX_SECTION
7832 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7834 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7835 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7837 #undef TARGET_BUILD_BUILTIN_VA_LIST
7838 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7840 #undef TARGET_CALLEE_COPIES
7841 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7843 #undef TARGET_CAN_ELIMINATE
7844 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7846 #undef TARGET_CANNOT_FORCE_CONST_MEM
7847 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7849 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7850 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7852 /* Only the least significant bit is used for initialization guard
7853 variables. */
7854 #undef TARGET_CXX_GUARD_MASK_BIT
7855 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7857 #undef TARGET_C_MODE_FOR_SUFFIX
7858 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7860 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7861 #undef TARGET_DEFAULT_TARGET_FLAGS
7862 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7863 #endif
7865 #undef TARGET_CLASS_MAX_NREGS
7866 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7868 #undef TARGET_BUILTIN_DECL
7869 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7871 #undef TARGET_EXPAND_BUILTIN
7872 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7874 #undef TARGET_EXPAND_BUILTIN_VA_START
7875 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7877 #undef TARGET_FOLD_BUILTIN
7878 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
7880 #undef TARGET_FUNCTION_ARG
7881 #define TARGET_FUNCTION_ARG aarch64_function_arg
7883 #undef TARGET_FUNCTION_ARG_ADVANCE
7884 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7886 #undef TARGET_FUNCTION_ARG_BOUNDARY
7887 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7889 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7890 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7892 #undef TARGET_FUNCTION_VALUE
7893 #define TARGET_FUNCTION_VALUE aarch64_function_value
7895 #undef TARGET_FUNCTION_VALUE_REGNO_P
7896 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7898 #undef TARGET_FRAME_POINTER_REQUIRED
7899 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7901 #undef TARGET_GIMPLE_FOLD_BUILTIN
7902 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
7904 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7905 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7907 #undef TARGET_INIT_BUILTINS
7908 #define TARGET_INIT_BUILTINS aarch64_init_builtins
7910 #undef TARGET_LEGITIMATE_ADDRESS_P
7911 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7913 #undef TARGET_LEGITIMATE_CONSTANT_P
7914 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7916 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7917 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7919 #undef TARGET_MANGLE_TYPE
7920 #define TARGET_MANGLE_TYPE aarch64_mangle_type
7922 #undef TARGET_MEMORY_MOVE_COST
7923 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7925 #undef TARGET_MUST_PASS_IN_STACK
7926 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7928 /* This target hook should return true if accesses to volatile bitfields
7929 should use the narrowest mode possible. It should return false if these
7930 accesses should use the bitfield container type. */
7931 #undef TARGET_NARROW_VOLATILE_BITFIELD
7932 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7934 #undef TARGET_OPTION_OVERRIDE
7935 #define TARGET_OPTION_OVERRIDE aarch64_override_options
7937 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7938 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7939 aarch64_override_options_after_change
7941 #undef TARGET_PASS_BY_REFERENCE
7942 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7944 #undef TARGET_PREFERRED_RELOAD_CLASS
7945 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7947 #undef TARGET_SECONDARY_RELOAD
7948 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7950 #undef TARGET_SHIFT_TRUNCATION_MASK
7951 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7953 #undef TARGET_SETUP_INCOMING_VARARGS
7954 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7956 #undef TARGET_STRUCT_VALUE_RTX
7957 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
7959 #undef TARGET_REGISTER_MOVE_COST
7960 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7962 #undef TARGET_RETURN_IN_MEMORY
7963 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7965 #undef TARGET_RETURN_IN_MSB
7966 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7968 #undef TARGET_RTX_COSTS
7969 #define TARGET_RTX_COSTS aarch64_rtx_costs
7971 #undef TARGET_TRAMPOLINE_INIT
7972 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7974 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7975 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7977 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7978 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7980 #undef TARGET_ARRAY_MODE_SUPPORTED_P
7981 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7983 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7984 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7986 #undef TARGET_VECTORIZE_BUILTINS
7987 #define TARGET_VECTORIZE_BUILTINS
7989 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7990 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7991 aarch64_builtin_vectorized_function
7993 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7994 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7995 aarch64_autovectorize_vector_sizes
7997 /* Section anchor support. */
7999 #undef TARGET_MIN_ANCHOR_OFFSET
8000 #define TARGET_MIN_ANCHOR_OFFSET -256
8002 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8003 byte offset; we can do much more for larger data types, but have no way
8004 to determine the size of the access. We assume accesses are aligned. */
8005 #undef TARGET_MAX_ANCHOR_OFFSET
8006 #define TARGET_MAX_ANCHOR_OFFSET 4095
8008 #undef TARGET_VECTOR_ALIGNMENT
8009 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8011 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8012 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8013 aarch64_simd_vector_alignment_reachable
8015 /* vec_perm support. */
8017 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8018 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8019 aarch64_vectorize_vec_perm_const_ok
8022 #undef TARGET_FIXED_CONDITION_CODE_REGS
8023 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8025 struct gcc_target targetm = TARGET_INITIALIZER;
8027 #include "gt-aarch64.h"