gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2013 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "regs.h"
  30 #include "df.h"
  31 #include "hard-reg-set.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "reload.h"
  35 #include "toplev.h"
  36 #include "target.h"
  37 #include "target-def.h"
  38 #include "targhooks.h"
  39 #include "ggc.h"
  40 #include "function.h"
  41 #include "tm_p.h"
  42 #include "recog.h"
  43 #include "langhooks.h"
  44 #include "diagnostic-core.h"
  45 #include "gimple.h"
  46 #include "optabs.h"
  47 #include "dwarf2.h"
  48
  49 /* Classifies an address.
  50
  51    ADDRESS_REG_IMM
  52        A simple base register plus immediate offset.
  53
  54    ADDRESS_REG_WB
  55        A base register indexed by immediate offset with writeback.
  56
  57    ADDRESS_REG_REG
  58        A base register indexed by (optionally scaled) register.
  59
  60    ADDRESS_REG_UXTW
  61        A base register indexed by (optionally scaled) zero-extended register.
  62
  63    ADDRESS_REG_SXTW
  64        A base register indexed by (optionally scaled) sign-extended register.
  65
  66    ADDRESS_LO_SUM
  67        A LO_SUM rtx with a base register and "LO12" symbol relocation.
  68
  69    ADDRESS_SYMBOLIC:
  70        A constant symbolic address, in pc-relative literal pool.  */
  71
  72 enum aarch64_address_type {
  73   ADDRESS_REG_IMM,
  74   ADDRESS_REG_WB,
  75   ADDRESS_REG_REG,
  76   ADDRESS_REG_UXTW,
  77   ADDRESS_REG_SXTW,
  78   ADDRESS_LO_SUM,
  79   ADDRESS_SYMBOLIC
  80 };
  81
  82 struct aarch64_address_info {
  83   enum aarch64_address_type type;
  84   rtx base;
  85   rtx offset;
  86   int shift;
  87   enum aarch64_symbol_type symbol_type;
  88 };
  89
  90 /* The current code model.  */
  91 enum aarch64_code_model aarch64_cmodel;
  92
  93 #ifdef HAVE_AS_TLS
  94 #undef TARGET_HAVE_TLS
  95 #define TARGET_HAVE_TLS 1
  96 #endif
  97
  98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
  99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 100                                                      const_tree,
 101                                                      enum machine_mode *, int *,
 102                                                      bool *);
 103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 105 static void aarch64_override_options_after_change (void);
 106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
 107                                          int *, unsigned char *, int *, int *);
 108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 109 static unsigned bit_count (unsigned HOST_WIDE_INT);
 110 static bool aarch64_const_vec_all_same_int_p (rtx,
 111                                               HOST_WIDE_INT, HOST_WIDE_INT);
 112
 113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 114                                                  const unsigned char *sel);
 115
 116 /* The processor for which instructions should be scheduled.  */
 117 enum aarch64_processor aarch64_tune = generic;
 118
 119 /* The current tuning set.  */
 120 const struct tune_params *aarch64_tune_params;
 121
 122 /* Mask to specify which instructions we are allowed to generate.  */
 123 unsigned long aarch64_isa_flags = 0;
 124
 125 /* Mask to specify which instruction scheduling options should be used.  */
 126 unsigned long aarch64_tune_flags = 0;
 127
 128 /* Tuning parameters.  */
 129
 130 #if HAVE_DESIGNATED_INITIALIZERS
 131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
 132 #else
 133 #define NAMED_PARAM(NAME, VAL) (VAL)
 134 #endif
 135
 136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 137 __extension__
 138 #endif
 139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
 140 {
 141   NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
 142   NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
 143   NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
 144   NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
 145   NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
 146   NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
 147   NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
 148   NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
 149   NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
 150   NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
 151   NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
 152   NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
 153 };
 154
 155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 156 __extension__
 157 #endif
 158 static const struct cpu_addrcost_table generic_addrcost_table =
 159 {
 160   NAMED_PARAM (pre_modify, 0),
 161   NAMED_PARAM (post_modify, 0),
 162   NAMED_PARAM (register_offset, 0),
 163   NAMED_PARAM (register_extend, 0),
 164   NAMED_PARAM (imm_offset, 0)
 165 };
 166
 167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 168 __extension__
 169 #endif
 170 static const struct cpu_regmove_cost generic_regmove_cost =
 171 {
 172   NAMED_PARAM (GP2GP, 1),
 173   NAMED_PARAM (GP2FP, 2),
 174   NAMED_PARAM (FP2GP, 2),
 175   /* We currently do not provide direct support for TFmode Q->Q move.
 176      Therefore we need to raise the cost above 2 in order to have
 177      reload handle the situation.  */
 178   NAMED_PARAM (FP2FP, 4)
 179 };
 180
 181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 182 __extension__
 183 #endif
 184 static const struct tune_params generic_tunings =
 185 {
 186   &generic_rtx_cost_table,
 187   &generic_addrcost_table,
 188   &generic_regmove_cost,
 189   NAMED_PARAM (memmov_cost, 4)
 190 };
 191
 192 /* A processor implementing AArch64.  */
 193 struct processor
 194 {
 195   const char *const name;
 196   enum aarch64_processor core;
 197   const char *arch;
 198   const unsigned long flags;
 199   const struct tune_params *const tune;
 200 };
 201
 202 /* Processor cores implementing AArch64.  */
 203 static const struct processor all_cores[] =
 204 {
 205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
 206   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
 207 #include "aarch64-cores.def"
 208 #undef AARCH64_CORE
 209   {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
 210   {NULL, aarch64_none, NULL, 0, NULL}
 211 };
 212
 213 /* Architectures implementing AArch64.  */
 214 static const struct processor all_architectures[] =
 215 {
 216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 217   {NAME, CORE, #ARCH, FLAGS, NULL},
 218 #include "aarch64-arches.def"
 219 #undef AARCH64_ARCH
 220   {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
 221   {NULL, aarch64_none, NULL, 0, NULL}
 222 };
 223
 224 /* Target specification.  These are populated as commandline arguments
 225    are processed, or NULL if not specified.  */
 226 static const struct processor *selected_arch;
 227 static const struct processor *selected_cpu;
 228 static const struct processor *selected_tune;
 229
 230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 231
 232 /* An ISA extension in the co-processor and main instruction set space.  */
 233 struct aarch64_option_extension
 234 {
 235   const char *const name;
 236   const unsigned long flags_on;
 237   const unsigned long flags_off;
 238 };
 239
 240 /* ISA extensions in AArch64.  */
 241 static const struct aarch64_option_extension all_extensions[] =
 242 {
 243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 244   {NAME, FLAGS_ON, FLAGS_OFF},
 245 #include "aarch64-option-extensions.def"
 246 #undef AARCH64_OPT_EXTENSION
 247   {NULL, 0, 0}
 248 };
 249
 250 /* Used to track the size of an address when generating a pre/post
 251    increment address.  */
 252 static enum machine_mode aarch64_memory_reference_mode;
 253
 254 /* Used to force GTY into this file.  */
 255 static GTY(()) int gty_dummy;
 256
 257 /* A table of valid AArch64 "bitmask immediate" values for
 258    logical instructions.  */
 259
 260 #define AARCH64_NUM_BITMASKS  5334
 261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 262
 263 /* Did we set flag_omit_frame_pointer just so
 264    aarch64_frame_pointer_required would be called? */
 265 static bool faked_omit_frame_pointer;
 266
 267 typedef enum aarch64_cond_code
 268 {
 269   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 270   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 271   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 272 }
 273 aarch64_cc;
 274
 275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 276
 277 /* The condition codes of the processor, and the inverse function.  */
 278 static const char * const aarch64_condition_codes[] =
 279 {
 280   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 281   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 282 };
 283
 284 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 285 unsigned
 286 aarch64_dbx_register_number (unsigned regno)
 287 {
 288    if (GP_REGNUM_P (regno))
 289      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 290    else if (regno == SP_REGNUM)
 291      return AARCH64_DWARF_SP;
 292    else if (FP_REGNUM_P (regno))
 293      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 294
 295    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 296       equivalent DWARF register.  */
 297    return DWARF_FRAME_REGISTERS;
 298 }
 299
 300 /* Return TRUE if MODE is any of the large INT modes.  */
 301 static bool
 302 aarch64_vect_struct_mode_p (enum machine_mode mode)
 303 {
 304   return mode == OImode || mode == CImode || mode == XImode;
 305 }
 306
 307 /* Return TRUE if MODE is any of the vector modes.  */
 308 static bool
 309 aarch64_vector_mode_p (enum machine_mode mode)
 310 {
 311   return aarch64_vector_mode_supported_p (mode)
 312          || aarch64_vect_struct_mode_p (mode);
 313 }
 314
 315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 316 static bool
 317 aarch64_array_mode_supported_p (enum machine_mode mode,
 318                                 unsigned HOST_WIDE_INT nelems)
 319 {
 320   if (TARGET_SIMD
 321       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 322       && (nelems >= 2 && nelems <= 4))
 323     return true;
 324
 325   return false;
 326 }
 327
 328 /* Implement HARD_REGNO_NREGS.  */
 329
 330 int
 331 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 332 {
 333   switch (aarch64_regno_regclass (regno))
 334     {
 335     case FP_REGS:
 336     case FP_LO_REGS:
 337       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 338     default:
 339       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 340     }
 341   gcc_unreachable ();
 342 }
 343
 344 /* Implement HARD_REGNO_MODE_OK.  */
 345
 346 int
 347 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 348 {
 349   if (GET_MODE_CLASS (mode) == MODE_CC)
 350     return regno == CC_REGNUM;
 351
 352   if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
 353       || regno == ARG_POINTER_REGNUM)
 354     return mode == Pmode;
 355
 356   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 357     return 1;
 358
 359   if (FP_REGNUM_P (regno))
 360     {
 361       if (aarch64_vect_struct_mode_p (mode))
 362         return
 363           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 364       else
 365         return 1;
 366     }
 367
 368   return 0;
 369 }
 370
 371 /* Return true if calls to DECL should be treated as
 372    long-calls (ie called via a register).  */
 373 static bool
 374 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 375 {
 376   return false;
 377 }
 378
 379 /* Return true if calls to symbol-ref SYM should be treated as
 380    long-calls (ie called via a register).  */
 381 bool
 382 aarch64_is_long_call_p (rtx sym)
 383 {
 384   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 385 }
 386
 387 /* Return true if the offsets to a zero/sign-extract operation
 388    represent an expression that matches an extend operation.  The
 389    operands represent the paramters from
 390
 391    (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
 392 bool
 393 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 394                                 rtx extract_imm)
 395 {
 396   HOST_WIDE_INT mult_val, extract_val;
 397
 398   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 399     return false;
 400
 401   mult_val = INTVAL (mult_imm);
 402   extract_val = INTVAL (extract_imm);
 403
 404   if (extract_val > 8
 405       && extract_val < GET_MODE_BITSIZE (mode)
 406       && exact_log2 (extract_val & ~7) > 0
 407       && (extract_val & 7) <= 4
 408       && mult_val == (1 << (extract_val & 7)))
 409     return true;
 410
 411   return false;
 412 }
 413
 414 /* Emit an insn that's a simple single-set.  Both the operands must be
 415    known to be valid.  */
 416 inline static rtx
 417 emit_set_insn (rtx x, rtx y)
 418 {
 419   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 420 }
 421
 422 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 423    return the rtx for register 0 in the proper mode.  */
 424 rtx
 425 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 426 {
 427   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 428   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 429
 430   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 431   return cc_reg;
 432 }
 433
 434 /* Build the SYMBOL_REF for __tls_get_addr.  */
 435
 436 static GTY(()) rtx tls_get_addr_libfunc;
 437
 438 rtx
 439 aarch64_tls_get_addr (void)
 440 {
 441   if (!tls_get_addr_libfunc)
 442     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 443   return tls_get_addr_libfunc;
 444 }
 445
 446 /* Return the TLS model to use for ADDR.  */
 447
 448 static enum tls_model
 449 tls_symbolic_operand_type (rtx addr)
 450 {
 451   enum tls_model tls_kind = TLS_MODEL_NONE;
 452   rtx sym, addend;
 453
 454   if (GET_CODE (addr) == CONST)
 455     {
 456       split_const (addr, &sym, &addend);
 457       if (GET_CODE (sym) == SYMBOL_REF)
 458         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 459     }
 460   else if (GET_CODE (addr) == SYMBOL_REF)
 461     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 462
 463   return tls_kind;
 464 }
 465
 466 /* We'll allow lo_sum's in addresses in our legitimate addresses
 467    so that combine would take care of combining addresses where
 468    necessary, but for generation purposes, we'll generate the address
 469    as :
 470    RTL                               Absolute
 471    tmp = hi (symbol_ref);            adrp  x1, foo
 472    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
 473                                      nop
 474
 475    PIC                               TLS
 476    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 477    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 478                                      bl   __tls_get_addr
 479                                      nop
 480
 481    Load TLS symbol, depending on TLS mechanism and TLS access model.
 482
 483    Global Dynamic - Traditional TLS:
 484    adrp tmp, :tlsgd:imm
 485    add  dest, tmp, #:tlsgd_lo12:imm
 486    bl   __tls_get_addr
 487
 488    Global Dynamic - TLS Descriptors:
 489    adrp dest, :tlsdesc:imm
 490    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 491    add  dest, dest, #:tlsdesc_lo12:imm
 492    blr  tmp
 493    mrs  tp, tpidr_el0
 494    add  dest, dest, tp
 495
 496    Initial Exec:
 497    mrs  tp, tpidr_el0
 498    adrp tmp, :gottprel:imm
 499    ldr  dest, [tmp, #:gottprel_lo12:imm]
 500    add  dest, dest, tp
 501
 502    Local Exec:
 503    mrs  tp, tpidr_el0
 504    add  t0, tp, #:tprel_hi12:imm
 505    add  t0, #:tprel_lo12_nc:imm
 506 */
 507
 508 static void
 509 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 510                                    enum aarch64_symbol_type type)
 511 {
 512   switch (type)
 513     {
 514     case SYMBOL_SMALL_ABSOLUTE:
 515       {
 516         rtx tmp_reg = dest;
 517         if (can_create_pseudo_p ())
 518           {
 519             tmp_reg =  gen_reg_rtx (Pmode);
 520           }
 521
 522         emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
 523         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 524         return;
 525       }
 526
 527     case SYMBOL_SMALL_GOT:
 528       {
 529         rtx tmp_reg = dest;
 530         if (can_create_pseudo_p ())
 531           {
 532             tmp_reg =  gen_reg_rtx (Pmode);
 533           }
 534         emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
 535         emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
 536         return;
 537       }
 538
 539     case SYMBOL_SMALL_TLSGD:
 540       {
 541         rtx insns;
 542         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 543
 544         start_sequence ();
 545         emit_call_insn (gen_tlsgd_small (result, imm));
 546         insns = get_insns ();
 547         end_sequence ();
 548
 549         RTL_CONST_CALL_P (insns) = 1;
 550         emit_libcall_block (insns, dest, result, imm);
 551         return;
 552       }
 553
 554     case SYMBOL_SMALL_TLSDESC:
 555       {
 556         rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
 557         rtx tp;
 558
 559         emit_insn (gen_tlsdesc_small (imm));
 560         tp = aarch64_load_tp (NULL);
 561         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
 562         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 563         return;
 564       }
 565
 566     case SYMBOL_SMALL_GOTTPREL:
 567       {
 568         rtx tmp_reg = gen_reg_rtx (Pmode);
 569         rtx tp = aarch64_load_tp (NULL);
 570         emit_insn (gen_tlsie_small (tmp_reg, imm));
 571         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
 572         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 573         return;
 574       }
 575
 576     case SYMBOL_SMALL_TPREL:
 577       {
 578         rtx tp = aarch64_load_tp (NULL);
 579         emit_insn (gen_tlsle_small (dest, tp, imm));
 580         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 581         return;
 582       }
 583
 584     default:
 585       gcc_unreachable ();
 586     }
 587 }
 588
 589 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 590    handle all moves if !can_create_pseudo_p ().  The distinction is
 591    important because, unlike emit_move_insn, the move expanders know
 592    how to force Pmode objects into the constant pool even when the
 593    constant pool address is not itself legitimate.  */
 594 static rtx
 595 aarch64_emit_move (rtx dest, rtx src)
 596 {
 597   return (can_create_pseudo_p ()
 598           ? emit_move_insn (dest, src)
 599           : emit_move_insn_1 (dest, src));
 600 }
 601
 602 void
 603 aarch64_split_128bit_move (rtx dst, rtx src)
 604 {
 605   rtx low_dst;
 606
 607   gcc_assert (GET_MODE (dst) == TImode);
 608
 609   if (REG_P (dst) && REG_P (src))
 610     {
 611       int src_regno = REGNO (src);
 612       int dst_regno = REGNO (dst);
 613
 614       gcc_assert (GET_MODE (src) == TImode);
 615
 616       /* Handle r -> w, w -> r.  */
 617       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 618         {
 619           emit_insn (gen_aarch64_movtilow_di (dst,
 620                                               gen_lowpart (word_mode, src)));
 621           emit_insn (gen_aarch64_movtihigh_di (dst,
 622                                                gen_highpart (word_mode, src)));
 623           return;
 624         }
 625       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 626         {
 627           emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
 628                                               src));
 629           emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
 630                                                src));
 631           return;
 632         }
 633       /* Fall through to r -> r cases.  */
 634     }
 635
 636   low_dst = gen_lowpart (word_mode, dst);
 637   if (REG_P (low_dst)
 638       && reg_overlap_mentioned_p (low_dst, src))
 639     {
 640       aarch64_emit_move (gen_highpart (word_mode, dst),
 641                          gen_highpart_mode (word_mode, TImode, src));
 642       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
 643     }
 644   else
 645     {
 646       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
 647       aarch64_emit_move (gen_highpart (word_mode, dst),
 648                          gen_highpart_mode (word_mode, TImode, src));
 649     }
 650 }
 651
 652 bool
 653 aarch64_split_128bit_move_p (rtx dst, rtx src)
 654 {
 655   return (! REG_P (src)
 656           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 657 }
 658
 659 static rtx
 660 aarch64_force_temporary (rtx x, rtx value)
 661 {
 662   if (can_create_pseudo_p ())
 663     return force_reg (Pmode, value);
 664   else
 665     {
 666       x = aarch64_emit_move (x, value);
 667       return x;
 668     }
 669 }
 670
 671
 672 static rtx
 673 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 674 {
 675   if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
 676     {
 677       rtx high;
 678       /* Load the full offset into a register.  This
 679          might be improvable in the future.  */
 680       high = GEN_INT (offset);
 681       offset = 0;
 682       high = aarch64_force_temporary (temp, high);
 683       reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
 684     }
 685   return plus_constant (mode, reg, offset);
 686 }
 687
 688 void
 689 aarch64_expand_mov_immediate (rtx dest, rtx imm)
 690 {
 691   enum machine_mode mode = GET_MODE (dest);
 692   unsigned HOST_WIDE_INT mask;
 693   int i;
 694   bool first;
 695   unsigned HOST_WIDE_INT val;
 696   bool subtargets;
 697   rtx subtarget;
 698   int one_match, zero_match;
 699
 700   gcc_assert (mode == SImode || mode == DImode);
 701
 702   /* Check on what type of symbol it is.  */
 703   if (GET_CODE (imm) == SYMBOL_REF
 704       || GET_CODE (imm) == LABEL_REF
 705       || GET_CODE (imm) == CONST)
 706     {
 707       rtx mem, base, offset;
 708       enum aarch64_symbol_type sty;
 709
 710       /* If we have (const (plus symbol offset)), separate out the offset
 711          before we start classifying the symbol.  */
 712       split_const (imm, &base, &offset);
 713
 714       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
 715       switch (sty)
 716         {
 717         case SYMBOL_FORCE_TO_MEM:
 718           if (offset != const0_rtx
 719               && targetm.cannot_force_const_mem (mode, imm))
 720             {
 721               gcc_assert(can_create_pseudo_p ());
 722               base = aarch64_force_temporary (dest, base);
 723               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
 724               aarch64_emit_move (dest, base);
 725               return;
 726             }
 727           mem = force_const_mem (mode, imm);
 728           gcc_assert (mem);
 729           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
 730           return;
 731
 732         case SYMBOL_SMALL_TLSGD:
 733         case SYMBOL_SMALL_TLSDESC:
 734         case SYMBOL_SMALL_GOTTPREL:
 735         case SYMBOL_SMALL_GOT:
 736           if (offset != const0_rtx)
 737             {
 738               gcc_assert(can_create_pseudo_p ());
 739               base = aarch64_force_temporary (dest, base);
 740               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
 741               aarch64_emit_move (dest, base);
 742               return;
 743             }
 744           /* FALLTHRU */
 745
 746         case SYMBOL_SMALL_TPREL:
 747         case SYMBOL_SMALL_ABSOLUTE:
 748           aarch64_load_symref_appropriately (dest, imm, sty);
 749           return;
 750
 751         default:
 752           gcc_unreachable ();
 753         }
 754     }
 755
 756   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
 757     {
 758       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
 759       return;
 760     }
 761
 762   if (!CONST_INT_P (imm))
 763     {
 764       if (GET_CODE (imm) == HIGH)
 765         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
 766       else
 767         {
 768           rtx mem = force_const_mem (mode, imm);
 769           gcc_assert (mem);
 770           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
 771         }
 772
 773       return;
 774     }
 775
 776   if (mode == SImode)
 777     {
 778       /* We know we can't do this in 1 insn, and we must be able to do it
 779          in two; so don't mess around looking for sequences that don't buy
 780          us anything.  */
 781       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
 782       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
 783                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
 784       return;
 785     }
 786
 787   /* Remaining cases are all for DImode.  */
 788
 789   val = INTVAL (imm);
 790   subtargets = optimize && can_create_pseudo_p ();
 791
 792   one_match = 0;
 793   zero_match = 0;
 794   mask = 0xffff;
 795
 796   for (i = 0; i < 64; i += 16, mask <<= 16)
 797     {
 798       if ((val & mask) == 0)
 799         zero_match++;
 800       else if ((val & mask) == mask)
 801         one_match++;
 802     }
 803
 804   if (one_match == 2)
 805     {
 806       mask = 0xffff;
 807       for (i = 0; i < 64; i += 16, mask <<= 16)
 808         {
 809           if ((val & mask) != mask)
 810             {
 811               emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
 812               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
 813                                          GEN_INT ((val >> i) & 0xffff)));
 814               return;
 815             }
 816         }
 817       gcc_unreachable ();
 818     }
 819
 820   if (zero_match == 2)
 821     goto simple_sequence;
 822
 823   mask = 0x0ffff0000UL;
 824   for (i = 16; i < 64; i += 16, mask <<= 16)
 825     {
 826       HOST_WIDE_INT comp = mask & ~(mask - 1);
 827
 828       if (aarch64_uimm12_shift (val - (val & mask)))
 829         {
 830           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 831
 832           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
 833           emit_insn (gen_adddi3 (dest, subtarget,
 834                                  GEN_INT (val - (val & mask))));
 835           return;
 836         }
 837       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
 838         {
 839           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 840
 841           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 842                                   GEN_INT ((val + comp) & mask)));
 843           emit_insn (gen_adddi3 (dest, subtarget,
 844                                  GEN_INT (val - ((val + comp) & mask))));
 845           return;
 846         }
 847       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
 848         {
 849           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 850
 851           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 852                                   GEN_INT ((val - comp) | ~mask)));
 853           emit_insn (gen_adddi3 (dest, subtarget,
 854                                  GEN_INT (val - ((val - comp) | ~mask))));
 855           return;
 856         }
 857       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
 858         {
 859           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 860
 861           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 862                                   GEN_INT (val | ~mask)));
 863           emit_insn (gen_adddi3 (dest, subtarget,
 864                                  GEN_INT (val - (val | ~mask))));
 865           return;
 866         }
 867     }
 868
 869   /* See if we can do it by arithmetically combining two
 870      immediates.  */
 871   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
 872     {
 873       int j;
 874       mask = 0xffff;
 875
 876       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
 877           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
 878         {
 879           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 880           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 881                                   GEN_INT (aarch64_bitmasks[i])));
 882           emit_insn (gen_adddi3 (dest, subtarget,
 883                                  GEN_INT (val - aarch64_bitmasks[i])));
 884           return;
 885         }
 886
 887       for (j = 0; j < 64; j += 16, mask <<= 16)
 888         {
 889           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
 890             {
 891               emit_insn (gen_rtx_SET (VOIDmode, dest,
 892                                       GEN_INT (aarch64_bitmasks[i])));
 893               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
 894                                          GEN_INT ((val >> j) & 0xffff)));
 895               return;
 896             }
 897         }
 898     }
 899
 900   /* See if we can do it by logically combining two immediates.  */
 901   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
 902     {
 903       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
 904         {
 905           int j;
 906
 907           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
 908             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
 909               {
 910                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
 911                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 912                                         GEN_INT (aarch64_bitmasks[i])));
 913                 emit_insn (gen_iordi3 (dest, subtarget,
 914                                        GEN_INT (aarch64_bitmasks[j])));
 915                 return;
 916               }
 917         }
 918       else if ((val & aarch64_bitmasks[i]) == val)
 919         {
 920           int j;
 921
 922           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
 923             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
 924               {
 925
 926                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
 927                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 928                                         GEN_INT (aarch64_bitmasks[j])));
 929                 emit_insn (gen_anddi3 (dest, subtarget,
 930                                        GEN_INT (aarch64_bitmasks[i])));
 931                 return;
 932               }
 933         }
 934     }
 935
 936  simple_sequence:
 937   first = true;
 938   mask = 0xffff;
 939   for (i = 0; i < 64; i += 16, mask <<= 16)
 940     {
 941       if ((val & mask) != 0)
 942         {
 943           if (first)
 944             {
 945               emit_insn (gen_rtx_SET (VOIDmode, dest,
 946                                       GEN_INT (val & mask)));
 947               first = false;
 948             }
 949           else
 950             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
 951                                        GEN_INT ((val >> i) & 0xffff)));
 952         }
 953     }
 954 }
 955
 956 static bool
 957 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
 958 {
 959   /* Indirect calls are not currently supported.  */
 960   if (decl == NULL)
 961     return false;
 962
 963   /* Cannot tail-call to long-calls, since these are outside of the
 964      range of a branch instruction (we could handle this if we added
 965      support for indirect tail-calls.  */
 966   if (aarch64_decl_is_long_call_p (decl))
 967     return false;
 968
 969   return true;
 970 }
 971
 972 /* Implement TARGET_PASS_BY_REFERENCE.  */
 973
 974 static bool
 975 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
 976                            enum machine_mode mode,
 977                            const_tree type,
 978                            bool named ATTRIBUTE_UNUSED)
 979 {
 980   HOST_WIDE_INT size;
 981   enum machine_mode dummymode;
 982   int nregs;
 983
 984   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
 985   size = (mode == BLKmode && type)
 986     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
 987
 988   if (type)
 989     {
 990       /* Arrays always passed by reference.  */
 991       if (TREE_CODE (type) == ARRAY_TYPE)
 992         return true;
 993       /* Other aggregates based on their size.  */
 994       if (AGGREGATE_TYPE_P (type))
 995         size = int_size_in_bytes (type);
 996     }
 997
 998   /* Variable sized arguments are always returned by reference.  */
 999   if (size < 0)
1000     return true;
1001
1002   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1003   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004                                                &dummymode, &nregs,
1005                                                NULL))
1006     return false;
1007
1008   /* Arguments which are variable sized or larger than 2 registers are
1009      passed by reference unless they are a homogenous floating point
1010      aggregate.  */
1011   return size > 2 * UNITS_PER_WORD;
1012 }
1013
1014 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1015 static bool
1016 aarch64_return_in_msb (const_tree valtype)
1017 {
1018   enum machine_mode dummy_mode;
1019   int dummy_int;
1020
1021   /* Never happens in little-endian mode.  */
1022   if (!BYTES_BIG_ENDIAN)
1023     return false;
1024
1025   /* Only composite types smaller than or equal to 16 bytes can
1026      be potentially returned in registers.  */
1027   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028       || int_size_in_bytes (valtype) <= 0
1029       || int_size_in_bytes (valtype) > 16)
1030     return false;
1031
1032   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034      is always passed/returned in the least significant bits of fp/simd
1035      register(s).  */
1036   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037                                                &dummy_mode, &dummy_int, NULL))
1038     return false;
1039
1040   return true;
1041 }
1042
1043 /* Implement TARGET_FUNCTION_VALUE.
1044    Define how to find the value returned by a function.  */
1045
1046 static rtx
1047 aarch64_function_value (const_tree type, const_tree func,
1048                         bool outgoing ATTRIBUTE_UNUSED)
1049 {
1050   enum machine_mode mode;
1051   int unsignedp;
1052   int count;
1053   enum machine_mode ag_mode;
1054
1055   mode = TYPE_MODE (type);
1056   if (INTEGRAL_TYPE_P (type))
1057     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1058
1059   if (aarch64_return_in_msb (type))
1060     {
1061       HOST_WIDE_INT size = int_size_in_bytes (type);
1062
1063       if (size % UNITS_PER_WORD != 0)
1064         {
1065           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1067         }
1068     }
1069
1070   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071                                                &ag_mode, &count, NULL))
1072     {
1073       if (!aarch64_composite_type_p (type, mode))
1074         {
1075           gcc_assert (count == 1 && mode == ag_mode);
1076           return gen_rtx_REG (mode, V0_REGNUM);
1077         }
1078       else
1079         {
1080           int i;
1081           rtx par;
1082
1083           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084           for (i = 0; i < count; i++)
1085             {
1086               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089               XVECEXP (par, 0, i) = tmp;
1090             }
1091           return par;
1092         }
1093     }
1094   else
1095     return gen_rtx_REG (mode, R0_REGNUM);
1096 }
1097
1098 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099    Return true if REGNO is the number of a hard register in which the values
1100    of called function may come back.  */
1101
1102 static bool
1103 aarch64_function_value_regno_p (const unsigned int regno)
1104 {
1105   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1106      of 16-byte return values are: 128-bit integers and 16-byte small
1107      structures (excluding homogeneous floating-point aggregates).  */
1108   if (regno == R0_REGNUM || regno == R1_REGNUM)
1109     return true;
1110
1111   /* Up to four fp/simd registers can return a function value, e.g. a
1112      homogeneous floating-point aggregate having four members.  */
1113   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114     return !TARGET_GENERAL_REGS_ONLY;
1115
1116   return false;
1117 }
1118
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1120
1121    If the type T of the result of a function is such that
1122      void func (T arg)
1123    would require that arg be passed as a value in a register (or set of
1124    registers) according to the parameter passing rules, then the result
1125    is returned in the same registers as would be used for such an
1126    argument.  */
1127
1128 static bool
1129 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1130 {
1131   HOST_WIDE_INT size;
1132   enum machine_mode ag_mode;
1133   int count;
1134
1135   if (!AGGREGATE_TYPE_P (type)
1136       && TREE_CODE (type) != COMPLEX_TYPE
1137       && TREE_CODE (type) != VECTOR_TYPE)
1138     /* Simple scalar types always returned in registers.  */
1139     return false;
1140
1141   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142                                                type,
1143                                                &ag_mode,
1144                                                &count,
1145                                                NULL))
1146     return false;
1147
1148   /* Types larger than 2 registers returned in memory.  */
1149   size = int_size_in_bytes (type);
1150   return (size < 0 || size > 2 * UNITS_PER_WORD);
1151 }
1152
1153 static bool
1154 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155                                const_tree type, int *nregs)
1156 {
1157   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158   return aarch64_vfp_is_call_or_return_candidate (mode,
1159                                                   type,
1160                                                   &pcum->aapcs_vfp_rmode,
1161                                                   nregs,
1162                                                   NULL);
1163 }
1164
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166    bits.  The idea is to suppress any stronger alignment requested by
1167    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168    This is a helper function for local use only.  */
1169
1170 static unsigned int
1171 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1172 {
1173   unsigned int alignment;
1174
1175   if (type)
1176     {
1177       if (!integer_zerop (TYPE_SIZE (type)))
1178         {
1179           if (TYPE_MODE (type) == mode)
1180             alignment = TYPE_ALIGN (type);
1181           else
1182             alignment = GET_MODE_ALIGNMENT (mode);
1183         }
1184       else
1185         alignment = 0;
1186     }
1187   else
1188     alignment = GET_MODE_ALIGNMENT (mode);
1189
1190   return alignment;
1191 }
1192
1193 /* Layout a function argument according to the AAPCS64 rules.  The rule
1194    numbers refer to the rule numbers in the AAPCS64.  */
1195
1196 static void
1197 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198                     const_tree type,
1199                     bool named ATTRIBUTE_UNUSED)
1200 {
1201   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202   int ncrn, nvrn, nregs;
1203   bool allocate_ncrn, allocate_nvrn;
1204
1205   /* We need to do this once per argument.  */
1206   if (pcum->aapcs_arg_processed)
1207     return;
1208
1209   pcum->aapcs_arg_processed = true;
1210
1211   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1213                                                  mode,
1214                                                  type,
1215                                                  &nregs);
1216
1217   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1218      The following code thus handles passing by SIMD/FP registers first.  */
1219
1220   nvrn = pcum->aapcs_nvrn;
1221
1222   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1223      and homogenous short-vector aggregates (HVA).  */
1224   if (allocate_nvrn)
1225     {
1226       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1227         {
1228           pcum->aapcs_nextnvrn = nvrn + nregs;
1229           if (!aarch64_composite_type_p (type, mode))
1230             {
1231               gcc_assert (nregs == 1);
1232               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1233             }
1234           else
1235             {
1236               rtx par;
1237               int i;
1238               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239               for (i = 0; i < nregs; i++)
1240                 {
1241                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242                                          V0_REGNUM + nvrn + i);
1243                   tmp = gen_rtx_EXPR_LIST
1244                     (VOIDmode, tmp,
1245                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246                   XVECEXP (par, 0, i) = tmp;
1247                 }
1248               pcum->aapcs_reg = par;
1249             }
1250           return;
1251         }
1252       else
1253         {
1254           /* C.3 NSRN is set to 8.  */
1255           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1256           goto on_stack;
1257         }
1258     }
1259
1260   ncrn = pcum->aapcs_ncrn;
1261   nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262            + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1263
1264
1265   /* C6 - C9.  though the sign and zero extension semantics are
1266      handled elsewhere.  This is the case where the argument fits
1267      entirely general registers.  */
1268   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1269     {
1270       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1271
1272       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1273
1274       /* C.8 if the argument has an alignment of 16 then the NGRN is
1275          rounded up to the next even number.  */
1276       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1277         {
1278           ++ncrn;
1279           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1280         }
1281       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282          A reg is still generated for it, but the caller should be smart
1283          enough not to use it.  */
1284       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1285         {
1286           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1287         }
1288       else
1289         {
1290           rtx par;
1291           int i;
1292
1293           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294           for (i = 0; i < nregs; i++)
1295             {
1296               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298                                        GEN_INT (i * UNITS_PER_WORD));
1299               XVECEXP (par, 0, i) = tmp;
1300             }
1301           pcum->aapcs_reg = par;
1302         }
1303
1304       pcum->aapcs_nextncrn = ncrn + nregs;
1305       return;
1306     }
1307
1308   /* C.11  */
1309   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1310
1311   /* The argument is passed on stack; record the needed number of words for
1312      this argument (we can re-use NREGS) and align the total size if
1313      necessary.  */
1314 on_stack:
1315   pcum->aapcs_stack_words = nregs;
1316   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318                                                16 / UNITS_PER_WORD) + 1;
1319   return;
1320 }
1321
1322 /* Implement TARGET_FUNCTION_ARG.  */
1323
1324 static rtx
1325 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326                       const_tree type, bool named)
1327 {
1328   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1330
1331   if (mode == VOIDmode)
1332     return NULL_RTX;
1333
1334   aarch64_layout_arg (pcum_v, mode, type, named);
1335   return pcum->aapcs_reg;
1336 }
1337
1338 void
1339 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340                            const_tree fntype ATTRIBUTE_UNUSED,
1341                            rtx libname ATTRIBUTE_UNUSED,
1342                            const_tree fndecl ATTRIBUTE_UNUSED,
1343                            unsigned n_named ATTRIBUTE_UNUSED)
1344 {
1345   pcum->aapcs_ncrn = 0;
1346   pcum->aapcs_nvrn = 0;
1347   pcum->aapcs_nextncrn = 0;
1348   pcum->aapcs_nextnvrn = 0;
1349   pcum->pcs_variant = ARM_PCS_AAPCS64;
1350   pcum->aapcs_reg = NULL_RTX;
1351   pcum->aapcs_arg_processed = false;
1352   pcum->aapcs_stack_words = 0;
1353   pcum->aapcs_stack_size = 0;
1354
1355   return;
1356 }
1357
1358 static void
1359 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360                               enum machine_mode mode,
1361                               const_tree type,
1362                               bool named)
1363 {
1364   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1366     {
1367       aarch64_layout_arg (pcum_v, mode, type, named);
1368       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369                   != (pcum->aapcs_stack_words != 0));
1370       pcum->aapcs_arg_processed = false;
1371       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374       pcum->aapcs_stack_words = 0;
1375       pcum->aapcs_reg = NULL_RTX;
1376     }
1377 }
1378
1379 bool
1380 aarch64_function_arg_regno_p (unsigned regno)
1381 {
1382   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1384 }
1385
1386 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1387    PARM_BOUNDARY bits of alignment, but will be given anything up
1388    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1389    that both before and after the layout of each argument, the Next
1390    Stacked Argument Address (NSAA) will have a minimum alignment of
1391    8 bytes.  */
1392
1393 static unsigned int
1394 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1395 {
1396   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1397
1398   if (alignment < PARM_BOUNDARY)
1399     alignment = PARM_BOUNDARY;
1400   if (alignment > STACK_BOUNDARY)
1401     alignment = STACK_BOUNDARY;
1402   return alignment;
1403 }
1404
1405 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1406
1407    Return true if an argument passed on the stack should be padded upwards,
1408    i.e. if the least-significant byte of the stack slot has useful data.
1409
1410    Small aggregate types are placed in the lowest memory address.
1411
1412    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1413
1414 bool
1415 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1416 {
1417   /* On little-endian targets, the least significant byte of every stack
1418      argument is passed at the lowest byte address of the stack slot.  */
1419   if (!BYTES_BIG_ENDIAN)
1420     return true;
1421
1422   /* Otherwise, integral types and floating point types are padded downward:
1423      the least significant byte of a stack argument is passed at the highest
1424      byte address of the stack slot.  */
1425   if (type
1426       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1428     return false;
1429
1430   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1431   return true;
1432 }
1433
1434 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1435
1436    It specifies padding for the last (may also be the only)
1437    element of a block move between registers and memory.  If
1438    assuming the block is in the memory, padding upward means that
1439    the last element is padded after its highest significant byte,
1440    while in downward padding, the last element is padded at the
1441    its least significant byte side.
1442
1443    Small aggregates and small complex types are always padded
1444    upwards.
1445
1446    We don't need to worry about homogeneous floating-point or
1447    short-vector aggregates; their move is not affected by the
1448    padding direction determined here.  Regardless of endianness,
1449    each element of such an aggregate is put in the least
1450    significant bits of a fp/simd register.
1451
1452    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453    register has useful data, and return the opposite if the most
1454    significant byte does.  */
1455
1456 bool
1457 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458                      bool first ATTRIBUTE_UNUSED)
1459 {
1460
1461   /* Small composite types are always padded upward.  */
1462   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1463     {
1464       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465                             : GET_MODE_SIZE (mode));
1466       if (size < 2 * UNITS_PER_WORD)
1467         return true;
1468     }
1469
1470   /* Otherwise, use the default padding.  */
1471   return !BYTES_BIG_ENDIAN;
1472 }
1473
1474 static enum machine_mode
1475 aarch64_libgcc_cmp_return_mode (void)
1476 {
1477   return SImode;
1478 }
1479
1480 static bool
1481 aarch64_frame_pointer_required (void)
1482 {
1483   /* If the function contains dynamic stack allocations, we need to
1484      use the frame pointer to access the static parts of the frame.  */
1485   if (cfun->calls_alloca)
1486     return true;
1487
1488   /* We may have turned flag_omit_frame_pointer on in order to have this
1489      function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490      and we'll check it here.
1491      If we really did set flag_omit_frame_pointer normally, then we return false
1492      (no frame pointer required) in all cases.  */
1493
1494   if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1495     return false;
1496   else if (flag_omit_leaf_frame_pointer)
1497     return !crtl->is_leaf;
1498   return true;
1499 }
1500
1501 /* Mark the registers that need to be saved by the callee and calculate
1502    the size of the callee-saved registers area and frame record (both FP
1503    and LR may be omitted).  */
1504 static void
1505 aarch64_layout_frame (void)
1506 {
1507   HOST_WIDE_INT offset = 0;
1508   int regno;
1509
1510   if (reload_completed && cfun->machine->frame.laid_out)
1511     return;
1512
1513   cfun->machine->frame.fp_lr_offset = 0;
1514
1515   /* First mark all the registers that really need to be saved...  */
1516   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517     cfun->machine->frame.reg_offset[regno] = -1;
1518
1519   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520     cfun->machine->frame.reg_offset[regno] = -1;
1521
1522   /* ... that includes the eh data registers (if needed)...  */
1523   if (crtl->calls_eh_return)
1524     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1526
1527   /* ... and any callee saved register that dataflow says is live.  */
1528   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529     if (df_regs_ever_live_p (regno)
1530         && !call_used_regs[regno])
1531       cfun->machine->frame.reg_offset[regno] = 0;
1532
1533   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534     if (df_regs_ever_live_p (regno)
1535         && !call_used_regs[regno])
1536       cfun->machine->frame.reg_offset[regno] = 0;
1537
1538   if (frame_pointer_needed)
1539     {
1540       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1543     }
1544
1545   /* Now assign stack slots for them.  */
1546   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547     if (cfun->machine->frame.reg_offset[regno] != -1)
1548       {
1549         cfun->machine->frame.reg_offset[regno] = offset;
1550         offset += UNITS_PER_WORD;
1551       }
1552
1553   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554     if (cfun->machine->frame.reg_offset[regno] != -1)
1555       {
1556         cfun->machine->frame.reg_offset[regno] = offset;
1557         offset += UNITS_PER_WORD;
1558       }
1559
1560   if (frame_pointer_needed)
1561     {
1562       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563       offset += UNITS_PER_WORD;
1564       cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1565     }
1566
1567   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1568     {
1569       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570       offset += UNITS_PER_WORD;
1571       cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1572     }
1573
1574   cfun->machine->frame.padding0 =
1575     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1577
1578   cfun->machine->frame.saved_regs_size = offset;
1579   cfun->machine->frame.laid_out = true;
1580 }
1581
1582 /* Make the last instruction frame-related and note that it performs
1583    the operation described by FRAME_PATTERN.  */
1584
1585 static void
1586 aarch64_set_frame_expr (rtx frame_pattern)
1587 {
1588   rtx insn;
1589
1590   insn = get_last_insn ();
1591   RTX_FRAME_RELATED_P (insn) = 1;
1592   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1594                                       frame_pattern,
1595                                       REG_NOTES (insn));
1596 }
1597
1598 static bool
1599 aarch64_register_saved_on_entry (int regno)
1600 {
1601   return cfun->machine->frame.reg_offset[regno] != -1;
1602 }
1603
1604
1605 static void
1606 aarch64_save_or_restore_fprs (int start_offset, int increment,
1607                               bool restore, rtx base_rtx)
1608
1609 {
1610   unsigned regno;
1611   unsigned regno2;
1612   rtx insn;
1613   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1614
1615
1616   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1617     {
1618       if (aarch64_register_saved_on_entry (regno))
1619         {
1620           rtx mem;
1621           mem = gen_mem_ref (DFmode,
1622                              plus_constant (Pmode,
1623                                             base_rtx,
1624                                             start_offset));
1625
1626           for (regno2 = regno + 1;
1627                regno2 <= V31_REGNUM
1628                  && !aarch64_register_saved_on_entry (regno2);
1629                regno2++)
1630             {
1631               /* Empty loop.  */
1632             }
1633           if (regno2 <= V31_REGNUM &&
1634               aarch64_register_saved_on_entry (regno2))
1635             {
1636               rtx mem2;
1637               /* Next highest register to be saved.  */
1638               mem2 = gen_mem_ref (DFmode,
1639                                   plus_constant
1640                                   (Pmode,
1641                                    base_rtx,
1642                                    start_offset + increment));
1643               if (restore == false)
1644                 {
1645                   insn = emit_insn
1646                     ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647                                         mem2, gen_rtx_REG (DFmode, regno2)));
1648
1649                 }
1650               else
1651                 {
1652                   insn = emit_insn
1653                     ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654                                        gen_rtx_REG (DFmode, regno2), mem2));
1655
1656                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1658                 }
1659
1660                   /* The first part of a frame-related parallel insn
1661                      is always assumed to be relevant to the frame
1662                      calculations; subsequent parts, are only
1663                      frame-related if explicitly marked.  */
1664               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1665                                             1)) = 1;
1666               regno = regno2;
1667               start_offset += increment * 2;
1668             }
1669           else
1670             {
1671               if (restore == false)
1672                 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1673               else
1674                 {
1675                   insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1676                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1677                 }
1678               start_offset += increment;
1679             }
1680           RTX_FRAME_RELATED_P (insn) = 1;
1681         }
1682     }
1683
1684 }
1685
1686
1687 /* offset from the stack pointer of where the saves and
1688    restore's have to happen.  */
1689 static void
1690 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1691                                             bool restore)
1692 {
1693   rtx insn;
1694   rtx base_rtx = stack_pointer_rtx;
1695   HOST_WIDE_INT start_offset = offset;
1696   HOST_WIDE_INT increment = UNITS_PER_WORD;
1697   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1698   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1699   unsigned regno;
1700   unsigned regno2;
1701
1702   for (regno = R0_REGNUM; regno <= limit; regno++)
1703     {
1704       if (aarch64_register_saved_on_entry (regno))
1705         {
1706           rtx mem;
1707           mem = gen_mem_ref (Pmode,
1708                              plus_constant (Pmode,
1709                                             base_rtx,
1710                                             start_offset));
1711
1712           for (regno2 = regno + 1;
1713                regno2 <= limit
1714                  && !aarch64_register_saved_on_entry (regno2);
1715                regno2++)
1716             {
1717               /* Empty loop.  */
1718             }
1719           if (regno2 <= limit &&
1720               aarch64_register_saved_on_entry (regno2))
1721             {
1722               rtx mem2;
1723               /* Next highest register to be saved.  */
1724               mem2 = gen_mem_ref (Pmode,
1725                                   plus_constant
1726                                   (Pmode,
1727                                    base_rtx,
1728                                    start_offset + increment));
1729               if (restore == false)
1730                 {
1731                   insn = emit_insn
1732                     ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733                                         mem2, gen_rtx_REG (DImode, regno2)));
1734
1735                 }
1736               else
1737                 {
1738                   insn = emit_insn
1739                     ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740                                      gen_rtx_REG (DImode, regno2), mem2));
1741
1742                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1744                 }
1745
1746                   /* The first part of a frame-related parallel insn
1747                      is always assumed to be relevant to the frame
1748                      calculations; subsequent parts, are only
1749                      frame-related if explicitly marked.  */
1750               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1751                                             1)) = 1;
1752               regno = regno2;
1753               start_offset += increment * 2;
1754             }
1755           else
1756             {
1757               if (restore == false)
1758                 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1759               else
1760                 {
1761                   insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1763                 }
1764               start_offset += increment;
1765             }
1766           RTX_FRAME_RELATED_P (insn) = 1;
1767         }
1768     }
1769
1770   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1771
1772 }
1773
1774 /* AArch64 stack frames generated by this compiler look like:
1775
1776         +-------------------------------+
1777         |                               |
1778         |  incoming stack arguments     |
1779         |                               |
1780         +-------------------------------+ <-- arg_pointer_rtx
1781         |                               |
1782         |  callee-allocated save area   |
1783         |  for register varargs         |
1784         |                               |
1785         +-------------------------------+
1786         |                               |
1787         |  local variables              |
1788         |                               |
1789         +-------------------------------+ <-- frame_pointer_rtx
1790         |                               |
1791         |  callee-saved registers       |
1792         |                               |
1793         +-------------------------------+
1794         |  LR'                          |
1795         +-------------------------------+
1796         |  FP'                          |
1797       P +-------------------------------+ <-- hard_frame_pointer_rtx
1798         |  dynamic allocation           |
1799         +-------------------------------+
1800         |                               |
1801         |  outgoing stack arguments     |
1802         |                               |
1803         +-------------------------------+ <-- stack_pointer_rtx
1804
1805    Dynamic stack allocations such as alloca insert data at point P.
1806    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807    hard_frame_pointer_rtx unchanged.  */
1808
1809 /* Generate the prologue instructions for entry into a function.
1810    Establish the stack frame by decreasing the stack pointer with a
1811    properly calculated size and, if necessary, create a frame record
1812    filled with the values of LR and previous frame pointer.  The
1813    current FP is also set up is it is in use.  */
1814
1815 void
1816 aarch64_expand_prologue (void)
1817 {
1818   /* sub sp, sp, #<frame_size>
1819      stp {fp, lr}, [sp, #<frame_size> - 16]
1820      add fp, sp, #<frame_size> - hardfp_offset
1821      stp {cs_reg}, [fp, #-16] etc.
1822
1823      sub sp, sp, <final_adjustment_if_any>
1824   */
1825   HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
1826   HOST_WIDE_INT frame_size, offset;
1827   HOST_WIDE_INT fp_offset;              /* FP offset from SP */
1828   rtx insn;
1829
1830   aarch64_layout_frame ();
1831   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833               && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835                 + crtl->outgoing_args_size);
1836   offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837                                           STACK_BOUNDARY / BITS_PER_UNIT);
1838
1839   if (flag_stack_usage_info)
1840     current_function_static_stack_size = frame_size;
1841
1842   fp_offset = (offset
1843                - original_frame_size
1844                - cfun->machine->frame.saved_regs_size);
1845
1846   /* Store pairs and load pairs have a range only -512 to 504.  */
1847   if (offset >= 512)
1848     {
1849       /* When the frame has a large size, an initial decrease is done on
1850          the stack pointer to jump over the callee-allocated save area for
1851          register varargs, the local variable area and/or the callee-saved
1852          register area.  This will allow the pre-index write-back
1853          store pair instructions to be used for setting up the stack frame
1854          efficiently.  */
1855       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856       if (offset >= 512)
1857         offset = cfun->machine->frame.saved_regs_size;
1858
1859       frame_size -= (offset + crtl->outgoing_args_size);
1860       fp_offset = 0;
1861
1862       if (frame_size >= 0x1000000)
1863         {
1864           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865           emit_move_insn (op0, GEN_INT (-frame_size));
1866           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867           aarch64_set_frame_expr (gen_rtx_SET
1868                                   (Pmode, stack_pointer_rtx,
1869                                    gen_rtx_PLUS (Pmode,
1870                                                  stack_pointer_rtx,
1871                                                  GEN_INT (-frame_size))));
1872         }
1873       else if (frame_size > 0)
1874         {
1875           if ((frame_size & 0xfff) != frame_size)
1876             {
1877               insn = emit_insn (gen_add2_insn
1878                                 (stack_pointer_rtx,
1879                                  GEN_INT (-(frame_size
1880                                             & ~(HOST_WIDE_INT)0xfff))));
1881               RTX_FRAME_RELATED_P (insn) = 1;
1882             }
1883           if ((frame_size & 0xfff) != 0)
1884             {
1885               insn = emit_insn (gen_add2_insn
1886                                 (stack_pointer_rtx,
1887                                  GEN_INT (-(frame_size
1888                                             & (HOST_WIDE_INT)0xfff))));
1889               RTX_FRAME_RELATED_P (insn) = 1;
1890             }
1891         }
1892     }
1893   else
1894     frame_size = -1;
1895
1896   if (offset > 0)
1897     {
1898       /* Save the frame pointer and lr if the frame pointer is needed
1899          first.  Make the frame pointer point to the location of the
1900          old frame pointer on the stack.  */
1901       if (frame_pointer_needed)
1902         {
1903           rtx mem_fp, mem_lr;
1904
1905           if (fp_offset)
1906             {
1907               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908                                                GEN_INT (-offset)));
1909               RTX_FRAME_RELATED_P (insn) = 1;
1910               aarch64_set_frame_expr (gen_rtx_SET
1911                                       (Pmode, stack_pointer_rtx,
1912                                        gen_rtx_MINUS (Pmode,
1913                                                       stack_pointer_rtx,
1914                                                       GEN_INT (offset))));
1915               mem_fp = gen_frame_mem (DImode,
1916                                       plus_constant (Pmode,
1917                                                      stack_pointer_rtx,
1918                                                      fp_offset));
1919               mem_lr = gen_frame_mem (DImode,
1920                                       plus_constant (Pmode,
1921                                                      stack_pointer_rtx,
1922                                                      fp_offset
1923                                                      + UNITS_PER_WORD));
1924               insn = emit_insn (gen_store_pairdi (mem_fp,
1925                                                   hard_frame_pointer_rtx,
1926                                                   mem_lr,
1927                                                   gen_rtx_REG (DImode,
1928                                                                LR_REGNUM)));
1929             }
1930           else
1931             {
1932               insn = emit_insn (gen_storewb_pairdi_di
1933                                 (stack_pointer_rtx, stack_pointer_rtx,
1934                                  hard_frame_pointer_rtx,
1935                                  gen_rtx_REG (DImode, LR_REGNUM),
1936                                  GEN_INT (-offset),
1937                                  GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1939             }
1940
1941           /* The first part of a frame-related parallel insn is always
1942              assumed to be relevant to the frame calculations;
1943              subsequent parts, are only frame-related if explicitly
1944              marked.  */
1945           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946           RTX_FRAME_RELATED_P (insn) = 1;
1947
1948           /* Set up frame pointer to point to the location of the
1949              previous frame pointer on the stack.  */
1950           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951                                            stack_pointer_rtx,
1952                                            GEN_INT (fp_offset)));
1953           aarch64_set_frame_expr (gen_rtx_SET
1954                                   (Pmode, hard_frame_pointer_rtx,
1955                                    gen_rtx_PLUS (Pmode,
1956                                                  stack_pointer_rtx,
1957                                                  GEN_INT (fp_offset))));
1958           RTX_FRAME_RELATED_P (insn) = 1;
1959           insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960                                            hard_frame_pointer_rtx));
1961         }
1962       else
1963         {
1964           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965                                            GEN_INT (-offset)));
1966           RTX_FRAME_RELATED_P (insn) = 1;
1967         }
1968
1969       aarch64_save_or_restore_callee_save_registers
1970         (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1971     }
1972
1973   /* when offset >= 512,
1974      sub sp, sp, #<outgoing_args_size> */
1975   if (frame_size > -1)
1976     {
1977       if (crtl->outgoing_args_size > 0)
1978         {
1979           insn = emit_insn (gen_add2_insn
1980                             (stack_pointer_rtx,
1981                              GEN_INT (- crtl->outgoing_args_size)));
1982           RTX_FRAME_RELATED_P (insn) = 1;
1983         }
1984     }
1985 }
1986
1987 /* Generate the epilogue instructions for returning from a function.  */
1988 void
1989 aarch64_expand_epilogue (bool for_sibcall)
1990 {
1991   HOST_WIDE_INT original_frame_size, frame_size, offset;
1992   HOST_WIDE_INT fp_offset;
1993   rtx insn;
1994   rtx cfa_reg;
1995
1996   aarch64_layout_frame ();
1997   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999                 + crtl->outgoing_args_size);
2000   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001                                           STACK_BOUNDARY / BITS_PER_UNIT);
2002
2003   fp_offset = (offset
2004                - original_frame_size
2005                - cfun->machine->frame.saved_regs_size);
2006
2007   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2008
2009   /* Store pairs and load pairs have a range only -512 to 504.  */
2010   if (offset >= 512)
2011     {
2012       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013       if (offset >= 512)
2014         offset = cfun->machine->frame.saved_regs_size;
2015
2016       frame_size -= (offset + crtl->outgoing_args_size);
2017       fp_offset = 0;
2018       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2019         {
2020           insn = emit_insn (gen_add2_insn
2021                             (stack_pointer_rtx,
2022                              GEN_INT (crtl->outgoing_args_size)));
2023           RTX_FRAME_RELATED_P (insn) = 1;
2024         }
2025     }
2026   else
2027     frame_size = -1;
2028
2029   /* If there were outgoing arguments or we've done dynamic stack
2030      allocation, then restore the stack pointer from the frame
2031      pointer.  This is at most one insn and more efficient than using
2032      GCC's internal mechanism.  */
2033   if (frame_pointer_needed
2034       && (crtl->outgoing_args_size || cfun->calls_alloca))
2035     {
2036       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037                                        hard_frame_pointer_rtx,
2038                                        GEN_INT (- fp_offset)));
2039       RTX_FRAME_RELATED_P (insn) = 1;
2040       /* As SP is set to (FP - fp_offset), according to the rules in
2041          dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042          from the value of SP from now on.  */
2043       cfa_reg = stack_pointer_rtx;
2044     }
2045
2046   aarch64_save_or_restore_callee_save_registers
2047     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2048
2049   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2050   if (offset > 0)
2051     {
2052       if (frame_pointer_needed)
2053         {
2054           rtx mem_fp, mem_lr;
2055
2056           if (fp_offset)
2057             {
2058               mem_fp = gen_frame_mem (DImode,
2059                                       plus_constant (Pmode,
2060                                                      stack_pointer_rtx,
2061                                                      fp_offset));
2062               mem_lr = gen_frame_mem (DImode,
2063                                       plus_constant (Pmode,
2064                                                      stack_pointer_rtx,
2065                                                      fp_offset
2066                                                      + UNITS_PER_WORD));
2067               insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068                                                  mem_fp,
2069                                                  gen_rtx_REG (DImode,
2070                                                               LR_REGNUM),
2071                                                  mem_lr));
2072             }
2073           else
2074             {
2075               insn = emit_insn (gen_loadwb_pairdi_di
2076                                 (stack_pointer_rtx,
2077                                  stack_pointer_rtx,
2078                                  hard_frame_pointer_rtx,
2079                                  gen_rtx_REG (DImode, LR_REGNUM),
2080                                  GEN_INT (offset),
2081                                  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083               add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084                             (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085                                           plus_constant (Pmode, cfa_reg,
2086                                                          offset))));
2087             }
2088
2089           /* The first part of a frame-related parallel insn
2090              is always assumed to be relevant to the frame
2091              calculations; subsequent parts, are only
2092              frame-related if explicitly marked.  */
2093           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094           RTX_FRAME_RELATED_P (insn) = 1;
2095           add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096           add_reg_note (insn, REG_CFA_RESTORE,
2097                         gen_rtx_REG (DImode, LR_REGNUM));
2098
2099           if (fp_offset)
2100             {
2101               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102                                                GEN_INT (offset)));
2103               RTX_FRAME_RELATED_P (insn) = 1;
2104             }
2105         }
2106       else
2107         {
2108           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109                                            GEN_INT (offset)));
2110           RTX_FRAME_RELATED_P (insn) = 1;
2111         }
2112     }
2113
2114   /* Stack adjustment for exception handler.  */
2115   if (crtl->calls_eh_return)
2116     {
2117       /* We need to unwind the stack by the offset computed by
2118          EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2119          based on SP.  Ideally we would update the SP and define the
2120          CFA along the lines of:
2121
2122          SP = SP + EH_RETURN_STACKADJ_RTX
2123          (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2124
2125          However the dwarf emitter only understands a constant
2126          register offset.
2127
2128          The solution choosen here is to use the otherwise unused IP0
2129          as a temporary register to hold the current SP value.  The
2130          CFA is described using IP0 then SP is modified.  */
2131
2132       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2133
2134       insn = emit_move_insn (ip0, stack_pointer_rtx);
2135       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136       RTX_FRAME_RELATED_P (insn) = 1;
2137
2138       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2139
2140       /* Ensure the assignment to IP0 does not get optimized away.  */
2141       emit_use (ip0);
2142     }
2143
2144   if (frame_size > -1)
2145     {
2146       if (frame_size >= 0x1000000)
2147         {
2148           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149           emit_move_insn (op0, GEN_INT (frame_size));
2150           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151           aarch64_set_frame_expr (gen_rtx_SET
2152                                   (Pmode, stack_pointer_rtx,
2153                                    gen_rtx_PLUS (Pmode,
2154                                                  stack_pointer_rtx,
2155                                                  GEN_INT (frame_size))));
2156         }
2157       else if (frame_size > 0)
2158         {
2159           if ((frame_size & 0xfff) != 0)
2160             {
2161               insn = emit_insn (gen_add2_insn
2162                                 (stack_pointer_rtx,
2163                                  GEN_INT ((frame_size
2164                                            & (HOST_WIDE_INT) 0xfff))));
2165               RTX_FRAME_RELATED_P (insn) = 1;
2166             }
2167           if ((frame_size & 0xfff) != frame_size)
2168             {
2169               insn = emit_insn (gen_add2_insn
2170                                 (stack_pointer_rtx,
2171                                  GEN_INT ((frame_size
2172                                            & ~ (HOST_WIDE_INT) 0xfff))));
2173               RTX_FRAME_RELATED_P (insn) = 1;
2174             }
2175         }
2176
2177       aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178                                            gen_rtx_PLUS (Pmode,
2179                                                          stack_pointer_rtx,
2180                                                          GEN_INT (offset))));
2181     }
2182
2183   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184   if (!for_sibcall)
2185     emit_jump_insn (ret_rtx);
2186 }
2187
2188 /* Return the place to copy the exception unwinding return address to.
2189    This will probably be a stack slot, but could (in theory be the
2190    return register).  */
2191 rtx
2192 aarch64_final_eh_return_addr (void)
2193 {
2194   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195   aarch64_layout_frame ();
2196   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198                 + crtl->outgoing_args_size);
2199   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200                                           STACK_BOUNDARY / BITS_PER_UNIT);
2201   fp_offset = offset
2202     - original_frame_size
2203     - cfun->machine->frame.saved_regs_size;
2204
2205   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206     return gen_rtx_REG (DImode, LR_REGNUM);
2207
2208   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2209      result in a store to save LR introduced by builtin_eh_return () being
2210      incorrectly deleted because the alias is not detected.
2211      So in the calculation of the address to copy the exception unwinding
2212      return address to, we note 2 cases.
2213      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214      we return a SP-relative location since all the addresses are SP-relative
2215      in this case.  This prevents the store from being optimized away.
2216      If the fp_offset is not 0, then the addresses will be FP-relative and
2217      therefore we return a FP-relative location.  */
2218
2219   if (frame_pointer_needed)
2220     {
2221       if (fp_offset)
2222         return gen_frame_mem (DImode,
2223                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224       else
2225         return gen_frame_mem (DImode,
2226                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2227     }
2228
2229   /* If FP is not needed, we calculate the location of LR, which would be
2230      at the top of the saved registers block.  */
2231
2232   return gen_frame_mem (DImode,
2233                         plus_constant (Pmode,
2234                                        stack_pointer_rtx,
2235                                        fp_offset
2236                                        + cfun->machine->frame.saved_regs_size
2237                                        - 2 * UNITS_PER_WORD));
2238 }
2239
2240 /* Output code to build up a constant in a register.  */
2241 static void
2242 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2243 {
2244   if (aarch64_bitmask_imm (val, DImode))
2245     emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2246   else
2247     {
2248       int i;
2249       int ncount = 0;
2250       int zcount = 0;
2251       HOST_WIDE_INT valp = val >> 16;
2252       HOST_WIDE_INT valm;
2253       HOST_WIDE_INT tval;
2254
2255       for (i = 16; i < 64; i += 16)
2256         {
2257           valm = (valp & 0xffff);
2258
2259           if (valm != 0)
2260             ++ zcount;
2261
2262           if (valm != 0xffff)
2263             ++ ncount;
2264
2265           valp >>= 16;
2266         }
2267
2268       /* zcount contains the number of additional MOVK instructions
2269          required if the constant is built up with an initial MOVZ instruction,
2270          while ncount is the number of MOVK instructions required if starting
2271          with a MOVN instruction.  Choose the sequence that yields the fewest
2272          number of instructions, preferring MOVZ instructions when they are both
2273          the same.  */
2274       if (ncount < zcount)
2275         {
2276           emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277                           GEN_INT ((~val) & 0xffff));
2278           tval = 0xffff;
2279         }
2280       else
2281         {
2282           emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283                           GEN_INT (val & 0xffff));
2284           tval = 0;
2285         }
2286
2287       val >>= 16;
2288
2289       for (i = 16; i < 64; i += 16)
2290         {
2291           if ((val & 0xffff) != tval)
2292             emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293                                        GEN_INT (i), GEN_INT (val & 0xffff)));
2294           val >>= 16;
2295         }
2296     }
2297 }
2298
2299 static void
2300 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2301 {
2302   HOST_WIDE_INT mdelta = delta;
2303   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2305
2306   if (mdelta < 0)
2307     mdelta = -mdelta;
2308
2309   if (mdelta >= 4096 * 4096)
2310     {
2311       aarch64_build_constant (scratchreg, delta);
2312       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2313     }
2314   else if (mdelta > 0)
2315     {
2316       if (mdelta >= 4096)
2317         {
2318           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320           if (delta < 0)
2321             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323           else
2324             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2326         }
2327       if (mdelta % 4096 != 0)
2328         {
2329           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2332         }
2333     }
2334 }
2335
2336 /* Output code to add DELTA to the first argument, and then jump
2337    to FUNCTION.  Used for C++ multiple inheritance.  */
2338 static void
2339 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340                          HOST_WIDE_INT delta,
2341                          HOST_WIDE_INT vcall_offset,
2342                          tree function)
2343 {
2344   /* The this pointer is always in x0.  Note that this differs from
2345      Arm where the this pointer maybe bumped to r1 if r0 is required
2346      to return a pointer to an aggregate.  On AArch64 a result value
2347      pointer will be in x8.  */
2348   int this_regno = R0_REGNUM;
2349   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2350
2351   reload_completed = 1;
2352   emit_note (NOTE_INSN_PROLOGUE_END);
2353
2354   if (vcall_offset == 0)
2355     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2356   else
2357     {
2358       gcc_assert ((vcall_offset & 0x7) == 0);
2359
2360       this_rtx = gen_rtx_REG (Pmode, this_regno);
2361       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2363
2364       addr = this_rtx;
2365       if (delta != 0)
2366         {
2367           if (delta >= -256 && delta < 256)
2368             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369                                        plus_constant (Pmode, this_rtx, delta));
2370           else
2371             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2372         }
2373
2374       aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2375
2376       if (vcall_offset >= -256 && vcall_offset < 32768)
2377           addr = plus_constant (Pmode, temp0, vcall_offset);
2378       else
2379         {
2380           aarch64_build_constant (IP1_REGNUM, vcall_offset);
2381           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2382         }
2383
2384       aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr));
2385       emit_insn (gen_add2_insn (this_rtx, temp1));
2386     }
2387
2388   /* Generate a tail call to the target function.  */
2389   if (!TREE_USED (function))
2390     {
2391       assemble_external (function);
2392       TREE_USED (function) = 1;
2393     }
2394   funexp = XEXP (DECL_RTL (function), 0);
2395   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397   SIBLING_CALL_P (insn) = 1;
2398
2399   insn = get_insns ();
2400   shorten_branches (insn);
2401   final_start_function (insn, file, 1);
2402   final (insn, file, 1);
2403   final_end_function ();
2404
2405   /* Stop pretending to be a post-reload pass.  */
2406   reload_completed = 0;
2407 }
2408
2409 static int
2410 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2411 {
2412   if (GET_CODE (*x) == SYMBOL_REF)
2413     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2414
2415   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416      TLS offsets, not real symbol references.  */
2417   if (GET_CODE (*x) == UNSPEC
2418       && XINT (*x, 1) == UNSPEC_TLS)
2419     return -1;
2420
2421   return 0;
2422 }
2423
2424 static bool
2425 aarch64_tls_referenced_p (rtx x)
2426 {
2427   if (!TARGET_HAVE_TLS)
2428     return false;
2429
2430   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2431 }
2432
2433
2434 static int
2435 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2436 {
2437   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2439
2440   if (*imm1 < *imm2)
2441     return -1;
2442   if (*imm1 > *imm2)
2443     return +1;
2444   return 0;
2445 }
2446
2447
2448 static void
2449 aarch64_build_bitmask_table (void)
2450 {
2451   unsigned HOST_WIDE_INT mask, imm;
2452   unsigned int log_e, e, s, r;
2453   unsigned int nimms = 0;
2454
2455   for (log_e = 1; log_e <= 6; log_e++)
2456     {
2457       e = 1 << log_e;
2458       if (e == 64)
2459         mask = ~(HOST_WIDE_INT) 0;
2460       else
2461         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462       for (s = 1; s < e; s++)
2463         {
2464           for (r = 0; r < e; r++)
2465             {
2466               /* set s consecutive bits to 1 (s < 64) */
2467               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468               /* rotate right by r */
2469               if (r != 0)
2470                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2471               /* replicate the constant depending on SIMD size */
2472               switch (log_e) {
2473               case 1: imm |= (imm <<  2);
2474               case 2: imm |= (imm <<  4);
2475               case 3: imm |= (imm <<  8);
2476               case 4: imm |= (imm << 16);
2477               case 5: imm |= (imm << 32);
2478               case 6:
2479                 break;
2480               default:
2481                 gcc_unreachable ();
2482               }
2483               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484               aarch64_bitmasks[nimms++] = imm;
2485             }
2486         }
2487     }
2488
2489   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491          aarch64_bitmasks_cmp);
2492 }
2493
2494
2495 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2496    a left shift of 0 or 12 bits.  */
2497 bool
2498 aarch64_uimm12_shift (HOST_WIDE_INT val)
2499 {
2500   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2502           );
2503 }
2504
2505
2506 /* Return true if val is an immediate that can be loaded into a
2507    register by a MOVZ instruction.  */
2508 static bool
2509 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2510 {
2511   if (GET_MODE_SIZE (mode) > 4)
2512     {
2513       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515         return 1;
2516     }
2517   else
2518     {
2519       /* Ignore sign extension.  */
2520       val &= (HOST_WIDE_INT) 0xffffffff;
2521     }
2522   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2524 }
2525
2526
2527 /* Return true if val is a valid bitmask immediate.  */
2528 bool
2529 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2530 {
2531   if (GET_MODE_SIZE (mode) < 8)
2532     {
2533       /* Replicate bit pattern.  */
2534       val &= (HOST_WIDE_INT) 0xffffffff;
2535       val |= val << 32;
2536     }
2537   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2539 }
2540
2541
2542 /* Return true if val is an immediate that can be loaded into a
2543    register in a single instruction.  */
2544 bool
2545 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2546 {
2547   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548     return 1;
2549   return aarch64_bitmask_imm (val, mode);
2550 }
2551
2552 static bool
2553 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2554 {
2555   rtx base, offset;
2556   if (GET_CODE (x) == HIGH)
2557     return true;
2558
2559   split_const (x, &base, &offset);
2560   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561     return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2562
2563   return aarch64_tls_referenced_p (x);
2564 }
2565
2566 /* Return true if register REGNO is a valid index register.
2567    STRICT_P is true if REG_OK_STRICT is in effect.  */
2568
2569 bool
2570 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2571 {
2572   if (!HARD_REGISTER_NUM_P (regno))
2573     {
2574       if (!strict_p)
2575         return true;
2576
2577       if (!reg_renumber)
2578         return false;
2579
2580       regno = reg_renumber[regno];
2581     }
2582   return GP_REGNUM_P (regno);
2583 }
2584
2585 /* Return true if register REGNO is a valid base register for mode MODE.
2586    STRICT_P is true if REG_OK_STRICT is in effect.  */
2587
2588 bool
2589 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2590 {
2591   if (!HARD_REGISTER_NUM_P (regno))
2592     {
2593       if (!strict_p)
2594         return true;
2595
2596       if (!reg_renumber)
2597         return false;
2598
2599       regno = reg_renumber[regno];
2600     }
2601
2602   /* The fake registers will be eliminated to either the stack or
2603      hard frame pointer, both of which are usually valid base registers.
2604      Reload deals with the cases where the eliminated form isn't valid.  */
2605   return (GP_REGNUM_P (regno)
2606           || regno == SP_REGNUM
2607           || regno == FRAME_POINTER_REGNUM
2608           || regno == ARG_POINTER_REGNUM);
2609 }
2610
2611 /* Return true if X is a valid base register for mode MODE.
2612    STRICT_P is true if REG_OK_STRICT is in effect.  */
2613
2614 static bool
2615 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2616 {
2617   if (!strict_p && GET_CODE (x) == SUBREG)
2618     x = SUBREG_REG (x);
2619
2620   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2621 }
2622
2623 /* Return true if address offset is a valid index.  If it is, fill in INFO
2624    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2625
2626 static bool
2627 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628                         enum machine_mode mode, bool strict_p)
2629 {
2630   enum aarch64_address_type type;
2631   rtx index;
2632   int shift;
2633
2634   /* (reg:P) */
2635   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636       && GET_MODE (x) == Pmode)
2637     {
2638       type = ADDRESS_REG_REG;
2639       index = x;
2640       shift = 0;
2641     }
2642   /* (sign_extend:DI (reg:SI)) */
2643   else if ((GET_CODE (x) == SIGN_EXTEND
2644             || GET_CODE (x) == ZERO_EXTEND)
2645            && GET_MODE (x) == DImode
2646            && GET_MODE (XEXP (x, 0)) == SImode)
2647     {
2648       type = (GET_CODE (x) == SIGN_EXTEND)
2649         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650       index = XEXP (x, 0);
2651       shift = 0;
2652     }
2653   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654   else if (GET_CODE (x) == MULT
2655            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657            && GET_MODE (XEXP (x, 0)) == DImode
2658            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659            && CONST_INT_P (XEXP (x, 1)))
2660     {
2661       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663       index = XEXP (XEXP (x, 0), 0);
2664       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2665     }
2666   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667   else if (GET_CODE (x) == ASHIFT
2668            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670            && GET_MODE (XEXP (x, 0)) == DImode
2671            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672            && CONST_INT_P (XEXP (x, 1)))
2673     {
2674       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676       index = XEXP (XEXP (x, 0), 0);
2677       shift = INTVAL (XEXP (x, 1));
2678     }
2679   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680   else if ((GET_CODE (x) == SIGN_EXTRACT
2681             || GET_CODE (x) == ZERO_EXTRACT)
2682            && GET_MODE (x) == DImode
2683            && GET_CODE (XEXP (x, 0)) == MULT
2684            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2686     {
2687       type = (GET_CODE (x) == SIGN_EXTRACT)
2688         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689       index = XEXP (XEXP (x, 0), 0);
2690       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691       if (INTVAL (XEXP (x, 1)) != 32 + shift
2692           || INTVAL (XEXP (x, 2)) != 0)
2693         shift = -1;
2694     }
2695   /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696      (const_int 0xffffffff<<shift)) */
2697   else if (GET_CODE (x) == AND
2698            && GET_MODE (x) == DImode
2699            && GET_CODE (XEXP (x, 0)) == MULT
2700            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702            && CONST_INT_P (XEXP (x, 1)))
2703     {
2704       type = ADDRESS_REG_UXTW;
2705       index = XEXP (XEXP (x, 0), 0);
2706       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708         shift = -1;
2709     }
2710   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711   else if ((GET_CODE (x) == SIGN_EXTRACT
2712             || GET_CODE (x) == ZERO_EXTRACT)
2713            && GET_MODE (x) == DImode
2714            && GET_CODE (XEXP (x, 0)) == ASHIFT
2715            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2717     {
2718       type = (GET_CODE (x) == SIGN_EXTRACT)
2719         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720       index = XEXP (XEXP (x, 0), 0);
2721       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722       if (INTVAL (XEXP (x, 1)) != 32 + shift
2723           || INTVAL (XEXP (x, 2)) != 0)
2724         shift = -1;
2725     }
2726   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727      (const_int 0xffffffff<<shift)) */
2728   else if (GET_CODE (x) == AND
2729            && GET_MODE (x) == DImode
2730            && GET_CODE (XEXP (x, 0)) == ASHIFT
2731            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733            && CONST_INT_P (XEXP (x, 1)))
2734     {
2735       type = ADDRESS_REG_UXTW;
2736       index = XEXP (XEXP (x, 0), 0);
2737       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739         shift = -1;
2740     }
2741   /* (mult:P (reg:P) (const_int scale)) */
2742   else if (GET_CODE (x) == MULT
2743            && GET_MODE (x) == Pmode
2744            && GET_MODE (XEXP (x, 0)) == Pmode
2745            && CONST_INT_P (XEXP (x, 1)))
2746     {
2747       type = ADDRESS_REG_REG;
2748       index = XEXP (x, 0);
2749       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2750     }
2751   /* (ashift:P (reg:P) (const_int shift)) */
2752   else if (GET_CODE (x) == ASHIFT
2753            && GET_MODE (x) == Pmode
2754            && GET_MODE (XEXP (x, 0)) == Pmode
2755            && CONST_INT_P (XEXP (x, 1)))
2756     {
2757       type = ADDRESS_REG_REG;
2758       index = XEXP (x, 0);
2759       shift = INTVAL (XEXP (x, 1));
2760     }
2761   else
2762     return false;
2763
2764   if (GET_CODE (index) == SUBREG)
2765     index = SUBREG_REG (index);
2766
2767   if ((shift == 0 ||
2768        (shift > 0 && shift <= 3
2769         && (1 << shift) == GET_MODE_SIZE (mode)))
2770       && REG_P (index)
2771       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2772     {
2773       info->type = type;
2774       info->offset = index;
2775       info->shift = shift;
2776       return true;
2777     }
2778
2779   return false;
2780 }
2781
2782 static inline bool
2783 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2784 {
2785   return (offset >= -64 * GET_MODE_SIZE (mode)
2786           && offset < 64 * GET_MODE_SIZE (mode)
2787           && offset % GET_MODE_SIZE (mode) == 0);
2788 }
2789
2790 static inline bool
2791 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792                                HOST_WIDE_INT offset)
2793 {
2794   return offset >= -256 && offset < 256;
2795 }
2796
2797 static inline bool
2798 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2799 {
2800   return (offset >= 0
2801           && offset < 4096 * GET_MODE_SIZE (mode)
2802           && offset % GET_MODE_SIZE (mode) == 0);
2803 }
2804
2805 /* Return true if X is a valid address for machine mode MODE.  If it is,
2806    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
2807    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
2808
2809 static bool
2810 aarch64_classify_address (struct aarch64_address_info *info,
2811                           rtx x, enum machine_mode mode,
2812                           RTX_CODE outer_code, bool strict_p)
2813 {
2814   enum rtx_code code = GET_CODE (x);
2815   rtx op0, op1;
2816   bool allow_reg_index_p =
2817     outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2818
2819   /* Don't support anything other than POST_INC or REG addressing for
2820      AdvSIMD.  */
2821   if (aarch64_vector_mode_p (mode)
2822       && (code != POST_INC && code != REG))
2823     return false;
2824
2825   switch (code)
2826     {
2827     case REG:
2828     case SUBREG:
2829       info->type = ADDRESS_REG_IMM;
2830       info->base = x;
2831       info->offset = const0_rtx;
2832       return aarch64_base_register_rtx_p (x, strict_p);
2833
2834     case PLUS:
2835       op0 = XEXP (x, 0);
2836       op1 = XEXP (x, 1);
2837       if (GET_MODE_SIZE (mode) != 0
2838           && CONST_INT_P (op1)
2839           && aarch64_base_register_rtx_p (op0, strict_p))
2840         {
2841           HOST_WIDE_INT offset = INTVAL (op1);
2842
2843           info->type = ADDRESS_REG_IMM;
2844           info->base = op0;
2845           info->offset = op1;
2846
2847           /* TImode and TFmode values are allowed in both pairs of X
2848              registers and individual Q registers.  The available
2849              address modes are:
2850              X,X: 7-bit signed scaled offset
2851              Q:   9-bit signed offset
2852              We conservatively require an offset representable in either mode.
2853            */
2854           if (mode == TImode || mode == TFmode)
2855             return (offset_7bit_signed_scaled_p (mode, offset)
2856                     && offset_9bit_signed_unscaled_p (mode, offset));
2857
2858           if (outer_code == PARALLEL)
2859             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860                     && offset_7bit_signed_scaled_p (mode, offset));
2861           else
2862             return (offset_9bit_signed_unscaled_p (mode, offset)
2863                     || offset_12bit_unsigned_scaled_p (mode, offset));
2864         }
2865
2866       if (allow_reg_index_p)
2867         {
2868           /* Look for base + (scaled/extended) index register.  */
2869           if (aarch64_base_register_rtx_p (op0, strict_p)
2870               && aarch64_classify_index (info, op1, mode, strict_p))
2871             {
2872               info->base = op0;
2873               return true;
2874             }
2875           if (aarch64_base_register_rtx_p (op1, strict_p)
2876               && aarch64_classify_index (info, op0, mode, strict_p))
2877             {
2878               info->base = op1;
2879               return true;
2880             }
2881         }
2882
2883       return false;
2884
2885     case POST_INC:
2886     case POST_DEC:
2887     case PRE_INC:
2888     case PRE_DEC:
2889       info->type = ADDRESS_REG_WB;
2890       info->base = XEXP (x, 0);
2891       info->offset = NULL_RTX;
2892       return aarch64_base_register_rtx_p (info->base, strict_p);
2893
2894     case POST_MODIFY:
2895     case PRE_MODIFY:
2896       info->type = ADDRESS_REG_WB;
2897       info->base = XEXP (x, 0);
2898       if (GET_CODE (XEXP (x, 1)) == PLUS
2899           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901           && aarch64_base_register_rtx_p (info->base, strict_p))
2902         {
2903           HOST_WIDE_INT offset;
2904           info->offset = XEXP (XEXP (x, 1), 1);
2905           offset = INTVAL (info->offset);
2906
2907           /* TImode and TFmode values are allowed in both pairs of X
2908              registers and individual Q registers.  The available
2909              address modes are:
2910              X,X: 7-bit signed scaled offset
2911              Q:   9-bit signed offset
2912              We conservatively require an offset representable in either mode.
2913            */
2914           if (mode == TImode || mode == TFmode)
2915             return (offset_7bit_signed_scaled_p (mode, offset)
2916                     && offset_9bit_signed_unscaled_p (mode, offset));
2917
2918           if (outer_code == PARALLEL)
2919             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920                     && offset_7bit_signed_scaled_p (mode, offset));
2921           else
2922             return offset_9bit_signed_unscaled_p (mode, offset);
2923         }
2924       return false;
2925
2926     case CONST:
2927     case SYMBOL_REF:
2928     case LABEL_REF:
2929       /* load literal: pc-relative constant pool entry.  */
2930       info->type = ADDRESS_SYMBOLIC;
2931       if (outer_code != PARALLEL)
2932         {
2933           rtx sym, addend;
2934
2935           split_const (x, &sym, &addend);
2936           return (GET_CODE (sym) == LABEL_REF
2937                   || (GET_CODE (sym) == SYMBOL_REF
2938                       && CONSTANT_POOL_ADDRESS_P (sym)));
2939         }
2940       return false;
2941
2942     case LO_SUM:
2943       info->type = ADDRESS_LO_SUM;
2944       info->base = XEXP (x, 0);
2945       info->offset = XEXP (x, 1);
2946       if (allow_reg_index_p
2947           && aarch64_base_register_rtx_p (info->base, strict_p))
2948         {
2949           rtx sym, offs;
2950           split_const (info->offset, &sym, &offs);
2951           if (GET_CODE (sym) == SYMBOL_REF
2952               && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2953                   == SYMBOL_SMALL_ABSOLUTE))
2954             {
2955               /* The symbol and offset must be aligned to the access size.  */
2956               unsigned int align;
2957               unsigned int ref_size;
2958
2959               if (CONSTANT_POOL_ADDRESS_P (sym))
2960                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2961               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2962                 {
2963                   tree exp = SYMBOL_REF_DECL (sym);
2964                   align = TYPE_ALIGN (TREE_TYPE (exp));
2965                   align = CONSTANT_ALIGNMENT (exp, align);
2966                 }
2967               else if (SYMBOL_REF_DECL (sym))
2968                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2969               else
2970                 align = BITS_PER_UNIT;
2971
2972               ref_size = GET_MODE_SIZE (mode);
2973               if (ref_size == 0)
2974                 ref_size = GET_MODE_SIZE (DImode);
2975
2976               return ((INTVAL (offs) & (ref_size - 1)) == 0
2977                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2978             }
2979         }
2980       return false;
2981
2982     default:
2983       return false;
2984     }
2985 }
2986
2987 bool
2988 aarch64_symbolic_address_p (rtx x)
2989 {
2990   rtx offset;
2991
2992   split_const (x, &x, &offset);
2993   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2994 }
2995
2996 /* Classify the base of symbolic expression X, given that X appears in
2997    context CONTEXT.  */
2998 static enum aarch64_symbol_type
2999 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3000 {
3001   rtx offset;
3002   split_const (x, &x, &offset);
3003   return aarch64_classify_symbol (x, context);
3004 }
3005
3006
3007 /* Return TRUE if X is a legitimate address for accessing memory in
3008    mode MODE.  */
3009 static bool
3010 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3011 {
3012   struct aarch64_address_info addr;
3013
3014   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3015 }
3016
3017 /* Return TRUE if X is a legitimate address for accessing memory in
3018    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3019    pair operation.  */
3020 bool
3021 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3022                            RTX_CODE outer_code, bool strict_p)
3023 {
3024   struct aarch64_address_info addr;
3025
3026   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3027 }
3028
3029 /* Return TRUE if rtx X is immediate constant 0.0 */
3030 bool
3031 aarch64_float_const_zero_rtx_p (rtx x)
3032 {
3033   REAL_VALUE_TYPE r;
3034
3035   if (GET_MODE (x) == VOIDmode)
3036     return false;
3037
3038   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3039   if (REAL_VALUE_MINUS_ZERO (r))
3040     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3041   return REAL_VALUES_EQUAL (r, dconst0);
3042 }
3043
3044 /* Return the fixed registers used for condition codes.  */
3045
3046 static bool
3047 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3048 {
3049   *p1 = CC_REGNUM;
3050   *p2 = INVALID_REGNUM;
3051   return true;
3052 }
3053
3054 enum machine_mode
3055 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3056 {
3057   /* All floating point compares return CCFP if it is an equality
3058      comparison, and CCFPE otherwise.  */
3059   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3060     {
3061       switch (code)
3062         {
3063         case EQ:
3064         case NE:
3065         case UNORDERED:
3066         case ORDERED:
3067         case UNLT:
3068         case UNLE:
3069         case UNGT:
3070         case UNGE:
3071         case UNEQ:
3072         case LTGT:
3073           return CCFPmode;
3074
3075         case LT:
3076         case LE:
3077         case GT:
3078         case GE:
3079           return CCFPEmode;
3080
3081         default:
3082           gcc_unreachable ();
3083         }
3084     }
3085
3086   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3087       && y == const0_rtx
3088       && (code == EQ || code == NE || code == LT || code == GE)
3089       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS))
3090     return CC_NZmode;
3091
3092   /* A compare with a shifted operand.  Because of canonicalization,
3093      the comparison will have to be swapped when we emit the assembly
3094      code.  */
3095   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3096       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3097       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3098           || GET_CODE (x) == LSHIFTRT
3099           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3100     return CC_SWPmode;
3101
3102   /* A compare of a mode narrower than SI mode against zero can be done
3103      by extending the value in the comparison.  */
3104   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3105       && y == const0_rtx)
3106     /* Only use sign-extension if we really need it.  */
3107     return ((code == GT || code == GE || code == LE || code == LT)
3108             ? CC_SESWPmode : CC_ZESWPmode);
3109
3110   /* For everything else, return CCmode.  */
3111   return CCmode;
3112 }
3113
3114 static unsigned
3115 aarch64_get_condition_code (rtx x)
3116 {
3117   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3118   enum rtx_code comp_code = GET_CODE (x);
3119
3120   if (GET_MODE_CLASS (mode) != MODE_CC)
3121     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3122
3123   switch (mode)
3124     {
3125     case CCFPmode:
3126     case CCFPEmode:
3127       switch (comp_code)
3128         {
3129         case GE: return AARCH64_GE;
3130         case GT: return AARCH64_GT;
3131         case LE: return AARCH64_LS;
3132         case LT: return AARCH64_MI;
3133         case NE: return AARCH64_NE;
3134         case EQ: return AARCH64_EQ;
3135         case ORDERED: return AARCH64_VC;
3136         case UNORDERED: return AARCH64_VS;
3137         case UNLT: return AARCH64_LT;
3138         case UNLE: return AARCH64_LE;
3139         case UNGT: return AARCH64_HI;
3140         case UNGE: return AARCH64_PL;
3141         default: gcc_unreachable ();
3142         }
3143       break;
3144
3145     case CCmode:
3146       switch (comp_code)
3147         {
3148         case NE: return AARCH64_NE;
3149         case EQ: return AARCH64_EQ;
3150         case GE: return AARCH64_GE;
3151         case GT: return AARCH64_GT;
3152         case LE: return AARCH64_LE;
3153         case LT: return AARCH64_LT;
3154         case GEU: return AARCH64_CS;
3155         case GTU: return AARCH64_HI;
3156         case LEU: return AARCH64_LS;
3157         case LTU: return AARCH64_CC;
3158         default: gcc_unreachable ();
3159         }
3160       break;
3161
3162     case CC_SWPmode:
3163     case CC_ZESWPmode:
3164     case CC_SESWPmode:
3165       switch (comp_code)
3166         {
3167         case NE: return AARCH64_NE;
3168         case EQ: return AARCH64_EQ;
3169         case GE: return AARCH64_LE;
3170         case GT: return AARCH64_LT;
3171         case LE: return AARCH64_GE;
3172         case LT: return AARCH64_GT;
3173         case GEU: return AARCH64_LS;
3174         case GTU: return AARCH64_CC;
3175         case LEU: return AARCH64_CS;
3176         case LTU: return AARCH64_HI;
3177         default: gcc_unreachable ();
3178         }
3179       break;
3180
3181     case CC_NZmode:
3182       switch (comp_code)
3183         {
3184         case NE: return AARCH64_NE;
3185         case EQ: return AARCH64_EQ;
3186         case GE: return AARCH64_PL;
3187         case LT: return AARCH64_MI;
3188         default: gcc_unreachable ();
3189         }
3190       break;
3191
3192     default:
3193       gcc_unreachable ();
3194       break;
3195     }
3196 }
3197
3198 static unsigned
3199 bit_count (unsigned HOST_WIDE_INT value)
3200 {
3201   unsigned count = 0;
3202
3203   while (value)
3204     {
3205       count++;
3206       value &= value - 1;
3207     }
3208
3209   return count;
3210 }
3211
3212 void
3213 aarch64_print_operand (FILE *f, rtx x, char code)
3214 {
3215   switch (code)
3216     {
3217     case 'e':
3218       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3219       {
3220         int n;
3221
3222         if (GET_CODE (x) != CONST_INT
3223             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3224           {
3225             output_operand_lossage ("invalid operand for '%%%c'", code);
3226             return;
3227           }
3228
3229         switch (n)
3230           {
3231           case 3:
3232             fputc ('b', f);
3233             break;
3234           case 4:
3235             fputc ('h', f);
3236             break;
3237           case 5:
3238             fputc ('w', f);
3239             break;
3240           default:
3241             output_operand_lossage ("invalid operand for '%%%c'", code);
3242             return;
3243           }
3244       }
3245       break;
3246
3247     case 'p':
3248       {
3249         int n;
3250
3251         /* Print N such that 2^N == X.  */
3252         if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3253           {
3254             output_operand_lossage ("invalid operand for '%%%c'", code);
3255             return;
3256           }
3257
3258         asm_fprintf (f, "%d", n);
3259       }
3260       break;
3261
3262     case 'P':
3263       /* Print the number of non-zero bits in X (a const_int).  */
3264       if (GET_CODE (x) != CONST_INT)
3265         {
3266           output_operand_lossage ("invalid operand for '%%%c'", code);
3267           return;
3268         }
3269
3270       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3271       break;
3272
3273     case 'H':
3274       /* Print the higher numbered register of a pair (TImode) of regs.  */
3275       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3276         {
3277           output_operand_lossage ("invalid operand for '%%%c'", code);
3278           return;
3279         }
3280
3281       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3282       break;
3283
3284     case 'Q':
3285       /* Print the least significant register of a pair (TImode) of regs.  */
3286       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3287         {
3288           output_operand_lossage ("invalid operand for '%%%c'", code);
3289           return;
3290         }
3291       asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3292       break;
3293
3294     case 'R':
3295       /* Print the most significant register of a pair (TImode) of regs.  */
3296       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3297         {
3298           output_operand_lossage ("invalid operand for '%%%c'", code);
3299           return;
3300         }
3301       asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3302       break;
3303
3304     case 'm':
3305       /* Print a condition (eq, ne, etc).  */
3306
3307       /* CONST_TRUE_RTX means always -- that's the default.  */
3308       if (x == const_true_rtx)
3309         return;
3310
3311       if (!COMPARISON_P (x))
3312         {
3313           output_operand_lossage ("invalid operand for '%%%c'", code);
3314           return;
3315         }
3316
3317       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3318       break;
3319
3320     case 'M':
3321       /* Print the inverse of a condition (eq <-> ne, etc).  */
3322
3323       /* CONST_TRUE_RTX means never -- that's the default.  */
3324       if (x == const_true_rtx)
3325         {
3326           fputs ("nv", f);
3327           return;
3328         }
3329
3330       if (!COMPARISON_P (x))
3331         {
3332           output_operand_lossage ("invalid operand for '%%%c'", code);
3333           return;
3334         }
3335
3336       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3337                                   (aarch64_get_condition_code (x))], f);
3338       break;
3339
3340     case 'b':
3341     case 'h':
3342     case 's':
3343     case 'd':
3344     case 'q':
3345       /* Print a scalar FP/SIMD register name.  */
3346       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3347         {
3348           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3349           return;
3350         }
3351       asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3352       break;
3353
3354     case 'S':
3355     case 'T':
3356     case 'U':
3357     case 'V':
3358       /* Print the first FP/SIMD register name in a list.  */
3359       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3360         {
3361           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3362           return;
3363         }
3364       asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3365                                REGNO (x) - V0_REGNUM + (code - 'S'));
3366       break;
3367
3368     case 'w':
3369     case 'x':
3370       /* Print a general register name or the zero register (32-bit or
3371          64-bit).  */
3372       if (x == const0_rtx
3373           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3374         {
3375           asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3376           break;
3377         }
3378
3379       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3380         {
3381           asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3382                        REGNO (x) - R0_REGNUM);
3383           break;
3384         }
3385
3386       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3387         {
3388           asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3389           break;
3390         }
3391
3392       /* Fall through */
3393
3394     case 0:
3395       /* Print a normal operand, if it's a general register, then we
3396          assume DImode.  */
3397       if (x == NULL)
3398         {
3399           output_operand_lossage ("missing operand");
3400           return;
3401         }
3402
3403       switch (GET_CODE (x))
3404         {
3405         case REG:
3406           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3407           break;
3408
3409         case MEM:
3410           aarch64_memory_reference_mode = GET_MODE (x);
3411           output_address (XEXP (x, 0));
3412           break;
3413
3414         case LABEL_REF:
3415         case SYMBOL_REF:
3416           output_addr_const (asm_out_file, x);
3417           break;
3418
3419         case CONST_INT:
3420           asm_fprintf (f, "%wd", INTVAL (x));
3421           break;
3422
3423         case CONST_VECTOR:
3424           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3425             {
3426               gcc_assert (aarch64_const_vec_all_same_int_p (x,
3427                                                             HOST_WIDE_INT_MIN,
3428                                                             HOST_WIDE_INT_MAX));
3429               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3430             }
3431           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3432             {
3433               fputc ('0', f);
3434             }
3435           else
3436             gcc_unreachable ();
3437           break;
3438
3439         case CONST_DOUBLE:
3440           /* CONST_DOUBLE can represent a double-width integer.
3441              In this case, the mode of x is VOIDmode.  */
3442           if (GET_MODE (x) == VOIDmode)
3443             ; /* Do Nothing.  */
3444           else if (aarch64_float_const_zero_rtx_p (x))
3445             {
3446               fputc ('0', f);
3447               break;
3448             }
3449           else if (aarch64_float_const_representable_p (x))
3450             {
3451 #define buf_size 20
3452               char float_buf[buf_size] = {'\0'};
3453               REAL_VALUE_TYPE r;
3454               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3455               real_to_decimal_for_mode (float_buf, &r,
3456                                         buf_size, buf_size,
3457                                         1, GET_MODE (x));
3458               asm_fprintf (asm_out_file, "%s", float_buf);
3459               break;
3460 #undef buf_size
3461             }
3462           output_operand_lossage ("invalid constant");
3463           return;
3464         default:
3465           output_operand_lossage ("invalid operand");
3466           return;
3467         }
3468       break;
3469
3470     case 'A':
3471       if (GET_CODE (x) == HIGH)
3472         x = XEXP (x, 0);
3473
3474       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3475         {
3476         case SYMBOL_SMALL_GOT:
3477           asm_fprintf (asm_out_file, ":got:");
3478           break;
3479
3480         case SYMBOL_SMALL_TLSGD:
3481           asm_fprintf (asm_out_file, ":tlsgd:");
3482           break;
3483
3484         case SYMBOL_SMALL_TLSDESC:
3485           asm_fprintf (asm_out_file, ":tlsdesc:");
3486           break;
3487
3488         case SYMBOL_SMALL_GOTTPREL:
3489           asm_fprintf (asm_out_file, ":gottprel:");
3490           break;
3491
3492         case SYMBOL_SMALL_TPREL:
3493           asm_fprintf (asm_out_file, ":tprel:");
3494           break;
3495
3496         default:
3497           break;
3498         }
3499       output_addr_const (asm_out_file, x);
3500       break;
3501
3502     case 'L':
3503       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3504         {
3505         case SYMBOL_SMALL_GOT:
3506           asm_fprintf (asm_out_file, ":lo12:");
3507           break;
3508
3509         case SYMBOL_SMALL_TLSGD:
3510           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3511           break;
3512
3513         case SYMBOL_SMALL_TLSDESC:
3514           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3515           break;
3516
3517         case SYMBOL_SMALL_GOTTPREL:
3518           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3519           break;
3520
3521         case SYMBOL_SMALL_TPREL:
3522           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3523           break;
3524
3525         default:
3526           break;
3527         }
3528       output_addr_const (asm_out_file, x);
3529       break;
3530
3531     case 'G':
3532
3533       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3534         {
3535         case SYMBOL_SMALL_TPREL:
3536           asm_fprintf (asm_out_file, ":tprel_hi12:");
3537           break;
3538         default:
3539           break;
3540         }
3541       output_addr_const (asm_out_file, x);
3542       break;
3543
3544     default:
3545       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3546       return;
3547     }
3548 }
3549
3550 void
3551 aarch64_print_operand_address (FILE *f, rtx x)
3552 {
3553   struct aarch64_address_info addr;
3554
3555   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3556                              MEM, true))
3557     switch (addr.type)
3558       {
3559       case ADDRESS_REG_IMM:
3560         if (addr.offset == const0_rtx)
3561           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3562         else
3563           asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3564                        INTVAL (addr.offset));
3565         return;
3566
3567       case ADDRESS_REG_REG:
3568         if (addr.shift == 0)
3569           asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3570                        reg_names [REGNO (addr.offset)]);
3571         else
3572           asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3573                        reg_names [REGNO (addr.offset)], addr.shift);
3574         return;
3575
3576       case ADDRESS_REG_UXTW:
3577         if (addr.shift == 0)
3578           asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3579                        REGNO (addr.offset) - R0_REGNUM);
3580         else
3581           asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3582                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3583         return;
3584
3585       case ADDRESS_REG_SXTW:
3586         if (addr.shift == 0)
3587           asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3588                        REGNO (addr.offset) - R0_REGNUM);
3589         else
3590           asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3591                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3592         return;
3593
3594       case ADDRESS_REG_WB:
3595         switch (GET_CODE (x))
3596           {
3597           case PRE_INC:
3598             asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3599                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3600             return;
3601           case POST_INC:
3602             asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3603                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3604             return;
3605           case PRE_DEC:
3606             asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3607                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3608             return;
3609           case POST_DEC:
3610             asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3611                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3612             return;
3613           case PRE_MODIFY:
3614             asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3615                          INTVAL (addr.offset));
3616             return;
3617           case POST_MODIFY:
3618             asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3619                          INTVAL (addr.offset));
3620             return;
3621           default:
3622             break;
3623           }
3624         break;
3625
3626       case ADDRESS_LO_SUM:
3627         asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3628         output_addr_const (f, addr.offset);
3629         asm_fprintf (f, "]");
3630         return;
3631
3632       case ADDRESS_SYMBOLIC:
3633         break;
3634       }
3635
3636   output_addr_const (f, x);
3637 }
3638
3639 void
3640 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3641                            int labelno ATTRIBUTE_UNUSED)
3642 {
3643   sorry ("function profiling");
3644 }
3645
3646 bool
3647 aarch64_label_mentioned_p (rtx x)
3648 {
3649   const char *fmt;
3650   int i;
3651
3652   if (GET_CODE (x) == LABEL_REF)
3653     return true;
3654
3655   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3656      referencing instruction, but they are constant offsets, not
3657      symbols.  */
3658   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3659     return false;
3660
3661   fmt = GET_RTX_FORMAT (GET_CODE (x));
3662   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3663     {
3664       if (fmt[i] == 'E')
3665         {
3666           int j;
3667
3668           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3669             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3670               return 1;
3671         }
3672       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3673         return 1;
3674     }
3675
3676   return 0;
3677 }
3678
3679 /* Implement REGNO_REG_CLASS.  */
3680
3681 enum reg_class
3682 aarch64_regno_regclass (unsigned regno)
3683 {
3684   if (GP_REGNUM_P (regno))
3685     return CORE_REGS;
3686
3687   if (regno == SP_REGNUM)
3688     return STACK_REG;
3689
3690   if (regno == FRAME_POINTER_REGNUM
3691       || regno == ARG_POINTER_REGNUM)
3692     return CORE_REGS;
3693
3694   if (FP_REGNUM_P (regno))
3695     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
3696
3697   return NO_REGS;
3698 }
3699
3700 /* Try a machine-dependent way of reloading an illegitimate address
3701    operand.  If we find one, push the reload and return the new rtx.  */
3702
3703 rtx
3704 aarch64_legitimize_reload_address (rtx *x_p,
3705                                    enum machine_mode mode,
3706                                    int opnum, int type,
3707                                    int ind_levels ATTRIBUTE_UNUSED)
3708 {
3709   rtx x = *x_p;
3710
3711   /* Do not allow mem (plus (reg, const)) if vector mode.  */
3712   if (aarch64_vector_mode_p (mode)
3713       && GET_CODE (x) == PLUS
3714       && REG_P (XEXP (x, 0))
3715       && CONST_INT_P (XEXP (x, 1)))
3716     {
3717       rtx orig_rtx = x;
3718       x = copy_rtx (x);
3719       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3720                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3721                    opnum, (enum reload_type) type);
3722       return x;
3723     }
3724
3725   /* We must recognize output that we have already generated ourselves.  */
3726   if (GET_CODE (x) == PLUS
3727       && GET_CODE (XEXP (x, 0)) == PLUS
3728       && REG_P (XEXP (XEXP (x, 0), 0))
3729       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3730       && CONST_INT_P (XEXP (x, 1)))
3731     {
3732       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3733                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3734                    opnum, (enum reload_type) type);
3735       return x;
3736     }
3737
3738   /* We wish to handle large displacements off a base register by splitting
3739      the addend across an add and the mem insn.  This can cut the number of
3740      extra insns needed from 3 to 1.  It is only useful for load/store of a
3741      single register with 12 bit offset field.  */
3742   if (GET_CODE (x) == PLUS
3743       && REG_P (XEXP (x, 0))
3744       && CONST_INT_P (XEXP (x, 1))
3745       && HARD_REGISTER_P (XEXP (x, 0))
3746       && mode != TImode
3747       && mode != TFmode
3748       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3749     {
3750       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3751       HOST_WIDE_INT low = val & 0xfff;
3752       HOST_WIDE_INT high = val - low;
3753       HOST_WIDE_INT offs;
3754       rtx cst;
3755
3756       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
3757          BLKmode alignment.  */
3758       if (GET_MODE_SIZE (mode) == 0)
3759         return NULL_RTX;
3760
3761       offs = low % GET_MODE_SIZE (mode);
3762
3763       /* Align misaligned offset by adjusting high part to compensate.  */
3764       if (offs != 0)
3765         {
3766           if (aarch64_uimm12_shift (high + offs))
3767             {
3768               /* Align down.  */
3769               low = low - offs;
3770               high = high + offs;
3771             }
3772           else
3773             {
3774               /* Align up.  */
3775               offs = GET_MODE_SIZE (mode) - offs;
3776               low = low + offs;
3777               high = high + (low & 0x1000) - offs;
3778               low &= 0xfff;
3779             }
3780         }
3781
3782       /* Check for overflow.  */
3783       if (high + low != val)
3784         return NULL_RTX;
3785
3786       cst = GEN_INT (high);
3787       if (!aarch64_uimm12_shift (high))
3788         cst = force_const_mem (Pmode, cst);
3789
3790       /* Reload high part into base reg, leaving the low part
3791          in the mem instruction.  */
3792       x = gen_rtx_PLUS (Pmode,
3793                         gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3794                         GEN_INT (low));
3795
3796       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3797                    BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3798                    opnum, (enum reload_type) type);
3799       return x;
3800     }
3801
3802   return NULL_RTX;
3803 }
3804
3805
3806 static reg_class_t
3807 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3808                           reg_class_t rclass,
3809                           enum machine_mode mode,
3810                           secondary_reload_info *sri)
3811 {
3812   /* Address expressions of the form PLUS (SP, large_offset) need two
3813      scratch registers, one for the constant, and one for holding a
3814      copy of SP, since SP cannot be used on the RHS of an add-reg
3815      instruction.  */
3816   if (mode == DImode
3817       && GET_CODE (x) == PLUS
3818       && XEXP (x, 0) == stack_pointer_rtx
3819       && CONST_INT_P (XEXP (x, 1))
3820       && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3821     {
3822       sri->icode = CODE_FOR_reload_sp_immediate;
3823       return NO_REGS;
3824     }
3825
3826   /* Without the TARGET_SIMD instructions we cannot move a Q register
3827      to a Q register directly.  We need a scratch.  */
3828   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3829       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3830       && reg_class_subset_p (rclass, FP_REGS))
3831     {
3832       if (mode == TFmode)
3833         sri->icode = CODE_FOR_aarch64_reload_movtf;
3834       else if (mode == TImode)
3835         sri->icode = CODE_FOR_aarch64_reload_movti;
3836       return NO_REGS;
3837     }
3838
3839   /* A TFmode or TImode memory access should be handled via an FP_REGS
3840      because AArch64 has richer addressing modes for LDR/STR instructions
3841      than LDP/STP instructions.  */
3842   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3843       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3844     return FP_REGS;
3845
3846   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3847       return CORE_REGS;
3848
3849   return NO_REGS;
3850 }
3851
3852 static bool
3853 aarch64_can_eliminate (const int from, const int to)
3854 {
3855   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3856      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
3857
3858   if (frame_pointer_needed)
3859     {
3860       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3861         return true;
3862       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3863         return false;
3864       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3865           && !cfun->calls_alloca)
3866         return true;
3867       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3868         return true;
3869     return false;
3870     }
3871   else
3872     {
3873       /* If we decided that we didn't need a frame pointer but then used
3874          LR in the function, then we do need a frame pointer after all, so
3875          prevent this elimination to ensure a frame pointer is used.  */
3876
3877       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3878           && df_regs_ever_live_p (LR_REGNUM))
3879         return false;
3880     }
3881   return true;
3882 }
3883
3884 HOST_WIDE_INT
3885 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3886 {
3887   HOST_WIDE_INT frame_size;
3888   HOST_WIDE_INT offset;
3889
3890   aarch64_layout_frame ();
3891   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3892                 + crtl->outgoing_args_size
3893                 + cfun->machine->saved_varargs_size);
3894
3895    frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3896    offset = frame_size;
3897
3898    if (to == HARD_FRAME_POINTER_REGNUM)
3899      {
3900        if (from == ARG_POINTER_REGNUM)
3901          return offset - crtl->outgoing_args_size;
3902
3903        if (from == FRAME_POINTER_REGNUM)
3904          return cfun->machine->frame.saved_regs_size;
3905      }
3906
3907    if (to == STACK_POINTER_REGNUM)
3908      {
3909        if (from == FRAME_POINTER_REGNUM)
3910          {
3911            HOST_WIDE_INT elim = crtl->outgoing_args_size
3912                               + cfun->machine->frame.saved_regs_size
3913                               - cfun->machine->frame.fp_lr_offset;
3914            elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3915            return elim;
3916          }
3917      }
3918
3919    return offset;
3920 }
3921
3922
3923 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
3924    previous frame.  */
3925
3926 rtx
3927 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3928 {
3929   if (count != 0)
3930     return const0_rtx;
3931   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3932 }
3933
3934
3935 static void
3936 aarch64_asm_trampoline_template (FILE *f)
3937 {
3938   asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3939   asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3940   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3941   assemble_aligned_integer (4, const0_rtx);
3942   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3943   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3944 }
3945
3946 unsigned
3947 aarch64_trampoline_size (void)
3948 {
3949   return 32;  /* 3 insns + padding + 2 dwords.  */
3950 }
3951
3952 static void
3953 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3954 {
3955   rtx fnaddr, mem, a_tramp;
3956
3957   /* Don't need to copy the trailing D-words, we fill those in below.  */
3958   emit_block_move (m_tramp, assemble_trampoline_template (),
3959                    GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3960   mem = adjust_address (m_tramp, DImode, 16);
3961   fnaddr = XEXP (DECL_RTL (fndecl), 0);
3962   emit_move_insn (mem, fnaddr);
3963
3964   mem = adjust_address (m_tramp, DImode, 24);
3965   emit_move_insn (mem, chain_value);
3966
3967   /* XXX We should really define a "clear_cache" pattern and use
3968      gen_clear_cache().  */
3969   a_tramp = XEXP (m_tramp, 0);
3970   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3971                      LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3972                      plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3973 }
3974
3975 static unsigned char
3976 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3977 {
3978   switch (regclass)
3979     {
3980     case CORE_REGS:
3981     case POINTER_REGS:
3982     case GENERAL_REGS:
3983     case ALL_REGS:
3984     case FP_REGS:
3985     case FP_LO_REGS:
3986       return
3987         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
3988                                        (GET_MODE_SIZE (mode) + 7) / 8;
3989     case STACK_REG:
3990       return 1;
3991
3992     case NO_REGS:
3993       return 0;
3994
3995     default:
3996       break;
3997     }
3998   gcc_unreachable ();
3999 }
4000
4001 static reg_class_t
4002 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4003 {
4004   return ((regclass == POINTER_REGS || regclass == STACK_REG)
4005           ? GENERAL_REGS : regclass);
4006 }
4007
4008 void
4009 aarch64_asm_output_labelref (FILE* f, const char *name)
4010 {
4011   asm_fprintf (f, "%U%s", name);
4012 }
4013
4014 static void
4015 aarch64_elf_asm_constructor (rtx symbol, int priority)
4016 {
4017   if (priority == DEFAULT_INIT_PRIORITY)
4018     default_ctor_section_asm_out_constructor (symbol, priority);
4019   else
4020     {
4021       section *s;
4022       char buf[18];
4023       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4024       s = get_section (buf, SECTION_WRITE, NULL);
4025       switch_to_section (s);
4026       assemble_align (POINTER_SIZE);
4027       fputs ("\t.dword\t", asm_out_file);
4028       output_addr_const (asm_out_file, symbol);
4029       fputc ('\n', asm_out_file);
4030     }
4031 }
4032
4033 static void
4034 aarch64_elf_asm_destructor (rtx symbol, int priority)
4035 {
4036   if (priority == DEFAULT_INIT_PRIORITY)
4037     default_dtor_section_asm_out_destructor (symbol, priority);
4038   else
4039     {
4040       section *s;
4041       char buf[18];
4042       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4043       s = get_section (buf, SECTION_WRITE, NULL);
4044       switch_to_section (s);
4045       assemble_align (POINTER_SIZE);
4046       fputs ("\t.dword\t", asm_out_file);
4047       output_addr_const (asm_out_file, symbol);
4048       fputc ('\n', asm_out_file);
4049     }
4050 }
4051
4052 const char*
4053 aarch64_output_casesi (rtx *operands)
4054 {
4055   char buf[100];
4056   char label[100];
4057   rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4058   int index;
4059   static const char *const patterns[4][2] =
4060   {
4061     {
4062       "ldrb\t%w3, [%0,%w1,uxtw]",
4063       "add\t%3, %4, %w3, sxtb #2"
4064     },
4065     {
4066       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4067       "add\t%3, %4, %w3, sxth #2"
4068     },
4069     {
4070       "ldr\t%w3, [%0,%w1,uxtw #2]",
4071       "add\t%3, %4, %w3, sxtw #2"
4072     },
4073     /* We assume that DImode is only generated when not optimizing and
4074        that we don't really need 64-bit address offsets.  That would
4075        imply an object file with 8GB of code in a single function!  */
4076     {
4077       "ldr\t%w3, [%0,%w1,uxtw #2]",
4078       "add\t%3, %4, %w3, sxtw #2"
4079     }
4080   };
4081
4082   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4083
4084   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4085
4086   gcc_assert (index >= 0 && index <= 3);
4087
4088   /* Need to implement table size reduction, by chaning the code below.  */
4089   output_asm_insn (patterns[index][0], operands);
4090   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4091   snprintf (buf, sizeof (buf),
4092             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4093   output_asm_insn (buf, operands);
4094   output_asm_insn (patterns[index][1], operands);
4095   output_asm_insn ("br\t%3", operands);
4096   assemble_label (asm_out_file, label);
4097   return "";
4098 }
4099
4100
4101 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4102    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4103    operator.  */
4104
4105 int
4106 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4107 {
4108   if (shift >= 0 && shift <= 3)
4109     {
4110       int size;
4111       for (size = 8; size <= 32; size *= 2)
4112         {
4113           HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4114           if (mask == bits << shift)
4115             return size;
4116         }
4117     }
4118   return 0;
4119 }
4120
4121 static bool
4122 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4123                                    const_rtx x ATTRIBUTE_UNUSED)
4124 {
4125   /* We can't use blocks for constants when we're using a per-function
4126      constant pool.  */
4127   return false;
4128 }
4129
4130 static section *
4131 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4132                             rtx x ATTRIBUTE_UNUSED,
4133                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4134 {
4135   /* Force all constant pool entries into the current function section.  */
4136   return function_section (current_function_decl);
4137 }
4138
4139
4140 /* Costs.  */
4141
4142 /* Helper function for rtx cost calculation.  Strip a shift expression
4143    from X.  Returns the inner operand if successful, or the original
4144    expression on failure.  */
4145 static rtx
4146 aarch64_strip_shift (rtx x)
4147 {
4148   rtx op = x;
4149
4150   if ((GET_CODE (op) == ASHIFT
4151        || GET_CODE (op) == ASHIFTRT
4152        || GET_CODE (op) == LSHIFTRT)
4153       && CONST_INT_P (XEXP (op, 1)))
4154     return XEXP (op, 0);
4155
4156   if (GET_CODE (op) == MULT
4157       && CONST_INT_P (XEXP (op, 1))
4158       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4159     return XEXP (op, 0);
4160
4161   return x;
4162 }
4163
4164 /* Helper function for rtx cost calculation.  Strip a shift or extend
4165    expression from X.  Returns the inner operand if successful, or the
4166    original expression on failure.  We deal with a number of possible
4167    canonicalization variations here.  */
4168 static rtx
4169 aarch64_strip_shift_or_extend (rtx x)
4170 {
4171   rtx op = x;
4172
4173   /* Zero and sign extraction of a widened value.  */
4174   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4175       && XEXP (op, 2) == const0_rtx
4176       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4177                                          XEXP (op, 1)))
4178     return XEXP (XEXP (op, 0), 0);
4179
4180   /* It can also be represented (for zero-extend) as an AND with an
4181      immediate.  */
4182   if (GET_CODE (op) == AND
4183       && GET_CODE (XEXP (op, 0)) == MULT
4184       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4185       && CONST_INT_P (XEXP (op, 1))
4186       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4187                            INTVAL (XEXP (op, 1))) != 0)
4188     return XEXP (XEXP (op, 0), 0);
4189
4190   /* Now handle extended register, as this may also have an optional
4191      left shift by 1..4.  */
4192   if (GET_CODE (op) == ASHIFT
4193       && CONST_INT_P (XEXP (op, 1))
4194       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4195     op = XEXP (op, 0);
4196
4197   if (GET_CODE (op) == ZERO_EXTEND
4198       || GET_CODE (op) == SIGN_EXTEND)
4199     op = XEXP (op, 0);
4200
4201   if (op != x)
4202     return op;
4203
4204   return aarch64_strip_shift (x);
4205 }
4206
4207 /* Calculate the cost of calculating X, storing it in *COST.  Result
4208    is true if the total cost of the operation has now been calculated.  */
4209 static bool
4210 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4211                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4212 {
4213   rtx op0, op1;
4214   const struct cpu_rtx_cost_table *extra_cost
4215     = aarch64_tune_params->insn_extra_cost;
4216
4217   switch (code)
4218     {
4219     case SET:
4220       op0 = SET_DEST (x);
4221       op1 = SET_SRC (x);
4222
4223       switch (GET_CODE (op0))
4224         {
4225         case MEM:
4226           if (speed)
4227             *cost += extra_cost->memory_store;
4228
4229           if (op1 != const0_rtx)
4230             *cost += rtx_cost (op1, SET, 1, speed);
4231           return true;
4232
4233         case SUBREG:
4234           if (! REG_P (SUBREG_REG (op0)))
4235             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4236           /* Fall through.  */
4237         case REG:
4238           /* Cost is just the cost of the RHS of the set.  */
4239           *cost += rtx_cost (op1, SET, 1, true);
4240           return true;
4241
4242         case ZERO_EXTRACT:  /* Bit-field insertion.  */
4243         case SIGN_EXTRACT:
4244           /* Strip any redundant widening of the RHS to meet the width of
4245              the target.  */
4246           if (GET_CODE (op1) == SUBREG)
4247             op1 = SUBREG_REG (op1);
4248           if ((GET_CODE (op1) == ZERO_EXTEND
4249                || GET_CODE (op1) == SIGN_EXTEND)
4250               && GET_CODE (XEXP (op0, 1)) == CONST_INT
4251               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4252                   >= INTVAL (XEXP (op0, 1))))
4253             op1 = XEXP (op1, 0);
4254           *cost += rtx_cost (op1, SET, 1, speed);
4255           return true;
4256
4257         default:
4258           break;
4259         }
4260       return false;
4261
4262     case MEM:
4263       if (speed)
4264         *cost += extra_cost->memory_load;
4265
4266       return true;
4267
4268     case NEG:
4269       op0 = CONST0_RTX (GET_MODE (x));
4270       op1 = XEXP (x, 0);
4271       goto cost_minus;
4272
4273     case COMPARE:
4274       op0 = XEXP (x, 0);
4275       op1 = XEXP (x, 1);
4276
4277       if (op1 == const0_rtx
4278           && GET_CODE (op0) == AND)
4279         {
4280           x = op0;
4281           goto cost_logic;
4282         }
4283
4284       /* Comparisons can work if the order is swapped.
4285          Canonicalization puts the more complex operation first, but
4286          we want it in op1.  */
4287       if (! (REG_P (op0)
4288              || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4289         {
4290           op0 = XEXP (x, 1);
4291           op1 = XEXP (x, 0);
4292         }
4293       goto cost_minus;
4294
4295     case MINUS:
4296       op0 = XEXP (x, 0);
4297       op1 = XEXP (x, 1);
4298
4299     cost_minus:
4300       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4301           || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4302               && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4303         {
4304           if (op0 != const0_rtx)
4305             *cost += rtx_cost (op0, MINUS, 0, speed);
4306
4307           if (CONST_INT_P (op1))
4308             {
4309               if (!aarch64_uimm12_shift (INTVAL (op1)))
4310                 *cost += rtx_cost (op1, MINUS, 1, speed);
4311             }
4312           else
4313             {
4314               op1 = aarch64_strip_shift_or_extend (op1);
4315               *cost += rtx_cost (op1, MINUS, 1, speed);
4316             }
4317           return true;
4318         }
4319
4320       return false;
4321
4322     case PLUS:
4323       op0 = XEXP (x, 0);
4324       op1 = XEXP (x, 1);
4325
4326       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4327         {
4328           if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4329             {
4330               *cost += rtx_cost (op0, PLUS, 0, speed);
4331             }
4332           else
4333             {
4334               rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4335
4336               if (new_op0 == op0
4337                   && GET_CODE (op0) == MULT)
4338                 {
4339                   if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4340                        && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4341                       || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4342                           && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4343                     {
4344                       *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4345                                           speed)
4346                                 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4347                                             speed)
4348                                 + rtx_cost (op1, PLUS, 1, speed));
4349                       if (speed)
4350                         *cost += extra_cost->int_multiply_extend_add;
4351                       return true;
4352                     }
4353                   *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4354                             + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4355                             + rtx_cost (op1, PLUS, 1, speed));
4356
4357                   if (speed)
4358                     *cost += extra_cost->int_multiply_add;
4359                 }
4360
4361               *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4362                         + rtx_cost (op1, PLUS, 1, speed));
4363             }
4364           return true;
4365         }
4366
4367       return false;
4368
4369     case IOR:
4370     case XOR:
4371     case AND:
4372     cost_logic:
4373       op0 = XEXP (x, 0);
4374       op1 = XEXP (x, 1);
4375
4376       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4377         {
4378           if (CONST_INT_P (op1)
4379               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4380             {
4381               *cost += rtx_cost (op0, AND, 0, speed);
4382             }
4383           else
4384             {
4385               if (GET_CODE (op0) == NOT)
4386                 op0 = XEXP (op0, 0);
4387               op0 = aarch64_strip_shift (op0);
4388               *cost += (rtx_cost (op0, AND, 0, speed)
4389                         + rtx_cost (op1, AND, 1, speed));
4390             }
4391           return true;
4392         }
4393       return false;
4394
4395     case ZERO_EXTEND:
4396       if ((GET_MODE (x) == DImode
4397            && GET_MODE (XEXP (x, 0)) == SImode)
4398           || GET_CODE (XEXP (x, 0)) == MEM)
4399         {
4400           *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4401           return true;
4402         }
4403       return false;
4404
4405     case SIGN_EXTEND:
4406       if (GET_CODE (XEXP (x, 0)) == MEM)
4407         {
4408           *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4409           return true;
4410         }
4411       return false;
4412
4413     case ROTATE:
4414       if (!CONST_INT_P (XEXP (x, 1)))
4415         *cost += COSTS_N_INSNS (2);
4416       /* Fall through.  */
4417     case ROTATERT:
4418     case LSHIFTRT:
4419     case ASHIFT:
4420     case ASHIFTRT:
4421
4422       /* Shifting by a register often takes an extra cycle.  */
4423       if (speed && !CONST_INT_P (XEXP (x, 1)))
4424         *cost += extra_cost->register_shift;
4425
4426       *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4427       return true;
4428
4429     case HIGH:
4430       if (!CONSTANT_P (XEXP (x, 0)))
4431         *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4432       return true;
4433
4434     case LO_SUM:
4435       if (!CONSTANT_P (XEXP (x, 1)))
4436         *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4437       *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4438       return true;
4439
4440     case ZERO_EXTRACT:
4441     case SIGN_EXTRACT:
4442       *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4443       return true;
4444
4445     case MULT:
4446       op0 = XEXP (x, 0);
4447       op1 = XEXP (x, 1);
4448
4449       *cost = COSTS_N_INSNS (1);
4450       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4451         {
4452           if (CONST_INT_P (op1)
4453               && exact_log2 (INTVAL (op1)) > 0)
4454             {
4455               *cost += rtx_cost (op0, ASHIFT, 0, speed);
4456               return true;
4457             }
4458
4459           if ((GET_CODE (op0) == ZERO_EXTEND
4460                && GET_CODE (op1) == ZERO_EXTEND)
4461               || (GET_CODE (op0) == SIGN_EXTEND
4462                   && GET_CODE (op1) == SIGN_EXTEND))
4463             {
4464               *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4465                         + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4466               if (speed)
4467                 *cost += extra_cost->int_multiply_extend;
4468               return true;
4469             }
4470
4471           if (speed)
4472             *cost += extra_cost->int_multiply;
4473         }
4474       else if (speed)
4475         {
4476           if (GET_MODE (x) == DFmode)
4477             *cost += extra_cost->double_multiply;
4478           else if (GET_MODE (x) == SFmode)
4479             *cost += extra_cost->float_multiply;
4480         }
4481
4482       return false;  /* All arguments need to be in registers.  */
4483
4484     case MOD:
4485     case UMOD:
4486       *cost = COSTS_N_INSNS (2);
4487       if (speed)
4488         {
4489           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4490             *cost += (extra_cost->int_multiply_add
4491                       + extra_cost->int_divide);
4492           else if (GET_MODE (x) == DFmode)
4493             *cost += (extra_cost->double_multiply
4494                       + extra_cost->double_divide);
4495           else if (GET_MODE (x) == SFmode)
4496             *cost += (extra_cost->float_multiply
4497                       + extra_cost->float_divide);
4498         }
4499       return false;  /* All arguments need to be in registers.  */
4500
4501     case DIV:
4502     case UDIV:
4503       *cost = COSTS_N_INSNS (1);
4504       if (speed)
4505         {
4506           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4507             *cost += extra_cost->int_divide;
4508           else if (GET_MODE (x) == DFmode)
4509             *cost += extra_cost->double_divide;
4510           else if (GET_MODE (x) == SFmode)
4511             *cost += extra_cost->float_divide;
4512         }
4513       return false;  /* All arguments need to be in registers.  */
4514
4515     default:
4516       break;
4517     }
4518   return false;
4519 }
4520
4521 static int
4522 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4523                   enum machine_mode mode ATTRIBUTE_UNUSED,
4524                   addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4525 {
4526   enum rtx_code c  = GET_CODE (x);
4527   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4528
4529   if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4530     return addr_cost->pre_modify;
4531
4532   if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4533     return addr_cost->post_modify;
4534
4535   if (c == PLUS)
4536     {
4537       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4538         return addr_cost->imm_offset;
4539       else if (GET_CODE (XEXP (x, 0)) == MULT
4540                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4541                || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4542         return addr_cost->register_extend;
4543
4544       return addr_cost->register_offset;
4545     }
4546   else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4547     return addr_cost->imm_offset;
4548
4549   return 0;
4550 }
4551
4552 static int
4553 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4554                             reg_class_t from, reg_class_t to)
4555 {
4556   const struct cpu_regmove_cost *regmove_cost
4557     = aarch64_tune_params->regmove_cost;
4558
4559   if (from == GENERAL_REGS && to == GENERAL_REGS)
4560     return regmove_cost->GP2GP;
4561   else if (from == GENERAL_REGS)
4562     return regmove_cost->GP2FP;
4563   else if (to == GENERAL_REGS)
4564     return regmove_cost->FP2GP;
4565
4566   /* When AdvSIMD instructions are disabled it is not possible to move
4567      a 128-bit value directly between Q registers.  This is handled in
4568      secondary reload.  A general register is used as a scratch to move
4569      the upper DI value and the lower DI value is moved directly,
4570      hence the cost is the sum of three moves. */
4571
4572   if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4573     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4574
4575   return regmove_cost->FP2FP;
4576 }
4577
4578 static int
4579 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4580                           reg_class_t rclass ATTRIBUTE_UNUSED,
4581                           bool in ATTRIBUTE_UNUSED)
4582 {
4583   return aarch64_tune_params->memmov_cost;
4584 }
4585
4586 static void initialize_aarch64_code_model (void);
4587
4588 /* Parse the architecture extension string.  */
4589
4590 static void
4591 aarch64_parse_extension (char *str)
4592 {
4593   /* The extension string is parsed left to right.  */
4594   const struct aarch64_option_extension *opt = NULL;
4595
4596   /* Flag to say whether we are adding or removing an extension.  */
4597   int adding_ext = -1;
4598
4599   while (str != NULL && *str != 0)
4600     {
4601       char *ext;
4602       size_t len;
4603
4604       str++;
4605       ext = strchr (str, '+');
4606
4607       if (ext != NULL)
4608         len = ext - str;
4609       else
4610         len = strlen (str);
4611
4612       if (len >= 2 && strncmp (str, "no", 2) == 0)
4613         {
4614           adding_ext = 0;
4615           len -= 2;
4616           str += 2;
4617         }
4618       else if (len > 0)
4619         adding_ext = 1;
4620
4621       if (len == 0)
4622         {
4623           error ("missing feature modifier after %qs", "+no");
4624           return;
4625         }
4626
4627       /* Scan over the extensions table trying to find an exact match.  */
4628       for (opt = all_extensions; opt->name != NULL; opt++)
4629         {
4630           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4631             {
4632               /* Add or remove the extension.  */
4633               if (adding_ext)
4634                 aarch64_isa_flags |= opt->flags_on;
4635               else
4636                 aarch64_isa_flags &= ~(opt->flags_off);
4637               break;
4638             }
4639         }
4640
4641       if (opt->name == NULL)
4642         {
4643           /* Extension not found in list.  */
4644           error ("unknown feature modifier %qs", str);
4645           return;
4646         }
4647
4648       str = ext;
4649     };
4650
4651   return;
4652 }
4653
4654 /* Parse the ARCH string.  */
4655
4656 static void
4657 aarch64_parse_arch (void)
4658 {
4659   char *ext;
4660   const struct processor *arch;
4661   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4662   size_t len;
4663
4664   strcpy (str, aarch64_arch_string);
4665
4666   ext = strchr (str, '+');
4667
4668   if (ext != NULL)
4669     len = ext - str;
4670   else
4671     len = strlen (str);
4672
4673   if (len == 0)
4674     {
4675       error ("missing arch name in -march=%qs", str);
4676       return;
4677     }
4678
4679   /* Loop through the list of supported ARCHs to find a match.  */
4680   for (arch = all_architectures; arch->name != NULL; arch++)
4681     {
4682       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4683         {
4684           selected_arch = arch;
4685           aarch64_isa_flags = selected_arch->flags;
4686           selected_cpu = &all_cores[selected_arch->core];
4687
4688           if (ext != NULL)
4689             {
4690               /* ARCH string contains at least one extension.  */
4691               aarch64_parse_extension (ext);
4692             }
4693
4694           return;
4695         }
4696     }
4697
4698   /* ARCH name not found in list.  */
4699   error ("unknown value %qs for -march", str);
4700   return;
4701 }
4702
4703 /* Parse the CPU string.  */
4704
4705 static void
4706 aarch64_parse_cpu (void)
4707 {
4708   char *ext;
4709   const struct processor *cpu;
4710   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4711   size_t len;
4712
4713   strcpy (str, aarch64_cpu_string);
4714
4715   ext = strchr (str, '+');
4716
4717   if (ext != NULL)
4718     len = ext - str;
4719   else
4720     len = strlen (str);
4721
4722   if (len == 0)
4723     {
4724       error ("missing cpu name in -mcpu=%qs", str);
4725       return;
4726     }
4727
4728   /* Loop through the list of supported CPUs to find a match.  */
4729   for (cpu = all_cores; cpu->name != NULL; cpu++)
4730     {
4731       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4732         {
4733           selected_cpu = cpu;
4734           aarch64_isa_flags = selected_cpu->flags;
4735
4736           if (ext != NULL)
4737             {
4738               /* CPU string contains at least one extension.  */
4739               aarch64_parse_extension (ext);
4740             }
4741
4742           return;
4743         }
4744     }
4745
4746   /* CPU name not found in list.  */
4747   error ("unknown value %qs for -mcpu", str);
4748   return;
4749 }
4750
4751 /* Parse the TUNE string.  */
4752
4753 static void
4754 aarch64_parse_tune (void)
4755 {
4756   const struct processor *cpu;
4757   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4758   strcpy (str, aarch64_tune_string);
4759
4760   /* Loop through the list of supported CPUs to find a match.  */
4761   for (cpu = all_cores; cpu->name != NULL; cpu++)
4762     {
4763       if (strcmp (cpu->name, str) == 0)
4764         {
4765           selected_tune = cpu;
4766           return;
4767         }
4768     }
4769
4770   /* CPU name not found in list.  */
4771   error ("unknown value %qs for -mtune", str);
4772   return;
4773 }
4774
4775
4776 /* Implement TARGET_OPTION_OVERRIDE.  */
4777
4778 static void
4779 aarch64_override_options (void)
4780 {
4781   /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4782      otherwise march remains undefined.  mtune can be used with either march or
4783      mcpu.  */
4784
4785   if (aarch64_arch_string)
4786     {
4787       aarch64_parse_arch ();
4788       aarch64_cpu_string = NULL;
4789     }
4790
4791   if (aarch64_cpu_string)
4792     {
4793       aarch64_parse_cpu ();
4794       selected_arch = NULL;
4795     }
4796
4797   if (aarch64_tune_string)
4798     {
4799       aarch64_parse_tune ();
4800     }
4801
4802   initialize_aarch64_code_model ();
4803
4804   aarch64_build_bitmask_table ();
4805
4806   /* This target defaults to strict volatile bitfields.  */
4807   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4808     flag_strict_volatile_bitfields = 1;
4809
4810   /* If the user did not specify a processor, choose the default
4811      one for them.  This will be the CPU set during configuration using
4812      --with-cpu, otherwise it is "generic".  */
4813   if (!selected_cpu)
4814     {
4815       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4816       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4817     }
4818
4819   gcc_assert (selected_cpu);
4820
4821   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
4822   if (!selected_tune)
4823     selected_tune = &all_cores[selected_cpu->core];
4824
4825   aarch64_tune_flags = selected_tune->flags;
4826   aarch64_tune = selected_tune->core;
4827   aarch64_tune_params = selected_tune->tune;
4828
4829   aarch64_override_options_after_change ();
4830 }
4831
4832 /* Implement targetm.override_options_after_change.  */
4833
4834 static void
4835 aarch64_override_options_after_change (void)
4836 {
4837   faked_omit_frame_pointer = false;
4838
4839   /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4840      that aarch64_frame_pointer_required will be called.  We need to remember
4841      whether flag_omit_frame_pointer was turned on normally or just faked.  */
4842
4843   if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4844     {
4845       flag_omit_frame_pointer = true;
4846       faked_omit_frame_pointer = true;
4847     }
4848 }
4849
4850 static struct machine_function *
4851 aarch64_init_machine_status (void)
4852 {
4853   struct machine_function *machine;
4854   machine = ggc_alloc_cleared_machine_function ();
4855   return machine;
4856 }
4857
4858 void
4859 aarch64_init_expanders (void)
4860 {
4861   init_machine_status = aarch64_init_machine_status;
4862 }
4863
4864 /* A checking mechanism for the implementation of the various code models.  */
4865 static void
4866 initialize_aarch64_code_model (void)
4867 {
4868    if (flag_pic)
4869      {
4870        switch (aarch64_cmodel_var)
4871          {
4872          case AARCH64_CMODEL_TINY:
4873            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4874            break;
4875          case AARCH64_CMODEL_SMALL:
4876            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4877            break;
4878          case AARCH64_CMODEL_LARGE:
4879            sorry ("code model %qs with -f%s", "large",
4880                   flag_pic > 1 ? "PIC" : "pic");
4881          default:
4882            gcc_unreachable ();
4883          }
4884      }
4885    else
4886      aarch64_cmodel = aarch64_cmodel_var;
4887 }
4888
4889 /* Return true if SYMBOL_REF X binds locally.  */
4890
4891 static bool
4892 aarch64_symbol_binds_local_p (const_rtx x)
4893 {
4894   return (SYMBOL_REF_DECL (x)
4895           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4896           : SYMBOL_REF_LOCAL_P (x));
4897 }
4898
4899 /* Return true if SYMBOL_REF X is thread local */
4900 static bool
4901 aarch64_tls_symbol_p (rtx x)
4902 {
4903   if (! TARGET_HAVE_TLS)
4904     return false;
4905
4906   if (GET_CODE (x) != SYMBOL_REF)
4907     return false;
4908
4909   return SYMBOL_REF_TLS_MODEL (x) != 0;
4910 }
4911
4912 /* Classify a TLS symbol into one of the TLS kinds.  */
4913 enum aarch64_symbol_type
4914 aarch64_classify_tls_symbol (rtx x)
4915 {
4916   enum tls_model tls_kind = tls_symbolic_operand_type (x);
4917
4918   switch (tls_kind)
4919     {
4920     case TLS_MODEL_GLOBAL_DYNAMIC:
4921     case TLS_MODEL_LOCAL_DYNAMIC:
4922       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4923
4924     case TLS_MODEL_INITIAL_EXEC:
4925       return SYMBOL_SMALL_GOTTPREL;
4926
4927     case TLS_MODEL_LOCAL_EXEC:
4928       return SYMBOL_SMALL_TPREL;
4929
4930     case TLS_MODEL_EMULATED:
4931     case TLS_MODEL_NONE:
4932       return SYMBOL_FORCE_TO_MEM;
4933
4934     default:
4935       gcc_unreachable ();
4936     }
4937 }
4938
4939 /* Return the method that should be used to access SYMBOL_REF or
4940    LABEL_REF X in context CONTEXT.  */
4941 enum aarch64_symbol_type
4942 aarch64_classify_symbol (rtx x,
4943                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4944 {
4945   if (GET_CODE (x) == LABEL_REF)
4946     {
4947       switch (aarch64_cmodel)
4948         {
4949         case AARCH64_CMODEL_LARGE:
4950           return SYMBOL_FORCE_TO_MEM;
4951
4952         case AARCH64_CMODEL_TINY_PIC:
4953         case AARCH64_CMODEL_TINY:
4954         case AARCH64_CMODEL_SMALL_PIC:
4955         case AARCH64_CMODEL_SMALL:
4956           return SYMBOL_SMALL_ABSOLUTE;
4957
4958         default:
4959           gcc_unreachable ();
4960         }
4961     }
4962
4963   gcc_assert (GET_CODE (x) == SYMBOL_REF);
4964
4965   switch (aarch64_cmodel)
4966     {
4967     case AARCH64_CMODEL_LARGE:
4968       return SYMBOL_FORCE_TO_MEM;
4969
4970     case AARCH64_CMODEL_TINY:
4971     case AARCH64_CMODEL_SMALL:
4972
4973       /* This is needed to get DFmode, TImode constants to be loaded off
4974          the constant pool.  Is it necessary to dump TImode values into
4975          the constant pool.  We don't handle TImode constant loads properly
4976          yet and hence need to use the constant pool.  */
4977       if (CONSTANT_POOL_ADDRESS_P (x))
4978         return SYMBOL_FORCE_TO_MEM;
4979
4980       if (aarch64_tls_symbol_p (x))
4981         return aarch64_classify_tls_symbol (x);
4982
4983       if (SYMBOL_REF_WEAK (x))
4984         return SYMBOL_FORCE_TO_MEM;
4985
4986       return SYMBOL_SMALL_ABSOLUTE;
4987
4988     case AARCH64_CMODEL_TINY_PIC:
4989     case AARCH64_CMODEL_SMALL_PIC:
4990
4991       if (CONSTANT_POOL_ADDRESS_P (x))
4992         return SYMBOL_FORCE_TO_MEM;
4993
4994       if (aarch64_tls_symbol_p (x))
4995         return aarch64_classify_tls_symbol (x);
4996
4997       if (!aarch64_symbol_binds_local_p (x))
4998         return SYMBOL_SMALL_GOT;
4999
5000       return SYMBOL_SMALL_ABSOLUTE;
5001
5002     default:
5003       gcc_unreachable ();
5004     }
5005   /* By default push everything into the constant pool.  */
5006   return SYMBOL_FORCE_TO_MEM;
5007 }
5008
5009 /* Return true if X is a symbolic constant that can be used in context
5010    CONTEXT.  If it is, store the type of the symbol in *SYMBOL_TYPE.  */
5011
5012 bool
5013 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5014                              enum aarch64_symbol_type *symbol_type)
5015 {
5016   rtx offset;
5017   split_const (x, &x, &offset);
5018   if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5019     *symbol_type = aarch64_classify_symbol (x, context);
5020   else
5021     return false;
5022
5023   /* No checking of offset at this point.  */
5024   return true;
5025 }
5026
5027 bool
5028 aarch64_constant_address_p (rtx x)
5029 {
5030   return (CONSTANT_P (x) && memory_address_p (DImode, x));
5031 }
5032
5033 bool
5034 aarch64_legitimate_pic_operand_p (rtx x)
5035 {
5036   if (GET_CODE (x) == SYMBOL_REF
5037       || (GET_CODE (x) == CONST
5038           && GET_CODE (XEXP (x, 0)) == PLUS
5039           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5040      return false;
5041
5042   return true;
5043 }
5044
5045 /* Return true if X holds either a quarter-precision or
5046      floating-point +0.0 constant.  */
5047 static bool
5048 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5049 {
5050   if (!CONST_DOUBLE_P (x))
5051     return false;
5052
5053   /* TODO: We could handle moving 0.0 to a TFmode register,
5054      but first we would like to refactor the movtf_aarch64
5055      to be more amicable to split moves properly and
5056      correctly gate on TARGET_SIMD.  For now - reject all
5057      constants which are not to SFmode or DFmode registers.  */
5058   if (!(mode == SFmode || mode == DFmode))
5059     return false;
5060
5061   if (aarch64_float_const_zero_rtx_p (x))
5062     return true;
5063   return aarch64_float_const_representable_p (x);
5064 }
5065
5066 static bool
5067 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5068 {
5069   /* Do not allow vector struct mode constants.  We could support
5070      0 and -1 easily, but they need support in aarch64-simd.md.  */
5071   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5072     return false;
5073
5074   /* This could probably go away because
5075      we now decompose CONST_INTs according to expand_mov_immediate.  */
5076   if ((GET_CODE (x) == CONST_VECTOR
5077        && aarch64_simd_valid_immediate (x, mode, false,
5078                                         NULL, NULL, NULL, NULL, NULL) != -1)
5079       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5080         return !targetm.cannot_force_const_mem (mode, x);
5081
5082   if (GET_CODE (x) == HIGH
5083       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5084     return true;
5085
5086   return aarch64_constant_address_p (x);
5087 }
5088
5089 rtx
5090 aarch64_load_tp (rtx target)
5091 {
5092   if (!target
5093       || GET_MODE (target) != Pmode
5094       || !register_operand (target, Pmode))
5095     target = gen_reg_rtx (Pmode);
5096
5097   /* Can return in any reg.  */
5098   emit_insn (gen_aarch64_load_tp_hard (target));
5099   return target;
5100 }
5101
5102 /* On AAPCS systems, this is the "struct __va_list".  */
5103 static GTY(()) tree va_list_type;
5104
5105 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5106    Return the type to use as __builtin_va_list.
5107
5108    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5109
5110    struct __va_list
5111    {
5112      void *__stack;
5113      void *__gr_top;
5114      void *__vr_top;
5115      int   __gr_offs;
5116      int   __vr_offs;
5117    };  */
5118
5119 static tree
5120 aarch64_build_builtin_va_list (void)
5121 {
5122   tree va_list_name;
5123   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5124
5125   /* Create the type.  */
5126   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5127   /* Give it the required name.  */
5128   va_list_name = build_decl (BUILTINS_LOCATION,
5129                              TYPE_DECL,
5130                              get_identifier ("__va_list"),
5131                              va_list_type);
5132   DECL_ARTIFICIAL (va_list_name) = 1;
5133   TYPE_NAME (va_list_type) = va_list_name;
5134   TYPE_STUB_DECL (va_list_type) = va_list_name;
5135
5136   /* Create the fields.  */
5137   f_stack = build_decl (BUILTINS_LOCATION,
5138                         FIELD_DECL, get_identifier ("__stack"),
5139                         ptr_type_node);
5140   f_grtop = build_decl (BUILTINS_LOCATION,
5141                         FIELD_DECL, get_identifier ("__gr_top"),
5142                         ptr_type_node);
5143   f_vrtop = build_decl (BUILTINS_LOCATION,
5144                         FIELD_DECL, get_identifier ("__vr_top"),
5145                         ptr_type_node);
5146   f_groff = build_decl (BUILTINS_LOCATION,
5147                         FIELD_DECL, get_identifier ("__gr_offs"),
5148                         integer_type_node);
5149   f_vroff = build_decl (BUILTINS_LOCATION,
5150                         FIELD_DECL, get_identifier ("__vr_offs"),
5151                         integer_type_node);
5152
5153   DECL_ARTIFICIAL (f_stack) = 1;
5154   DECL_ARTIFICIAL (f_grtop) = 1;
5155   DECL_ARTIFICIAL (f_vrtop) = 1;
5156   DECL_ARTIFICIAL (f_groff) = 1;
5157   DECL_ARTIFICIAL (f_vroff) = 1;
5158
5159   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5160   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5161   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5162   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5163   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5164
5165   TYPE_FIELDS (va_list_type) = f_stack;
5166   DECL_CHAIN (f_stack) = f_grtop;
5167   DECL_CHAIN (f_grtop) = f_vrtop;
5168   DECL_CHAIN (f_vrtop) = f_groff;
5169   DECL_CHAIN (f_groff) = f_vroff;
5170
5171   /* Compute its layout.  */
5172   layout_type (va_list_type);
5173
5174   return va_list_type;
5175 }
5176
5177 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
5178 static void
5179 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5180 {
5181   const CUMULATIVE_ARGS *cum;
5182   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5183   tree stack, grtop, vrtop, groff, vroff;
5184   tree t;
5185   int gr_save_area_size;
5186   int vr_save_area_size;
5187   int vr_offset;
5188
5189   cum = &crtl->args.info;
5190   gr_save_area_size
5191     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5192   vr_save_area_size
5193     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5194
5195   if (TARGET_GENERAL_REGS_ONLY)
5196     {
5197       if (cum->aapcs_nvrn > 0)
5198         sorry ("%qs and floating point or vector arguments",
5199                "-mgeneral-regs-only");
5200       vr_save_area_size = 0;
5201     }
5202
5203   f_stack = TYPE_FIELDS (va_list_type_node);
5204   f_grtop = DECL_CHAIN (f_stack);
5205   f_vrtop = DECL_CHAIN (f_grtop);
5206   f_groff = DECL_CHAIN (f_vrtop);
5207   f_vroff = DECL_CHAIN (f_groff);
5208
5209   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5210                   NULL_TREE);
5211   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5212                   NULL_TREE);
5213   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5214                   NULL_TREE);
5215   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5216                   NULL_TREE);
5217   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5218                   NULL_TREE);
5219
5220   /* Emit code to initialize STACK, which points to the next varargs stack
5221      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
5222      by named arguments.  STACK is 8-byte aligned.  */
5223   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5224   if (cum->aapcs_stack_size > 0)
5225     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5226   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5227   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5228
5229   /* Emit code to initialize GRTOP, the top of the GR save area.
5230      virtual_incoming_args_rtx should have been 16 byte aligned.  */
5231   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5232   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5233   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5234
5235   /* Emit code to initialize VRTOP, the top of the VR save area.
5236      This address is gr_save_area_bytes below GRTOP, rounded
5237      down to the next 16-byte boundary.  */
5238   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5239   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5240                              STACK_BOUNDARY / BITS_PER_UNIT);
5241
5242   if (vr_offset)
5243     t = fold_build_pointer_plus_hwi (t, -vr_offset);
5244   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5245   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246
5247   /* Emit code to initialize GROFF, the offset from GRTOP of the
5248      next GPR argument.  */
5249   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5250               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5251   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5252
5253   /* Likewise emit code to initialize VROFF, the offset from FTOP
5254      of the next VR argument.  */
5255   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5256               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5257   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5258 }
5259
5260 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
5261
5262 static tree
5263 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5264                               gimple_seq *post_p ATTRIBUTE_UNUSED)
5265 {
5266   tree addr;
5267   bool indirect_p;
5268   bool is_ha;           /* is HFA or HVA.  */
5269   bool dw_align;        /* double-word align.  */
5270   enum machine_mode ag_mode = VOIDmode;
5271   int nregs;
5272   enum machine_mode mode;
5273
5274   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5275   tree stack, f_top, f_off, off, arg, roundup, on_stack;
5276   HOST_WIDE_INT size, rsize, adjust, align;
5277   tree t, u, cond1, cond2;
5278
5279   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5280   if (indirect_p)
5281     type = build_pointer_type (type);
5282
5283   mode = TYPE_MODE (type);
5284
5285   f_stack = TYPE_FIELDS (va_list_type_node);
5286   f_grtop = DECL_CHAIN (f_stack);
5287   f_vrtop = DECL_CHAIN (f_grtop);
5288   f_groff = DECL_CHAIN (f_vrtop);
5289   f_vroff = DECL_CHAIN (f_groff);
5290
5291   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5292                   f_stack, NULL_TREE);
5293   size = int_size_in_bytes (type);
5294   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5295
5296   dw_align = false;
5297   adjust = 0;
5298   if (aarch64_vfp_is_call_or_return_candidate (mode,
5299                                                type,
5300                                                &ag_mode,
5301                                                &nregs,
5302                                                &is_ha))
5303     {
5304       /* TYPE passed in fp/simd registers.  */
5305       if (TARGET_GENERAL_REGS_ONLY)
5306         sorry ("%qs and floating point or vector arguments",
5307                "-mgeneral-regs-only");
5308
5309       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5310                       unshare_expr (valist), f_vrtop, NULL_TREE);
5311       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5312                       unshare_expr (valist), f_vroff, NULL_TREE);
5313
5314       rsize = nregs * UNITS_PER_VREG;
5315
5316       if (is_ha)
5317         {
5318           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5319             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5320         }
5321       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5322                && size < UNITS_PER_VREG)
5323         {
5324           adjust = UNITS_PER_VREG - size;
5325         }
5326     }
5327   else
5328     {
5329       /* TYPE passed in general registers.  */
5330       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5331                       unshare_expr (valist), f_grtop, NULL_TREE);
5332       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5333                       unshare_expr (valist), f_groff, NULL_TREE);
5334       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5335       nregs = rsize / UNITS_PER_WORD;
5336
5337       if (align > 8)
5338         dw_align = true;
5339
5340       if (BLOCK_REG_PADDING (mode, type, 1) == downward
5341           && size < UNITS_PER_WORD)
5342         {
5343           adjust = UNITS_PER_WORD  - size;
5344         }
5345     }
5346
5347   /* Get a local temporary for the field value.  */
5348   off = get_initialized_tmp_var (f_off, pre_p, NULL);
5349
5350   /* Emit code to branch if off >= 0.  */
5351   t = build2 (GE_EXPR, boolean_type_node, off,
5352               build_int_cst (TREE_TYPE (off), 0));
5353   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5354
5355   if (dw_align)
5356     {
5357       /* Emit: offs = (offs + 15) & -16.  */
5358       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5359                   build_int_cst (TREE_TYPE (off), 15));
5360       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5361                   build_int_cst (TREE_TYPE (off), -16));
5362       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5363     }
5364   else
5365     roundup = NULL;
5366
5367   /* Update ap.__[g|v]r_offs  */
5368   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5369               build_int_cst (TREE_TYPE (off), rsize));
5370   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5371
5372   /* String up.  */
5373   if (roundup)
5374     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5375
5376   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
5377   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5378               build_int_cst (TREE_TYPE (f_off), 0));
5379   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5380
5381   /* String up: make sure the assignment happens before the use.  */
5382   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5383   COND_EXPR_ELSE (cond1) = t;
5384
5385   /* Prepare the trees handling the argument that is passed on the stack;
5386      the top level node will store in ON_STACK.  */
5387   arg = get_initialized_tmp_var (stack, pre_p, NULL);
5388   if (align > 8)
5389     {
5390       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
5391       t = fold_convert (intDI_type_node, arg);
5392       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5393                   build_int_cst (TREE_TYPE (t), 15));
5394       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5395                   build_int_cst (TREE_TYPE (t), -16));
5396       t = fold_convert (TREE_TYPE (arg), t);
5397       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5398     }
5399   else
5400     roundup = NULL;
5401   /* Advance ap.__stack  */
5402   t = fold_convert (intDI_type_node, arg);
5403   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5404               build_int_cst (TREE_TYPE (t), size + 7));
5405   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5406               build_int_cst (TREE_TYPE (t), -8));
5407   t = fold_convert (TREE_TYPE (arg), t);
5408   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5409   /* String up roundup and advance.  */
5410   if (roundup)
5411     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5412   /* String up with arg */
5413   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5414   /* Big-endianness related address adjustment.  */
5415   if (BLOCK_REG_PADDING (mode, type, 1) == downward
5416       && size < UNITS_PER_WORD)
5417   {
5418     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5419                 size_int (UNITS_PER_WORD - size));
5420     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5421   }
5422
5423   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5424   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5425
5426   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
5427   t = off;
5428   if (adjust)
5429     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5430                 build_int_cst (TREE_TYPE (off), adjust));
5431
5432   t = fold_convert (sizetype, t);
5433   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5434
5435   if (is_ha)
5436     {
5437       /* type ha; // treat as "struct {ftype field[n];}"
5438          ... [computing offs]
5439          for (i = 0; i <nregs; ++i, offs += 16)
5440            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5441          return ha;  */
5442       int i;
5443       tree tmp_ha, field_t, field_ptr_t;
5444
5445       /* Declare a local variable.  */
5446       tmp_ha = create_tmp_var_raw (type, "ha");
5447       gimple_add_tmp_var (tmp_ha);
5448
5449       /* Establish the base type.  */
5450       switch (ag_mode)
5451         {
5452         case SFmode:
5453           field_t = float_type_node;
5454           field_ptr_t = float_ptr_type_node;
5455           break;
5456         case DFmode:
5457           field_t = double_type_node;
5458           field_ptr_t = double_ptr_type_node;
5459           break;
5460         case TFmode:
5461           field_t = long_double_type_node;
5462           field_ptr_t = long_double_ptr_type_node;
5463           break;
5464 /* The half precision and quad precision are not fully supported yet.  Enable
5465    the following code after the support is complete.  Need to find the correct
5466    type node for __fp16 *.  */
5467 #if 0
5468         case HFmode:
5469           field_t = float_type_node;
5470           field_ptr_t = float_ptr_type_node;
5471           break;
5472 #endif
5473         case V2SImode:
5474         case V4SImode:
5475             {
5476               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5477               field_t = build_vector_type_for_mode (innertype, ag_mode);
5478               field_ptr_t = build_pointer_type (field_t);
5479             }
5480           break;
5481         default:
5482           gcc_assert (0);
5483         }
5484
5485       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
5486       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5487       addr = t;
5488       t = fold_convert (field_ptr_t, addr);
5489       t = build2 (MODIFY_EXPR, field_t,
5490                   build1 (INDIRECT_REF, field_t, tmp_ha),
5491                   build1 (INDIRECT_REF, field_t, t));
5492
5493       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
5494       for (i = 1; i < nregs; ++i)
5495         {
5496           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5497           u = fold_convert (field_ptr_t, addr);
5498           u = build2 (MODIFY_EXPR, field_t,
5499                       build2 (MEM_REF, field_t, tmp_ha,
5500                               build_int_cst (field_ptr_t,
5501                                              (i *
5502                                               int_size_in_bytes (field_t)))),
5503                       build1 (INDIRECT_REF, field_t, u));
5504           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5505         }
5506
5507       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5508       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5509     }
5510
5511   COND_EXPR_ELSE (cond2) = t;
5512   addr = fold_convert (build_pointer_type (type), cond1);
5513   addr = build_va_arg_indirect_ref (addr);
5514
5515   if (indirect_p)
5516     addr = build_va_arg_indirect_ref (addr);
5517
5518   return addr;
5519 }
5520
5521 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
5522
5523 static void
5524 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5525                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5526                                 int no_rtl)
5527 {
5528   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5529   CUMULATIVE_ARGS local_cum;
5530   int gr_saved, vr_saved;
5531
5532   /* The caller has advanced CUM up to, but not beyond, the last named
5533      argument.  Advance a local copy of CUM past the last "real" named
5534      argument, to find out how many registers are left over.  */
5535   local_cum = *cum;
5536   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5537
5538   /* Found out how many registers we need to save.  */
5539   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5540   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5541
5542   if (TARGET_GENERAL_REGS_ONLY)
5543     {
5544       if (local_cum.aapcs_nvrn > 0)
5545         sorry ("%qs and floating point or vector arguments",
5546                "-mgeneral-regs-only");
5547       vr_saved = 0;
5548     }
5549
5550   if (!no_rtl)
5551     {
5552       if (gr_saved > 0)
5553         {
5554           rtx ptr, mem;
5555
5556           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
5557           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5558                                - gr_saved * UNITS_PER_WORD);
5559           mem = gen_frame_mem (BLKmode, ptr);
5560           set_mem_alias_set (mem, get_varargs_alias_set ());
5561
5562           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5563                                mem, gr_saved);
5564         }
5565       if (vr_saved > 0)
5566         {
5567           /* We can't use move_block_from_reg, because it will use
5568              the wrong mode, storing D regs only.  */
5569           enum machine_mode mode = TImode;
5570           int off, i;
5571
5572           /* Set OFF to the offset from virtual_incoming_args_rtx of
5573              the first vector register.  The VR save area lies below
5574              the GR one, and is aligned to 16 bytes.  */
5575           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5576                                    STACK_BOUNDARY / BITS_PER_UNIT);
5577           off -= vr_saved * UNITS_PER_VREG;
5578
5579           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5580             {
5581               rtx ptr, mem;
5582
5583               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5584               mem = gen_frame_mem (mode, ptr);
5585               set_mem_alias_set (mem, get_varargs_alias_set ());
5586               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5587               off += UNITS_PER_VREG;
5588             }
5589         }
5590     }
5591
5592   /* We don't save the size into *PRETEND_SIZE because we want to avoid
5593      any complication of having crtl->args.pretend_args_size changed.  */
5594   cfun->machine->saved_varargs_size
5595     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5596                       STACK_BOUNDARY / BITS_PER_UNIT)
5597        + vr_saved * UNITS_PER_VREG);
5598 }
5599
5600 static void
5601 aarch64_conditional_register_usage (void)
5602 {
5603   int i;
5604   if (!TARGET_FLOAT)
5605     {
5606       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5607         {
5608           fixed_regs[i] = 1;
5609           call_used_regs[i] = 1;
5610         }
5611     }
5612 }
5613
5614 /* Walk down the type tree of TYPE counting consecutive base elements.
5615    If *MODEP is VOIDmode, then set it to the first valid floating point
5616    type.  If a non-floating point type is found, or if a floating point
5617    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5618    otherwise return the count in the sub-tree.  */
5619 static int
5620 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5621 {
5622   enum machine_mode mode;
5623   HOST_WIDE_INT size;
5624
5625   switch (TREE_CODE (type))
5626     {
5627     case REAL_TYPE:
5628       mode = TYPE_MODE (type);
5629       if (mode != DFmode && mode != SFmode && mode != TFmode)
5630         return -1;
5631
5632       if (*modep == VOIDmode)
5633         *modep = mode;
5634
5635       if (*modep == mode)
5636         return 1;
5637
5638       break;
5639
5640     case COMPLEX_TYPE:
5641       mode = TYPE_MODE (TREE_TYPE (type));
5642       if (mode != DFmode && mode != SFmode && mode != TFmode)
5643         return -1;
5644
5645       if (*modep == VOIDmode)
5646         *modep = mode;
5647
5648       if (*modep == mode)
5649         return 2;
5650
5651       break;
5652
5653     case VECTOR_TYPE:
5654       /* Use V2SImode and V4SImode as representatives of all 64-bit
5655          and 128-bit vector types.  */
5656       size = int_size_in_bytes (type);
5657       switch (size)
5658         {
5659         case 8:
5660           mode = V2SImode;
5661           break;
5662         case 16:
5663           mode = V4SImode;
5664           break;
5665         default:
5666           return -1;
5667         }
5668
5669       if (*modep == VOIDmode)
5670         *modep = mode;
5671
5672       /* Vector modes are considered to be opaque: two vectors are
5673          equivalent for the purposes of being homogeneous aggregates
5674          if they are the same size.  */
5675       if (*modep == mode)
5676         return 1;
5677
5678       break;
5679
5680     case ARRAY_TYPE:
5681       {
5682         int count;
5683         tree index = TYPE_DOMAIN (type);
5684
5685         /* Can't handle incomplete types.  */
5686         if (!COMPLETE_TYPE_P (type))
5687           return -1;
5688
5689         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5690         if (count == -1
5691             || !index
5692             || !TYPE_MAX_VALUE (index)
5693             || !host_integerp (TYPE_MAX_VALUE (index), 1)
5694             || !TYPE_MIN_VALUE (index)
5695             || !host_integerp (TYPE_MIN_VALUE (index), 1)
5696             || count < 0)
5697           return -1;
5698
5699         count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5700                       - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5701
5702         /* There must be no padding.  */
5703         if (!host_integerp (TYPE_SIZE (type), 1)
5704             || (tree_low_cst (TYPE_SIZE (type), 1)
5705                 != count * GET_MODE_BITSIZE (*modep)))
5706           return -1;
5707
5708         return count;
5709       }
5710
5711     case RECORD_TYPE:
5712       {
5713         int count = 0;
5714         int sub_count;
5715         tree field;
5716
5717         /* Can't handle incomplete types.  */
5718         if (!COMPLETE_TYPE_P (type))
5719           return -1;
5720
5721         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5722           {
5723             if (TREE_CODE (field) != FIELD_DECL)
5724               continue;
5725
5726             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5727             if (sub_count < 0)
5728               return -1;
5729             count += sub_count;
5730           }
5731
5732         /* There must be no padding.  */
5733         if (!host_integerp (TYPE_SIZE (type), 1)
5734             || (tree_low_cst (TYPE_SIZE (type), 1)
5735                 != count * GET_MODE_BITSIZE (*modep)))
5736           return -1;
5737
5738         return count;
5739       }
5740
5741     case UNION_TYPE:
5742     case QUAL_UNION_TYPE:
5743       {
5744         /* These aren't very interesting except in a degenerate case.  */
5745         int count = 0;
5746         int sub_count;
5747         tree field;
5748
5749         /* Can't handle incomplete types.  */
5750         if (!COMPLETE_TYPE_P (type))
5751           return -1;
5752
5753         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5754           {
5755             if (TREE_CODE (field) != FIELD_DECL)
5756               continue;
5757
5758             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5759             if (sub_count < 0)
5760               return -1;
5761             count = count > sub_count ? count : sub_count;
5762           }
5763
5764         /* There must be no padding.  */
5765         if (!host_integerp (TYPE_SIZE (type), 1)
5766             || (tree_low_cst (TYPE_SIZE (type), 1)
5767                 != count * GET_MODE_BITSIZE (*modep)))
5768           return -1;
5769
5770         return count;
5771       }
5772
5773     default:
5774       break;
5775     }
5776
5777   return -1;
5778 }
5779
5780 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5781    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
5782    array types.  The C99 floating-point complex types are also considered
5783    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
5784    types, which are GCC extensions and out of the scope of AAPCS64, are
5785    treated as composite types here as well.
5786
5787    Note that MODE itself is not sufficient in determining whether a type
5788    is such a composite type or not.  This is because
5789    stor-layout.c:compute_record_mode may have already changed the MODE
5790    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
5791    structure with only one field may have its MODE set to the mode of the
5792    field.  Also an integer mode whose size matches the size of the
5793    RECORD_TYPE type may be used to substitute the original mode
5794    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
5795    solely relied on.  */
5796
5797 static bool
5798 aarch64_composite_type_p (const_tree type,
5799                           enum machine_mode mode)
5800 {
5801   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5802     return true;
5803
5804   if (mode == BLKmode
5805       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5806       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5807     return true;
5808
5809   return false;
5810 }
5811
5812 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5813    type as described in AAPCS64 \S 4.1.2.
5814
5815    See the comment above aarch64_composite_type_p for the notes on MODE.  */
5816
5817 static bool
5818 aarch64_short_vector_p (const_tree type,
5819                         enum machine_mode mode)
5820 {
5821   HOST_WIDE_INT size = -1;
5822
5823   if (type && TREE_CODE (type) == VECTOR_TYPE)
5824     size = int_size_in_bytes (type);
5825   else if (!aarch64_composite_type_p (type, mode)
5826            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5827                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5828     size = GET_MODE_SIZE (mode);
5829
5830   return (size == 8 || size == 16) ? true : false;
5831 }
5832
5833 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5834    shall be passed or returned in simd/fp register(s) (providing these
5835    parameter passing registers are available).
5836
5837    Upon successful return, *COUNT returns the number of needed registers,
5838    *BASE_MODE returns the mode of the individual register and when IS_HAF
5839    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5840    floating-point aggregate or a homogeneous short-vector aggregate.  */
5841
5842 static bool
5843 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5844                                          const_tree type,
5845                                          enum machine_mode *base_mode,
5846                                          int *count,
5847                                          bool *is_ha)
5848 {
5849   enum machine_mode new_mode = VOIDmode;
5850   bool composite_p = aarch64_composite_type_p (type, mode);
5851
5852   if (is_ha != NULL) *is_ha = false;
5853
5854   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5855       || aarch64_short_vector_p (type, mode))
5856     {
5857       *count = 1;
5858       new_mode = mode;
5859     }
5860   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5861     {
5862       if (is_ha != NULL) *is_ha = true;
5863       *count = 2;
5864       new_mode = GET_MODE_INNER (mode);
5865     }
5866   else if (type && composite_p)
5867     {
5868       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5869
5870       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5871         {
5872           if (is_ha != NULL) *is_ha = true;
5873           *count = ag_count;
5874         }
5875       else
5876         return false;
5877     }
5878   else
5879     return false;
5880
5881   *base_mode = new_mode;
5882   return true;
5883 }
5884
5885 /* Implement TARGET_STRUCT_VALUE_RTX.  */
5886
5887 static rtx
5888 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5889                           int incoming ATTRIBUTE_UNUSED)
5890 {
5891   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5892 }
5893
5894 /* Implements target hook vector_mode_supported_p.  */
5895 static bool
5896 aarch64_vector_mode_supported_p (enum machine_mode mode)
5897 {
5898   if (TARGET_SIMD
5899       && (mode == V4SImode  || mode == V8HImode
5900           || mode == V16QImode || mode == V2DImode
5901           || mode == V2SImode  || mode == V4HImode
5902           || mode == V8QImode || mode == V2SFmode
5903           || mode == V4SFmode || mode == V2DFmode))
5904     return true;
5905
5906   return false;
5907 }
5908
5909 /* Return quad mode as the preferred SIMD mode.  */
5910 static enum machine_mode
5911 aarch64_preferred_simd_mode (enum machine_mode mode)
5912 {
5913   if (TARGET_SIMD)
5914     switch (mode)
5915       {
5916       case DFmode:
5917         return V2DFmode;
5918       case SFmode:
5919         return V4SFmode;
5920       case SImode:
5921         return V4SImode;
5922       case HImode:
5923         return V8HImode;
5924       case QImode:
5925         return V16QImode;
5926       case DImode:
5927           return V2DImode;
5928         break;
5929
5930       default:;
5931       }
5932   return word_mode;
5933 }
5934
5935 /* Return the bitmask of possible vector sizes for the vectorizer
5936    to iterate over.  */
5937 static unsigned int
5938 aarch64_autovectorize_vector_sizes (void)
5939 {
5940   return (16 | 8);
5941 }
5942
5943 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5944    vector types in order to conform to the AAPCS64 (see "Procedure
5945    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
5946    qualify for emission with the mangled names defined in that document,
5947    a vector type must not only be of the correct mode but also be
5948    composed of AdvSIMD vector element types (e.g.
5949    _builtin_aarch64_simd_qi); these types are registered by
5950    aarch64_init_simd_builtins ().  In other words, vector types defined
5951    in other ways e.g. via vector_size attribute will get default
5952    mangled names.  */
5953 typedef struct
5954 {
5955   enum machine_mode mode;
5956   const char *element_type_name;
5957   const char *mangled_name;
5958 } aarch64_simd_mangle_map_entry;
5959
5960 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5961   /* 64-bit containerized types.  */
5962   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
5963   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
5964   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
5965   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
5966   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
5967   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
5968   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
5969   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
5970   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5971   /* 128-bit containerized types.  */
5972   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
5973   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
5974   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
5975   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
5976   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
5977   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
5978   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
5979   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
5980   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
5981   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
5982   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
5983   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
5984   { VOIDmode, NULL, NULL }
5985 };
5986
5987 /* Implement TARGET_MANGLE_TYPE.  */
5988
5989 static const char *
5990 aarch64_mangle_type (const_tree type)
5991 {
5992   /* The AArch64 ABI documents say that "__va_list" has to be
5993      managled as if it is in the "std" namespace.  */
5994   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
5995     return "St9__va_list";
5996
5997   /* Check the mode of the vector type, and the name of the vector
5998      element type, against the table.  */
5999   if (TREE_CODE (type) == VECTOR_TYPE)
6000     {
6001       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6002
6003       while (pos->mode != VOIDmode)
6004         {
6005           tree elt_type = TREE_TYPE (type);
6006
6007           if (pos->mode == TYPE_MODE (type)
6008               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6009               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6010                           pos->element_type_name))
6011             return pos->mangled_name;
6012
6013           pos++;
6014         }
6015     }
6016
6017   /* Use the default mangling.  */
6018   return NULL;
6019 }
6020
6021 /* Return the equivalent letter for size.  */
6022 static unsigned char
6023 sizetochar (int size)
6024 {
6025   switch (size)
6026     {
6027     case 64: return 'd';
6028     case 32: return 's';
6029     case 16: return 'h';
6030     case 8 : return 'b';
6031     default: gcc_unreachable ();
6032     }
6033 }
6034
6035 /* Return true iff x is a uniform vector of floating-point
6036    constants, and the constant can be represented in
6037    quarter-precision form.  Note, as aarch64_float_const_representable
6038    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
6039 static bool
6040 aarch64_vect_float_const_representable_p (rtx x)
6041 {
6042   int i = 0;
6043   REAL_VALUE_TYPE r0, ri;
6044   rtx x0, xi;
6045
6046   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6047     return false;
6048
6049   x0 = CONST_VECTOR_ELT (x, 0);
6050   if (!CONST_DOUBLE_P (x0))
6051     return false;
6052
6053   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6054
6055   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6056     {
6057       xi = CONST_VECTOR_ELT (x, i);
6058       if (!CONST_DOUBLE_P (xi))
6059         return false;
6060
6061       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6062       if (!REAL_VALUES_EQUAL (r0, ri))
6063         return false;
6064     }
6065
6066   return aarch64_float_const_representable_p (x0);
6067 }
6068
6069 /* TODO: This function returns values similar to those
6070    returned by neon_valid_immediate in gcc/config/arm/arm.c
6071    but the API here is different enough that these magic numbers
6072    are not used.  It should be sufficient to return true or false.  */
6073 static int
6074 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6075                               rtx *modconst, int *elementwidth,
6076                               unsigned char *elementchar,
6077                               int *mvn, int *shift)
6078 {
6079 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
6080   matches = 1;                                          \
6081   for (i = 0; i < idx; i += (STRIDE))                   \
6082     if (!(TEST))                                        \
6083       matches = 0;                                      \
6084   if (matches)                                          \
6085     {                                                   \
6086       immtype = (CLASS);                                \
6087       elsize = (ELSIZE);                                \
6088       elchar = sizetochar (elsize);                     \
6089       eshift = (SHIFT);                                 \
6090       emvn = (NEG);                                     \
6091       break;                                            \
6092     }
6093
6094   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6095   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6096   unsigned char bytes[16];
6097   unsigned char elchar = 0;
6098   int immtype = -1, matches;
6099   unsigned int invmask = inverse ? 0xff : 0;
6100   int eshift, emvn;
6101
6102   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6103     {
6104       bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6105       int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6106
6107       if (!(simd_imm_zero
6108             || aarch64_vect_float_const_representable_p (op)))
6109         return -1;
6110
6111         if (modconst)
6112           *modconst = CONST_VECTOR_ELT (op, 0);
6113
6114         if (elementwidth)
6115           *elementwidth = elem_width;
6116
6117         if (elementchar)
6118           *elementchar = sizetochar (elem_width);
6119
6120         if (shift)
6121           *shift = 0;
6122
6123         if (simd_imm_zero)
6124           return 19;
6125         else
6126           return 18;
6127     }
6128
6129   /* Splat vector constant out into a byte vector.  */
6130   for (i = 0; i < n_elts; i++)
6131     {
6132       rtx el = CONST_VECTOR_ELT (op, i);
6133       unsigned HOST_WIDE_INT elpart;
6134       unsigned int part, parts;
6135
6136       if (GET_CODE (el) == CONST_INT)
6137         {
6138           elpart = INTVAL (el);
6139           parts = 1;
6140         }
6141       else if (GET_CODE (el) == CONST_DOUBLE)
6142         {
6143           elpart = CONST_DOUBLE_LOW (el);
6144           parts = 2;
6145         }
6146       else
6147         gcc_unreachable ();
6148
6149       for (part = 0; part < parts; part++)
6150         {
6151           unsigned int byte;
6152           for (byte = 0; byte < innersize; byte++)
6153             {
6154               bytes[idx++] = (elpart & 0xff) ^ invmask;
6155               elpart >>= BITS_PER_UNIT;
6156             }
6157           if (GET_CODE (el) == CONST_DOUBLE)
6158             elpart = CONST_DOUBLE_HIGH (el);
6159         }
6160     }
6161
6162   /* Sanity check.  */
6163   gcc_assert (idx == GET_MODE_SIZE (mode));
6164
6165   do
6166     {
6167       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6168              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6169
6170       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6171              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6172
6173       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6174              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6175
6176       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6177              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6178
6179       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6180
6181       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6182
6183       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6184              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6185
6186       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6187              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6188
6189       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6190              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6191
6192       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6193              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6194
6195       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6196
6197       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6198
6199       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6200              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6201
6202       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6203              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6204
6205       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6206              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6207
6208       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6209              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6210
6211       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6212
6213       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6214              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6215     }
6216   while (0);
6217
6218   /* TODO: Currently the assembler cannot handle types 12 to 15.
6219      And there is no way to specify cmode through the compiler.
6220      Disable them till there is support in the assembler.  */
6221   if (immtype == -1
6222       || (immtype >= 12 && immtype <= 15)
6223       || immtype == 18)
6224     return -1;
6225
6226
6227   if (elementwidth)
6228     *elementwidth = elsize;
6229
6230   if (elementchar)
6231     *elementchar = elchar;
6232
6233   if (mvn)
6234     *mvn = emvn;
6235
6236   if (shift)
6237     *shift = eshift;
6238
6239   if (modconst)
6240     {
6241       unsigned HOST_WIDE_INT imm = 0;
6242
6243       /* Un-invert bytes of recognized vector, if necessary.  */
6244       if (invmask != 0)
6245         for (i = 0; i < idx; i++)
6246           bytes[i] ^= invmask;
6247
6248       if (immtype == 17)
6249         {
6250           /* FIXME: Broken on 32-bit H_W_I hosts.  */
6251           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6252
6253           for (i = 0; i < 8; i++)
6254             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6255               << (i * BITS_PER_UNIT);
6256
6257           *modconst = GEN_INT (imm);
6258         }
6259       else
6260         {
6261           unsigned HOST_WIDE_INT imm = 0;
6262
6263           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6264             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6265
6266           /* Construct 'abcdefgh' because the assembler cannot handle
6267              generic constants.  */
6268           gcc_assert (shift != NULL && mvn != NULL);
6269           if (*mvn)
6270             imm = ~imm;
6271           imm = (imm >> *shift) & 0xff;
6272           *modconst = GEN_INT (imm);
6273         }
6274     }
6275
6276   return immtype;
6277 #undef CHECK
6278 }
6279
6280 /* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
6281    (or, implicitly, MVNI) immediate.  Write back width per element
6282    to *ELEMENTWIDTH, and a modified constant (whatever should be output
6283    for a MOVI instruction) in *MODCONST.  */
6284 int
6285 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6286                                        rtx *modconst, int *elementwidth,
6287                                        unsigned char *elementchar,
6288                                        int *mvn, int *shift)
6289 {
6290   rtx tmpconst;
6291   int tmpwidth;
6292   unsigned char tmpwidthc;
6293   int tmpmvn = 0, tmpshift = 0;
6294   int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6295                                              &tmpwidth, &tmpwidthc,
6296                                              &tmpmvn, &tmpshift);
6297
6298   if (retval == -1)
6299     return 0;
6300
6301   if (modconst)
6302     *modconst = tmpconst;
6303
6304   if (elementwidth)
6305     *elementwidth = tmpwidth;
6306
6307   if (elementchar)
6308     *elementchar = tmpwidthc;
6309
6310   if (mvn)
6311     *mvn = tmpmvn;
6312
6313   if (shift)
6314     *shift = tmpshift;
6315
6316   return 1;
6317 }
6318
6319 static bool
6320 aarch64_const_vec_all_same_int_p (rtx x,
6321                                   HOST_WIDE_INT minval,
6322                                   HOST_WIDE_INT maxval)
6323 {
6324   HOST_WIDE_INT firstval;
6325   int count, i;
6326
6327   if (GET_CODE (x) != CONST_VECTOR
6328       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6329     return false;
6330
6331   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6332   if (firstval < minval || firstval > maxval)
6333     return false;
6334
6335   count = CONST_VECTOR_NUNITS (x);
6336   for (i = 1; i < count; i++)
6337     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6338       return false;
6339
6340   return true;
6341 }
6342
6343 /* Check of immediate shift constants are within range.  */
6344 bool
6345 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6346 {
6347   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6348   if (left)
6349     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6350   else
6351     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6352 }
6353
6354 /* Return true if X is a uniform vector where all elements
6355    are either the floating-point constant 0.0 or the
6356    integer constant 0.  */
6357 bool
6358 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6359 {
6360   return x == CONST0_RTX (mode);
6361 }
6362
6363 bool
6364 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6365 {
6366   HOST_WIDE_INT imm = INTVAL (x);
6367   int i;
6368
6369   for (i = 0; i < 8; i++)
6370     {
6371       unsigned int byte = imm & 0xff;
6372       if (byte != 0xff && byte != 0)
6373        return false;
6374       imm >>= 8;
6375     }
6376
6377   return true;
6378 }
6379
6380 /* Return a const_int vector of VAL.  */
6381 rtx
6382 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6383 {
6384   int nunits = GET_MODE_NUNITS (mode);
6385   rtvec v = rtvec_alloc (nunits);
6386   int i;
6387
6388   for (i=0; i < nunits; i++)
6389     RTVEC_ELT (v, i) = GEN_INT (val);
6390
6391   return gen_rtx_CONST_VECTOR (mode, v);
6392 }
6393
6394 /* Construct and return a PARALLEL RTX vector.  */
6395 rtx
6396 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6397 {
6398   int nunits = GET_MODE_NUNITS (mode);
6399   rtvec v = rtvec_alloc (nunits / 2);
6400   int base = high ? nunits / 2 : 0;
6401   rtx t1;
6402   int i;
6403
6404   for (i=0; i < nunits / 2; i++)
6405     RTVEC_ELT (v, i) = GEN_INT (base + i);
6406
6407   t1 = gen_rtx_PARALLEL (mode, v);
6408   return t1;
6409 }
6410
6411 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
6412    HIGH (exclusive).  */
6413 void
6414 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6415 {
6416   HOST_WIDE_INT lane;
6417   gcc_assert (GET_CODE (operand) == CONST_INT);
6418   lane = INTVAL (operand);
6419
6420   if (lane < low || lane >= high)
6421     error ("lane out of range");
6422 }
6423
6424 void
6425 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6426 {
6427   gcc_assert (GET_CODE (operand) == CONST_INT);
6428   HOST_WIDE_INT lane = INTVAL (operand);
6429
6430   if (lane < low || lane >= high)
6431     error ("constant out of range");
6432 }
6433
6434 /* Emit code to reinterpret one AdvSIMD type as another,
6435    without altering bits.  */
6436 void
6437 aarch64_simd_reinterpret (rtx dest, rtx src)
6438 {
6439   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6440 }
6441
6442 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6443    registers).  */
6444 void
6445 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6446                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6447                             rtx op1)
6448 {
6449   rtx mem = gen_rtx_MEM (mode, destaddr);
6450   rtx tmp1 = gen_reg_rtx (mode);
6451   rtx tmp2 = gen_reg_rtx (mode);
6452
6453   emit_insn (intfn (tmp1, op1, tmp2));
6454
6455   emit_move_insn (mem, tmp1);
6456   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6457   emit_move_insn (mem, tmp2);
6458 }
6459
6460 /* Return TRUE if OP is a valid vector addressing mode.  */
6461 bool
6462 aarch64_simd_mem_operand_p (rtx op)
6463 {
6464   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6465                         || GET_CODE (XEXP (op, 0)) == REG);
6466 }
6467
6468 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6469    not to early-clobber SRC registers in the process.
6470
6471    We assume that the operands described by SRC and DEST represent a
6472    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
6473    number of components into which the copy has been decomposed.  */
6474 void
6475 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6476                                 rtx *src, unsigned int count)
6477 {
6478   unsigned int i;
6479
6480   if (!reg_overlap_mentioned_p (operands[0], operands[1])
6481       || REGNO (operands[0]) < REGNO (operands[1]))
6482     {
6483       for (i = 0; i < count; i++)
6484         {
6485           operands[2 * i] = dest[i];
6486           operands[2 * i + 1] = src[i];
6487         }
6488     }
6489   else
6490     {
6491       for (i = 0; i < count; i++)
6492         {
6493           operands[2 * i] = dest[count - i - 1];
6494           operands[2 * i + 1] = src[count - i - 1];
6495         }
6496     }
6497 }
6498
6499 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6500    one of VSTRUCT modes: OI, CI or XI.  */
6501 int
6502 aarch64_simd_attr_length_move (rtx insn)
6503 {
6504   enum machine_mode mode;
6505
6506   extract_insn_cached (insn);
6507
6508   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6509     {
6510       mode = GET_MODE (recog_data.operand[0]);
6511       switch (mode)
6512         {
6513         case OImode:
6514           return 8;
6515         case CImode:
6516           return 12;
6517         case XImode:
6518           return 16;
6519         default:
6520           gcc_unreachable ();
6521         }
6522     }
6523   return 4;
6524 }
6525
6526 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
6527    alignment of a vector to 128 bits.  */
6528 static HOST_WIDE_INT
6529 aarch64_simd_vector_alignment (const_tree type)
6530 {
6531   HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6532   return MIN (align, 128);
6533 }
6534
6535 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
6536 static bool
6537 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6538 {
6539   if (is_packed)
6540     return false;
6541
6542   /* We guarantee alignment for vectors up to 128-bits.  */
6543   if (tree_int_cst_compare (TYPE_SIZE (type),
6544                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6545     return false;
6546
6547   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
6548   return true;
6549 }
6550
6551 /* If VALS is a vector constant that can be loaded into a register
6552    using DUP, generate instructions to do so and return an RTX to
6553    assign to the register.  Otherwise return NULL_RTX.  */
6554 static rtx
6555 aarch64_simd_dup_constant (rtx vals)
6556 {
6557   enum machine_mode mode = GET_MODE (vals);
6558   enum machine_mode inner_mode = GET_MODE_INNER (mode);
6559   int n_elts = GET_MODE_NUNITS (mode);
6560   bool all_same = true;
6561   rtx x;
6562   int i;
6563
6564   if (GET_CODE (vals) != CONST_VECTOR)
6565     return NULL_RTX;
6566
6567   for (i = 1; i < n_elts; ++i)
6568     {
6569       x = CONST_VECTOR_ELT (vals, i);
6570       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6571         all_same = false;
6572     }
6573
6574   if (!all_same)
6575     return NULL_RTX;
6576
6577   /* We can load this constant by using DUP and a constant in a
6578      single ARM register.  This will be cheaper than a vector
6579      load.  */
6580   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6581   return gen_rtx_VEC_DUPLICATE (mode, x);
6582 }
6583
6584
6585 /* Generate code to load VALS, which is a PARALLEL containing only
6586    constants (for vec_init) or CONST_VECTOR, efficiently into a
6587    register.  Returns an RTX to copy into the register, or NULL_RTX
6588    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
6589 static rtx
6590 aarch64_simd_make_constant (rtx vals)
6591 {
6592   enum machine_mode mode = GET_MODE (vals);
6593   rtx const_dup;
6594   rtx const_vec = NULL_RTX;
6595   int n_elts = GET_MODE_NUNITS (mode);
6596   int n_const = 0;
6597   int i;
6598
6599   if (GET_CODE (vals) == CONST_VECTOR)
6600     const_vec = vals;
6601   else if (GET_CODE (vals) == PARALLEL)
6602     {
6603       /* A CONST_VECTOR must contain only CONST_INTs and
6604          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6605          Only store valid constants in a CONST_VECTOR.  */
6606       for (i = 0; i < n_elts; ++i)
6607         {
6608           rtx x = XVECEXP (vals, 0, i);
6609           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6610             n_const++;
6611         }
6612       if (n_const == n_elts)
6613         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6614     }
6615   else
6616     gcc_unreachable ();
6617
6618   if (const_vec != NULL_RTX
6619       && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6620                                                 NULL, NULL, NULL))
6621     /* Load using MOVI/MVNI.  */
6622     return const_vec;
6623   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6624     /* Loaded using DUP.  */
6625     return const_dup;
6626   else if (const_vec != NULL_RTX)
6627     /* Load from constant pool. We can not take advantage of single-cycle
6628        LD1 because we need a PC-relative addressing mode.  */
6629     return const_vec;
6630   else
6631     /* A PARALLEL containing something not valid inside CONST_VECTOR.
6632        We can not construct an initializer.  */
6633     return NULL_RTX;
6634 }
6635
6636 void
6637 aarch64_expand_vector_init (rtx target, rtx vals)
6638 {
6639   enum machine_mode mode = GET_MODE (target);
6640   enum machine_mode inner_mode = GET_MODE_INNER (mode);
6641   int n_elts = GET_MODE_NUNITS (mode);
6642   int n_var = 0, one_var = -1;
6643   bool all_same = true;
6644   rtx x, mem;
6645   int i;
6646
6647   x = XVECEXP (vals, 0, 0);
6648   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6649     n_var = 1, one_var = 0;
6650
6651   for (i = 1; i < n_elts; ++i)
6652     {
6653       x = XVECEXP (vals, 0, i);
6654       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6655         ++n_var, one_var = i;
6656
6657       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6658         all_same = false;
6659     }
6660
6661   if (n_var == 0)
6662     {
6663       rtx constant = aarch64_simd_make_constant (vals);
6664       if (constant != NULL_RTX)
6665         {
6666           emit_move_insn (target, constant);
6667           return;
6668         }
6669     }
6670
6671   /* Splat a single non-constant element if we can.  */
6672   if (all_same)
6673     {
6674       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6675       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6676       return;
6677     }
6678
6679   /* One field is non-constant.  Load constant then overwrite varying
6680      field.  This is more efficient than using the stack.  */
6681   if (n_var == 1)
6682     {
6683       rtx copy = copy_rtx (vals);
6684       rtx index = GEN_INT (one_var);
6685       enum insn_code icode;
6686
6687       /* Load constant part of vector, substitute neighboring value for
6688          varying element.  */
6689       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6690       aarch64_expand_vector_init (target, copy);
6691
6692       /* Insert variable.  */
6693       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6694       icode = optab_handler (vec_set_optab, mode);
6695       gcc_assert (icode != CODE_FOR_nothing);
6696       emit_insn (GEN_FCN (icode) (target, x, index));
6697       return;
6698     }
6699
6700   /* Construct the vector in memory one field at a time
6701      and load the whole vector.  */
6702   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6703   for (i = 0; i < n_elts; i++)
6704     emit_move_insn (adjust_address_nv (mem, inner_mode,
6705                                     i * GET_MODE_SIZE (inner_mode)),
6706                     XVECEXP (vals, 0, i));
6707   emit_move_insn (target, mem);
6708
6709 }
6710
6711 static unsigned HOST_WIDE_INT
6712 aarch64_shift_truncation_mask (enum machine_mode mode)
6713 {
6714   return
6715     (aarch64_vector_mode_supported_p (mode)
6716      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6717 }
6718
6719 #ifndef TLS_SECTION_ASM_FLAG
6720 #define TLS_SECTION_ASM_FLAG 'T'
6721 #endif
6722
6723 void
6724 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6725                                tree decl ATTRIBUTE_UNUSED)
6726 {
6727   char flagchars[10], *f = flagchars;
6728
6729   /* If we have already declared this section, we can use an
6730      abbreviated form to switch back to it -- unless this section is
6731      part of a COMDAT groups, in which case GAS requires the full
6732      declaration every time.  */
6733   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6734       && (flags & SECTION_DECLARED))
6735     {
6736       fprintf (asm_out_file, "\t.section\t%s\n", name);
6737       return;
6738     }
6739
6740   if (!(flags & SECTION_DEBUG))
6741     *f++ = 'a';
6742   if (flags & SECTION_WRITE)
6743     *f++ = 'w';
6744   if (flags & SECTION_CODE)
6745     *f++ = 'x';
6746   if (flags & SECTION_SMALL)
6747     *f++ = 's';
6748   if (flags & SECTION_MERGE)
6749     *f++ = 'M';
6750   if (flags & SECTION_STRINGS)
6751     *f++ = 'S';
6752   if (flags & SECTION_TLS)
6753     *f++ = TLS_SECTION_ASM_FLAG;
6754   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6755     *f++ = 'G';
6756   *f = '\0';
6757
6758   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6759
6760   if (!(flags & SECTION_NOTYPE))
6761     {
6762       const char *type;
6763       const char *format;
6764
6765       if (flags & SECTION_BSS)
6766         type = "nobits";
6767       else
6768         type = "progbits";
6769
6770 #ifdef TYPE_OPERAND_FMT
6771       format = "," TYPE_OPERAND_FMT;
6772 #else
6773       format = ",@%s";
6774 #endif
6775
6776       fprintf (asm_out_file, format, type);
6777
6778       if (flags & SECTION_ENTSIZE)
6779         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6780       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6781         {
6782           if (TREE_CODE (decl) == IDENTIFIER_NODE)
6783             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6784           else
6785             fprintf (asm_out_file, ",%s,comdat",
6786                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6787         }
6788     }
6789
6790   putc ('\n', asm_out_file);
6791 }
6792
6793 /* Select a format to encode pointers in exception handling data.  */
6794 int
6795 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6796 {
6797    int type;
6798    switch (aarch64_cmodel)
6799      {
6800      case AARCH64_CMODEL_TINY:
6801      case AARCH64_CMODEL_TINY_PIC:
6802      case AARCH64_CMODEL_SMALL:
6803      case AARCH64_CMODEL_SMALL_PIC:
6804        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
6805           for everything.  */
6806        type = DW_EH_PE_sdata4;
6807        break;
6808      default:
6809        /* No assumptions here.  8-byte relocs required.  */
6810        type = DW_EH_PE_sdata8;
6811        break;
6812      }
6813    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6814 }
6815
6816 /* Emit load exclusive.  */
6817
6818 static void
6819 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6820                              rtx mem, rtx model_rtx)
6821 {
6822   rtx (*gen) (rtx, rtx, rtx);
6823
6824   switch (mode)
6825     {
6826     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6827     case HImode: gen = gen_aarch64_load_exclusivehi; break;
6828     case SImode: gen = gen_aarch64_load_exclusivesi; break;
6829     case DImode: gen = gen_aarch64_load_exclusivedi; break;
6830     default:
6831       gcc_unreachable ();
6832     }
6833
6834   emit_insn (gen (rval, mem, model_rtx));
6835 }
6836
6837 /* Emit store exclusive.  */
6838
6839 static void
6840 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6841                               rtx rval, rtx mem, rtx model_rtx)
6842 {
6843   rtx (*gen) (rtx, rtx, rtx, rtx);
6844
6845   switch (mode)
6846     {
6847     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6848     case HImode: gen = gen_aarch64_store_exclusivehi; break;
6849     case SImode: gen = gen_aarch64_store_exclusivesi; break;
6850     case DImode: gen = gen_aarch64_store_exclusivedi; break;
6851     default:
6852       gcc_unreachable ();
6853     }
6854
6855   emit_insn (gen (bval, rval, mem, model_rtx));
6856 }
6857
6858 /* Mark the previous jump instruction as unlikely.  */
6859
6860 static void
6861 aarch64_emit_unlikely_jump (rtx insn)
6862 {
6863   rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6864
6865   insn = emit_jump_insn (insn);
6866   add_reg_note (insn, REG_BR_PROB, very_unlikely);
6867 }
6868
6869 /* Expand a compare and swap pattern.  */
6870
6871 void
6872 aarch64_expand_compare_and_swap (rtx operands[])
6873 {
6874   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6875   enum machine_mode mode, cmp_mode;
6876   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6877
6878   bval = operands[0];
6879   rval = operands[1];
6880   mem = operands[2];
6881   oldval = operands[3];
6882   newval = operands[4];
6883   is_weak = operands[5];
6884   mod_s = operands[6];
6885   mod_f = operands[7];
6886   mode = GET_MODE (mem);
6887   cmp_mode = mode;
6888
6889   /* Normally the succ memory model must be stronger than fail, but in the
6890      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6891      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
6892
6893   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6894       && INTVAL (mod_s) == MEMMODEL_RELEASE)
6895     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6896
6897   switch (mode)
6898     {
6899     case QImode:
6900     case HImode:
6901       /* For short modes, we're going to perform the comparison in SImode,
6902          so do the zero-extension now.  */
6903       cmp_mode = SImode;
6904       rval = gen_reg_rtx (SImode);
6905       oldval = convert_modes (SImode, mode, oldval, true);
6906       /* Fall through.  */
6907
6908     case SImode:
6909     case DImode:
6910       /* Force the value into a register if needed.  */
6911       if (!aarch64_plus_operand (oldval, mode))
6912         oldval = force_reg (cmp_mode, oldval);
6913       break;
6914
6915     default:
6916       gcc_unreachable ();
6917     }
6918
6919   switch (mode)
6920     {
6921     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6922     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6923     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6924     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6925     default:
6926       gcc_unreachable ();
6927     }
6928
6929   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6930
6931   if (mode == QImode || mode == HImode)
6932     emit_move_insn (operands[1], gen_lowpart (mode, rval));
6933
6934   x = gen_rtx_REG (CCmode, CC_REGNUM);
6935   x = gen_rtx_EQ (SImode, x, const0_rtx);
6936   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6937 }
6938
6939 /* Split a compare and swap pattern.  */
6940
6941 void
6942 aarch64_split_compare_and_swap (rtx operands[])
6943 {
6944   rtx rval, mem, oldval, newval, scratch;
6945   enum machine_mode mode;
6946   bool is_weak;
6947   rtx label1, label2, x, cond;
6948
6949   rval = operands[0];
6950   mem = operands[1];
6951   oldval = operands[2];
6952   newval = operands[3];
6953   is_weak = (operands[4] != const0_rtx);
6954   scratch = operands[7];
6955   mode = GET_MODE (mem);
6956
6957   label1 = NULL_RTX;
6958   if (!is_weak)
6959     {
6960       label1 = gen_label_rtx ();
6961       emit_label (label1);
6962     }
6963   label2 = gen_label_rtx ();
6964
6965   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6966
6967   cond = aarch64_gen_compare_reg (NE, rval, oldval);
6968   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6969   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6970                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6971   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6972
6973   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
6974
6975   if (!is_weak)
6976     {
6977       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6978       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6979                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6980       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6981     }
6982   else
6983     {
6984       cond = gen_rtx_REG (CCmode, CC_REGNUM);
6985       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
6986       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
6987     }
6988
6989   emit_label (label2);
6990 }
6991
6992 /* Split an atomic operation.  */
6993
6994 void
6995 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
6996                      rtx value, rtx model_rtx, rtx cond)
6997 {
6998   enum machine_mode mode = GET_MODE (mem);
6999   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7000   rtx label, x;
7001
7002   label = gen_label_rtx ();
7003   emit_label (label);
7004
7005   if (new_out)
7006     new_out = gen_lowpart (wmode, new_out);
7007   if (old_out)
7008     old_out = gen_lowpart (wmode, old_out);
7009   else
7010     old_out = new_out;
7011   value = simplify_gen_subreg (wmode, value, mode, 0);
7012
7013   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7014
7015   switch (code)
7016     {
7017     case SET:
7018       new_out = value;
7019       break;
7020
7021     case NOT:
7022       x = gen_rtx_AND (wmode, old_out, value);
7023       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7024       x = gen_rtx_NOT (wmode, new_out);
7025       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7026       break;
7027
7028     case MINUS:
7029       if (CONST_INT_P (value))
7030         {
7031           value = GEN_INT (-INTVAL (value));
7032           code = PLUS;
7033         }
7034       /* Fall through.  */
7035
7036     default:
7037       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7038       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7039       break;
7040     }
7041
7042   aarch64_emit_store_exclusive (mode, cond, mem,
7043                                 gen_lowpart (mode, new_out), model_rtx);
7044
7045   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7046   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7047                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7048   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7049 }
7050
7051 static void
7052 aarch64_start_file (void)
7053 {
7054   if (selected_arch)
7055     asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
7056   else if (selected_cpu)
7057     asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
7058   default_file_start();
7059 }
7060
7061 /* Target hook for c_mode_for_suffix.  */
7062 static enum machine_mode
7063 aarch64_c_mode_for_suffix (char suffix)
7064 {
7065   if (suffix == 'q')
7066     return TFmode;
7067
7068   return VOIDmode;
7069 }
7070
7071 /* We can only represent floating point constants which will fit in
7072    "quarter-precision" values.  These values are characterised by
7073    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
7074    by:
7075
7076    (-1)^s * (n/16) * 2^r
7077
7078    Where:
7079      's' is the sign bit.
7080      'n' is an integer in the range 16 <= n <= 31.
7081      'r' is an integer in the range -3 <= r <= 4.  */
7082
7083 /* Return true iff X can be represented by a quarter-precision
7084    floating point immediate operand X.  Note, we cannot represent 0.0.  */
7085 bool
7086 aarch64_float_const_representable_p (rtx x)
7087 {
7088   /* This represents our current view of how many bits
7089      make up the mantissa.  */
7090   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7091   int exponent;
7092   unsigned HOST_WIDE_INT mantissa, mask;
7093   HOST_WIDE_INT m1, m2;
7094   REAL_VALUE_TYPE r, m;
7095
7096   if (!CONST_DOUBLE_P (x))
7097     return false;
7098
7099   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7100
7101   /* We cannot represent infinities, NaNs or +/-zero.  We won't
7102      know if we have +zero until we analyse the mantissa, but we
7103      can reject the other invalid values.  */
7104   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7105       || REAL_VALUE_MINUS_ZERO (r))
7106     return false;
7107
7108   /* Extract exponent.  */
7109   r = real_value_abs (&r);
7110   exponent = REAL_EXP (&r);
7111
7112   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7113      highest (sign) bit, with a fixed binary point at bit point_pos.
7114      m1 holds the low part of the mantissa, m2 the high part.
7115      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7116      bits for the mantissa, this can fail (low bits will be lost).  */
7117   real_ldexp (&m, &r, point_pos - exponent);
7118   REAL_VALUE_TO_INT (&m1, &m2, m);
7119
7120   /* If the low part of the mantissa has bits set we cannot represent
7121      the value.  */
7122   if (m1 != 0)
7123     return false;
7124   /* We have rejected the lower HOST_WIDE_INT, so update our
7125      understanding of how many bits lie in the mantissa and
7126      look only at the high HOST_WIDE_INT.  */
7127   mantissa = m2;
7128   point_pos -= HOST_BITS_PER_WIDE_INT;
7129
7130   /* We can only represent values with a mantissa of the form 1.xxxx.  */
7131   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7132   if ((mantissa & mask) != 0)
7133     return false;
7134
7135   /* Having filtered unrepresentable values, we may now remove all
7136      but the highest 5 bits.  */
7137   mantissa >>= point_pos - 5;
7138
7139   /* We cannot represent the value 0.0, so reject it.  This is handled
7140      elsewhere.  */
7141   if (mantissa == 0)
7142     return false;
7143
7144   /* Then, as bit 4 is always set, we can mask it off, leaving
7145      the mantissa in the range [0, 15].  */
7146   mantissa &= ~(1 << 4);
7147   gcc_assert (mantissa <= 15);
7148
7149   /* GCC internally does not use IEEE754-like encoding (where normalized
7150      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
7151      Our mantissa values are shifted 4 places to the left relative to
7152      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7153      by 5 places to correct for GCC's representation.  */
7154   exponent = 5 - exponent;
7155
7156   return (exponent >= 0 && exponent <= 7);
7157 }
7158
7159 char*
7160 aarch64_output_simd_mov_immediate (rtx *const_vector,
7161                                    enum machine_mode mode,
7162                                    unsigned width)
7163 {
7164   int is_valid;
7165   unsigned char widthc;
7166   int lane_width_bits;
7167   static char templ[40];
7168   int shift = 0, mvn = 0;
7169   const char *mnemonic;
7170   unsigned int lane_count = 0;
7171
7172   is_valid =
7173     aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7174                                            const_vector, &lane_width_bits,
7175                                            &widthc, &mvn, &shift);
7176   gcc_assert (is_valid);
7177
7178   mode = GET_MODE_INNER (mode);
7179   if (mode == SFmode || mode == DFmode)
7180     {
7181       bool zero_p =
7182         aarch64_float_const_zero_rtx_p (*const_vector);
7183       gcc_assert (shift == 0);
7184       mnemonic = zero_p ? "movi" : "fmov";
7185     }
7186   else
7187     mnemonic = mvn ? "mvni" : "movi";
7188
7189   gcc_assert (lane_width_bits != 0);
7190   lane_count = width / lane_width_bits;
7191
7192   if (lane_count == 1)
7193     snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7194   else if (shift)
7195     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7196               mnemonic, lane_count, widthc, shift);
7197   else
7198     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7199               mnemonic, lane_count, widthc);
7200   return templ;
7201 }
7202
7203 /* Split operands into moves from op[1] + op[2] into op[0].  */
7204
7205 void
7206 aarch64_split_combinev16qi (rtx operands[3])
7207 {
7208   unsigned int dest = REGNO (operands[0]);
7209   unsigned int src1 = REGNO (operands[1]);
7210   unsigned int src2 = REGNO (operands[2]);
7211   enum machine_mode halfmode = GET_MODE (operands[1]);
7212   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7213   rtx destlo, desthi;
7214
7215   gcc_assert (halfmode == V16QImode);
7216
7217   if (src1 == dest && src2 == dest + halfregs)
7218     {
7219       /* No-op move.  Can't split to nothing; emit something.  */
7220       emit_note (NOTE_INSN_DELETED);
7221       return;
7222     }
7223
7224   /* Preserve register attributes for variable tracking.  */
7225   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7226   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7227                                GET_MODE_SIZE (halfmode));
7228
7229   /* Special case of reversed high/low parts.  */
7230   if (reg_overlap_mentioned_p (operands[2], destlo)
7231       && reg_overlap_mentioned_p (operands[1], desthi))
7232     {
7233       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7234       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7235       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7236     }
7237   else if (!reg_overlap_mentioned_p (operands[2], destlo))
7238     {
7239       /* Try to avoid unnecessary moves if part of the result
7240          is in the right place already.  */
7241       if (src1 != dest)
7242         emit_move_insn (destlo, operands[1]);
7243       if (src2 != dest + halfregs)
7244         emit_move_insn (desthi, operands[2]);
7245     }
7246   else
7247     {
7248       if (src2 != dest + halfregs)
7249         emit_move_insn (desthi, operands[2]);
7250       if (src1 != dest)
7251         emit_move_insn (destlo, operands[1]);
7252     }
7253 }
7254
7255 /* vec_perm support.  */
7256
7257 #define MAX_VECT_LEN 16
7258
7259 struct expand_vec_perm_d
7260 {
7261   rtx target, op0, op1;
7262   unsigned char perm[MAX_VECT_LEN];
7263   enum machine_mode vmode;
7264   unsigned char nelt;
7265   bool one_vector_p;
7266   bool testing_p;
7267 };
7268
7269 /* Generate a variable permutation.  */
7270
7271 static void
7272 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7273 {
7274   enum machine_mode vmode = GET_MODE (target);
7275   bool one_vector_p = rtx_equal_p (op0, op1);
7276
7277   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7278   gcc_checking_assert (GET_MODE (op0) == vmode);
7279   gcc_checking_assert (GET_MODE (op1) == vmode);
7280   gcc_checking_assert (GET_MODE (sel) == vmode);
7281   gcc_checking_assert (TARGET_SIMD);
7282
7283   if (one_vector_p)
7284     {
7285       if (vmode == V8QImode)
7286         {
7287           /* Expand the argument to a V16QI mode by duplicating it.  */
7288           rtx pair = gen_reg_rtx (V16QImode);
7289           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7290           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7291         }
7292       else
7293         {
7294           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7295         }
7296     }
7297   else
7298     {
7299       rtx pair;
7300
7301       if (vmode == V8QImode)
7302         {
7303           pair = gen_reg_rtx (V16QImode);
7304           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7305           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7306         }
7307       else
7308         {
7309           pair = gen_reg_rtx (OImode);
7310           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7311           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7312         }
7313     }
7314 }
7315
7316 void
7317 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7318 {
7319   enum machine_mode vmode = GET_MODE (target);
7320   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7321   bool one_vector_p = rtx_equal_p (op0, op1);
7322   rtx rmask[MAX_VECT_LEN], mask;
7323
7324   gcc_checking_assert (!BYTES_BIG_ENDIAN);
7325
7326   /* The TBL instruction does not use a modulo index, so we must take care
7327      of that ourselves.  */
7328   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7329   for (i = 0; i < nelt; ++i)
7330     rmask[i] = mask;
7331   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7332   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7333
7334   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7335 }
7336
7337 /* Recognize patterns suitable for the TRN instructions.  */
7338 static bool
7339 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7340 {
7341   unsigned int i, odd, mask, nelt = d->nelt;
7342   rtx out, in0, in1, x;
7343   rtx (*gen) (rtx, rtx, rtx);
7344   enum machine_mode vmode = d->vmode;
7345
7346   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7347     return false;
7348
7349   /* Note that these are little-endian tests.
7350      We correct for big-endian later.  */
7351   if (d->perm[0] == 0)
7352     odd = 0;
7353   else if (d->perm[0] == 1)
7354     odd = 1;
7355   else
7356     return false;
7357   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7358
7359   for (i = 0; i < nelt; i += 2)
7360     {
7361       if (d->perm[i] != i + odd)
7362         return false;
7363       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7364         return false;
7365     }
7366
7367   /* Success!  */
7368   if (d->testing_p)
7369     return true;
7370
7371   in0 = d->op0;
7372   in1 = d->op1;
7373   if (BYTES_BIG_ENDIAN)
7374     {
7375       x = in0, in0 = in1, in1 = x;
7376       odd = !odd;
7377     }
7378   out = d->target;
7379
7380   if (odd)
7381     {
7382       switch (vmode)
7383         {
7384         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7385         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7386         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7387         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7388         case V4SImode: gen = gen_aarch64_trn2v4si; break;
7389         case V2SImode: gen = gen_aarch64_trn2v2si; break;
7390         case V2DImode: gen = gen_aarch64_trn2v2di; break;
7391         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7392         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7393         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7394         default:
7395           return false;
7396         }
7397     }
7398   else
7399     {
7400       switch (vmode)
7401         {
7402         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7403         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7404         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7405         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7406         case V4SImode: gen = gen_aarch64_trn1v4si; break;
7407         case V2SImode: gen = gen_aarch64_trn1v2si; break;
7408         case V2DImode: gen = gen_aarch64_trn1v2di; break;
7409         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7410         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7411         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7412         default:
7413           return false;
7414         }
7415     }
7416
7417   emit_insn (gen (out, in0, in1));
7418   return true;
7419 }
7420
7421 /* Recognize patterns suitable for the UZP instructions.  */
7422 static bool
7423 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7424 {
7425   unsigned int i, odd, mask, nelt = d->nelt;
7426   rtx out, in0, in1, x;
7427   rtx (*gen) (rtx, rtx, rtx);
7428   enum machine_mode vmode = d->vmode;
7429
7430   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7431     return false;
7432
7433   /* Note that these are little-endian tests.
7434      We correct for big-endian later.  */
7435   if (d->perm[0] == 0)
7436     odd = 0;
7437   else if (d->perm[0] == 1)
7438     odd = 1;
7439   else
7440     return false;
7441   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7442
7443   for (i = 0; i < nelt; i++)
7444     {
7445       unsigned elt = (i * 2 + odd) & mask;
7446       if (d->perm[i] != elt)
7447         return false;
7448     }
7449
7450   /* Success!  */
7451   if (d->testing_p)
7452     return true;
7453
7454   in0 = d->op0;
7455   in1 = d->op1;
7456   if (BYTES_BIG_ENDIAN)
7457     {
7458       x = in0, in0 = in1, in1 = x;
7459       odd = !odd;
7460     }
7461   out = d->target;
7462
7463   if (odd)
7464     {
7465       switch (vmode)
7466         {
7467         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7468         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7469         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7470         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7471         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7472         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7473         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7474         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7475         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7476         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7477         default:
7478           return false;
7479         }
7480     }
7481   else
7482     {
7483       switch (vmode)
7484         {
7485         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7486         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7487         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7488         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7489         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7490         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7491         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7492         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7493         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7494         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7495         default:
7496           return false;
7497         }
7498     }
7499
7500   emit_insn (gen (out, in0, in1));
7501   return true;
7502 }
7503
7504 /* Recognize patterns suitable for the ZIP instructions.  */
7505 static bool
7506 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7507 {
7508   unsigned int i, high, mask, nelt = d->nelt;
7509   rtx out, in0, in1, x;
7510   rtx (*gen) (rtx, rtx, rtx);
7511   enum machine_mode vmode = d->vmode;
7512
7513   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7514     return false;
7515
7516   /* Note that these are little-endian tests.
7517      We correct for big-endian later.  */
7518   high = nelt / 2;
7519   if (d->perm[0] == high)
7520     /* Do Nothing.  */
7521     ;
7522   else if (d->perm[0] == 0)
7523     high = 0;
7524   else
7525     return false;
7526   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7527
7528   for (i = 0; i < nelt / 2; i++)
7529     {
7530       unsigned elt = (i + high) & mask;
7531       if (d->perm[i * 2] != elt)
7532         return false;
7533       elt = (elt + nelt) & mask;
7534       if (d->perm[i * 2 + 1] != elt)
7535         return false;
7536     }
7537
7538   /* Success!  */
7539   if (d->testing_p)
7540     return true;
7541
7542   in0 = d->op0;
7543   in1 = d->op1;
7544   if (BYTES_BIG_ENDIAN)
7545     {
7546       x = in0, in0 = in1, in1 = x;
7547       high = !high;
7548     }
7549   out = d->target;
7550
7551   if (high)
7552     {
7553       switch (vmode)
7554         {
7555         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7556         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7557         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7558         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7559         case V4SImode: gen = gen_aarch64_zip2v4si; break;
7560         case V2SImode: gen = gen_aarch64_zip2v2si; break;
7561         case V2DImode: gen = gen_aarch64_zip2v2di; break;
7562         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7563         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7564         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7565         default:
7566           return false;
7567         }
7568     }
7569   else
7570     {
7571       switch (vmode)
7572         {
7573         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7574         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7575         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7576         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7577         case V4SImode: gen = gen_aarch64_zip1v4si; break;
7578         case V2SImode: gen = gen_aarch64_zip1v2si; break;
7579         case V2DImode: gen = gen_aarch64_zip1v2di; break;
7580         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7581         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7582         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7583         default:
7584           return false;
7585         }
7586     }
7587
7588   emit_insn (gen (out, in0, in1));
7589   return true;
7590 }
7591
7592 static bool
7593 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7594 {
7595   rtx rperm[MAX_VECT_LEN], sel;
7596   enum machine_mode vmode = d->vmode;
7597   unsigned int i, nelt = d->nelt;
7598
7599   /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
7600      numbering of elements for big-endian, we must reverse the order.  */
7601   if (BYTES_BIG_ENDIAN)
7602     return false;
7603
7604   if (d->testing_p)
7605     return true;
7606
7607   /* Generic code will try constant permutation twice.  Once with the
7608      original mode and again with the elements lowered to QImode.
7609      So wait and don't do the selector expansion ourselves.  */
7610   if (vmode != V8QImode && vmode != V16QImode)
7611     return false;
7612
7613   for (i = 0; i < nelt; ++i)
7614     rperm[i] = GEN_INT (d->perm[i]);
7615   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7616   sel = force_reg (vmode, sel);
7617
7618   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7619   return true;
7620 }
7621
7622 static bool
7623 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7624 {
7625   /* The pattern matching functions above are written to look for a small
7626      number to begin the sequence (0, 1, N/2).  If we begin with an index
7627      from the second operand, we can swap the operands.  */
7628   if (d->perm[0] >= d->nelt)
7629     {
7630       unsigned i, nelt = d->nelt;
7631       rtx x;
7632
7633       for (i = 0; i < nelt; ++i)
7634         d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7635
7636       x = d->op0;
7637       d->op0 = d->op1;
7638       d->op1 = x;
7639     }
7640
7641   if (TARGET_SIMD)
7642     {
7643       if (aarch64_evpc_zip (d))
7644         return true;
7645       else if (aarch64_evpc_uzp (d))
7646         return true;
7647       else if (aarch64_evpc_trn (d))
7648         return true;
7649       return aarch64_evpc_tbl (d);
7650     }
7651   return false;
7652 }
7653
7654 /* Expand a vec_perm_const pattern.  */
7655
7656 bool
7657 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7658 {
7659   struct expand_vec_perm_d d;
7660   int i, nelt, which;
7661
7662   d.target = target;
7663   d.op0 = op0;
7664   d.op1 = op1;
7665
7666   d.vmode = GET_MODE (target);
7667   gcc_assert (VECTOR_MODE_P (d.vmode));
7668   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7669   d.testing_p = false;
7670
7671   for (i = which = 0; i < nelt; ++i)
7672     {
7673       rtx e = XVECEXP (sel, 0, i);
7674       int ei = INTVAL (e) & (2 * nelt - 1);
7675       which |= (ei < nelt ? 1 : 2);
7676       d.perm[i] = ei;
7677     }
7678
7679   switch (which)
7680     {
7681     default:
7682       gcc_unreachable ();
7683
7684     case 3:
7685       d.one_vector_p = false;
7686       if (!rtx_equal_p (op0, op1))
7687         break;
7688
7689       /* The elements of PERM do not suggest that only the first operand
7690          is used, but both operands are identical.  Allow easier matching
7691          of the permutation by folding the permutation into the single
7692          input vector.  */
7693       /* Fall Through.  */
7694     case 2:
7695       for (i = 0; i < nelt; ++i)
7696         d.perm[i] &= nelt - 1;
7697       d.op0 = op1;
7698       d.one_vector_p = true;
7699       break;
7700
7701     case 1:
7702       d.op1 = op0;
7703       d.one_vector_p = true;
7704       break;
7705     }
7706
7707   return aarch64_expand_vec_perm_const_1 (&d);
7708 }
7709
7710 static bool
7711 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7712                                      const unsigned char *sel)
7713 {
7714   struct expand_vec_perm_d d;
7715   unsigned int i, nelt, which;
7716   bool ret;
7717
7718   d.vmode = vmode;
7719   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7720   d.testing_p = true;
7721   memcpy (d.perm, sel, nelt);
7722
7723   /* Calculate whether all elements are in one vector.  */
7724   for (i = which = 0; i < nelt; ++i)
7725     {
7726       unsigned char e = d.perm[i];
7727       gcc_assert (e < 2 * nelt);
7728       which |= (e < nelt ? 1 : 2);
7729     }
7730
7731   /* If all elements are from the second vector, reindex as if from the
7732      first vector.  */
7733   if (which == 2)
7734     for (i = 0; i < nelt; ++i)
7735       d.perm[i] -= nelt;
7736
7737   /* Check whether the mask can be applied to a single vector.  */
7738   d.one_vector_p = (which != 3);
7739
7740   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7741   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7742   if (!d.one_vector_p)
7743     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7744
7745   start_sequence ();
7746   ret = aarch64_expand_vec_perm_const_1 (&d);
7747   end_sequence ();
7748
7749   return ret;
7750 }
7751
7752 #undef TARGET_ADDRESS_COST
7753 #define TARGET_ADDRESS_COST aarch64_address_cost
7754
7755 /* This hook will determines whether unnamed bitfields affect the alignment
7756    of the containing structure.  The hook returns true if the structure
7757    should inherit the alignment requirements of an unnamed bitfield's
7758    type.  */
7759 #undef TARGET_ALIGN_ANON_BITFIELD
7760 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7761
7762 #undef TARGET_ASM_ALIGNED_DI_OP
7763 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7764
7765 #undef TARGET_ASM_ALIGNED_HI_OP
7766 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7767
7768 #undef TARGET_ASM_ALIGNED_SI_OP
7769 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7770
7771 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7772 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7773   hook_bool_const_tree_hwi_hwi_const_tree_true
7774
7775 #undef TARGET_ASM_FILE_START
7776 #define TARGET_ASM_FILE_START aarch64_start_file
7777
7778 #undef TARGET_ASM_OUTPUT_MI_THUNK
7779 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7780
7781 #undef TARGET_ASM_SELECT_RTX_SECTION
7782 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7783
7784 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7785 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7786
7787 #undef TARGET_BUILD_BUILTIN_VA_LIST
7788 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7789
7790 #undef TARGET_CALLEE_COPIES
7791 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7792
7793 #undef TARGET_CAN_ELIMINATE
7794 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7795
7796 #undef TARGET_CANNOT_FORCE_CONST_MEM
7797 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7798
7799 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7800 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7801
7802 /* Only the least significant bit is used for initialization guard
7803    variables.  */
7804 #undef TARGET_CXX_GUARD_MASK_BIT
7805 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7806
7807 #undef TARGET_C_MODE_FOR_SUFFIX
7808 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7809
7810 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7811 #undef  TARGET_DEFAULT_TARGET_FLAGS
7812 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7813 #endif
7814
7815 #undef TARGET_CLASS_MAX_NREGS
7816 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7817
7818 #undef TARGET_BUILTIN_DECL
7819 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7820
7821 #undef  TARGET_EXPAND_BUILTIN
7822 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7823
7824 #undef TARGET_EXPAND_BUILTIN_VA_START
7825 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7826
7827 #undef TARGET_FUNCTION_ARG
7828 #define TARGET_FUNCTION_ARG aarch64_function_arg
7829
7830 #undef TARGET_FUNCTION_ARG_ADVANCE
7831 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7832
7833 #undef TARGET_FUNCTION_ARG_BOUNDARY
7834 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7835
7836 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7837 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7838
7839 #undef TARGET_FUNCTION_VALUE
7840 #define TARGET_FUNCTION_VALUE aarch64_function_value
7841
7842 #undef TARGET_FUNCTION_VALUE_REGNO_P
7843 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7844
7845 #undef TARGET_FRAME_POINTER_REQUIRED
7846 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7847
7848 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7849 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7850
7851 #undef  TARGET_INIT_BUILTINS
7852 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
7853
7854 #undef TARGET_LEGITIMATE_ADDRESS_P
7855 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7856
7857 #undef TARGET_LEGITIMATE_CONSTANT_P
7858 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7859
7860 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7861 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7862
7863 #undef TARGET_MANGLE_TYPE
7864 #define TARGET_MANGLE_TYPE aarch64_mangle_type
7865
7866 #undef TARGET_MEMORY_MOVE_COST
7867 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7868
7869 #undef TARGET_MUST_PASS_IN_STACK
7870 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7871
7872 /* This target hook should return true if accesses to volatile bitfields
7873    should use the narrowest mode possible.  It should return false if these
7874    accesses should use the bitfield container type.  */
7875 #undef TARGET_NARROW_VOLATILE_BITFIELD
7876 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7877
7878 #undef  TARGET_OPTION_OVERRIDE
7879 #define TARGET_OPTION_OVERRIDE aarch64_override_options
7880
7881 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7882 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7883   aarch64_override_options_after_change
7884
7885 #undef TARGET_PASS_BY_REFERENCE
7886 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7887
7888 #undef TARGET_PREFERRED_RELOAD_CLASS
7889 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7890
7891 #undef TARGET_SECONDARY_RELOAD
7892 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7893
7894 #undef TARGET_SHIFT_TRUNCATION_MASK
7895 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7896
7897 #undef TARGET_SETUP_INCOMING_VARARGS
7898 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7899
7900 #undef TARGET_STRUCT_VALUE_RTX
7901 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
7902
7903 #undef TARGET_REGISTER_MOVE_COST
7904 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7905
7906 #undef TARGET_RETURN_IN_MEMORY
7907 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7908
7909 #undef TARGET_RETURN_IN_MSB
7910 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7911
7912 #undef TARGET_RTX_COSTS
7913 #define TARGET_RTX_COSTS aarch64_rtx_costs
7914
7915 #undef TARGET_TRAMPOLINE_INIT
7916 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7917
7918 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7919 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7920
7921 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7922 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7923
7924 #undef TARGET_ARRAY_MODE_SUPPORTED_P
7925 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7926
7927 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7928 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7929
7930 #undef TARGET_VECTORIZE_BUILTINS
7931 #define TARGET_VECTORIZE_BUILTINS
7932
7933 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7934 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7935   aarch64_builtin_vectorized_function
7936
7937 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7938 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7939   aarch64_autovectorize_vector_sizes
7940
7941 /* Section anchor support.  */
7942
7943 #undef TARGET_MIN_ANCHOR_OFFSET
7944 #define TARGET_MIN_ANCHOR_OFFSET -256
7945
7946 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
7947    byte offset; we can do much more for larger data types, but have no way
7948    to determine the size of the access.  We assume accesses are aligned.  */
7949 #undef TARGET_MAX_ANCHOR_OFFSET
7950 #define TARGET_MAX_ANCHOR_OFFSET 4095
7951
7952 #undef TARGET_VECTOR_ALIGNMENT
7953 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
7954
7955 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7956 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7957   aarch64_simd_vector_alignment_reachable
7958
7959 /* vec_perm support.  */
7960
7961 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
7962 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
7963   aarch64_vectorize_vec_perm_const_ok
7964
7965
7966 #undef TARGET_FIXED_CONDITION_CODE_REGS
7967 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
7968
7969 struct gcc_target targetm = TARGET_INITIALIZER;
7970
7971 #include "gt-aarch64.h"