gcc-4_9-branch/gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "stringpool.h"
  30 #include "stor-layout.h"
  31 #include "calls.h"
  32 #include "varasm.h"
  33 #include "regs.h"
  34 #include "df.h"
  35 #include "hard-reg-set.h"
  36 #include "output.h"
  37 #include "expr.h"
  38 #include "reload.h"
  39 #include "toplev.h"
  40 #include "target.h"
  41 #include "target-def.h"
  42 #include "targhooks.h"
  43 #include "ggc.h"
  44 #include "function.h"
  45 #include "tm_p.h"
  46 #include "recog.h"
  47 #include "langhooks.h"
  48 #include "diagnostic-core.h"
  49 #include "pointer-set.h"
  50 #include "hash-table.h"
  51 #include "vec.h"
  52 #include "basic-block.h"
  53 #include "tree-ssa-alias.h"
  54 #include "internal-fn.h"
  55 #include "gimple-fold.h"
  56 #include "tree-eh.h"
  57 #include "gimple-expr.h"
  58 #include "is-a.h"
  59 #include "gimple.h"
  60 #include "gimplify.h"
  61 #include "optabs.h"
  62 #include "dwarf2.h"
  63 #include "cfgloop.h"
  64 #include "tree-vectorizer.h"
  65 #include "config/arm/aarch-cost-tables.h"
  66 #include "dumpfile.h"
  67
  68 /* Defined for convenience.  */
  69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
  70
  71 /* Classifies an address.
  72
  73    ADDRESS_REG_IMM
  74        A simple base register plus immediate offset.
  75
  76    ADDRESS_REG_WB
  77        A base register indexed by immediate offset with writeback.
  78
  79    ADDRESS_REG_REG
  80        A base register indexed by (optionally scaled) register.
  81
  82    ADDRESS_REG_UXTW
  83        A base register indexed by (optionally scaled) zero-extended register.
  84
  85    ADDRESS_REG_SXTW
  86        A base register indexed by (optionally scaled) sign-extended register.
  87
  88    ADDRESS_LO_SUM
  89        A LO_SUM rtx with a base register and "LO12" symbol relocation.
  90
  91    ADDRESS_SYMBOLIC:
  92        A constant symbolic address, in pc-relative literal pool.  */
  93
  94 enum aarch64_address_type {
  95   ADDRESS_REG_IMM,
  96   ADDRESS_REG_WB,
  97   ADDRESS_REG_REG,
  98   ADDRESS_REG_UXTW,
  99   ADDRESS_REG_SXTW,
 100   ADDRESS_LO_SUM,
 101   ADDRESS_SYMBOLIC
 102 };
 103
 104 struct aarch64_address_info {
 105   enum aarch64_address_type type;
 106   rtx base;
 107   rtx offset;
 108   int shift;
 109   enum aarch64_symbol_type symbol_type;
 110 };
 111
 112 struct simd_immediate_info
 113 {
 114   rtx value;
 115   int shift;
 116   int element_width;
 117   bool mvn;
 118   bool msl;
 119 };
 120
 121 /* The current code model.  */
 122 enum aarch64_code_model aarch64_cmodel;
 123
 124 #ifdef HAVE_AS_TLS
 125 #undef TARGET_HAVE_TLS
 126 #define TARGET_HAVE_TLS 1
 127 #endif
 128
 129 static bool aarch64_lra_p (void);
 130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
 131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 132                                                      const_tree,
 133                                                      enum machine_mode *, int *,
 134                                                      bool *);
 135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 137 static void aarch64_override_options_after_change (void);
 138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 139 static unsigned bit_count (unsigned HOST_WIDE_INT);
 140 static bool aarch64_const_vec_all_same_int_p (rtx,
 141                                               HOST_WIDE_INT, HOST_WIDE_INT);
 142
 143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 144                                                  const unsigned char *sel);
 145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 146
 147 /* The processor for which instructions should be scheduled.  */
 148 enum aarch64_processor aarch64_tune = cortexa53;
 149
 150 /* The current tuning set.  */
 151 const struct tune_params *aarch64_tune_params;
 152
 153 /* Mask to specify which instructions we are allowed to generate.  */
 154 unsigned long aarch64_isa_flags = 0;
 155
 156 /* Mask to specify which instruction scheduling options should be used.  */
 157 unsigned long aarch64_tune_flags = 0;
 158
 159 /* Tuning parameters.  */
 160
 161 #if HAVE_DESIGNATED_INITIALIZERS
 162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
 163 #else
 164 #define NAMED_PARAM(NAME, VAL) (VAL)
 165 #endif
 166
 167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 168 __extension__
 169 #endif
 170
 171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 172 __extension__
 173 #endif
 174 static const struct cpu_addrcost_table generic_addrcost_table =
 175 {
 176 #if HAVE_DESIGNATED_INITIALIZERS
 177   .addr_scale_costs =
 178 #endif
 179     {
 180       NAMED_PARAM (qi, 0),
 181       NAMED_PARAM (hi, 0),
 182       NAMED_PARAM (si, 0),
 183       NAMED_PARAM (ti, 0),
 184     },
 185   NAMED_PARAM (pre_modify, 0),
 186   NAMED_PARAM (post_modify, 0),
 187   NAMED_PARAM (register_offset, 0),
 188   NAMED_PARAM (register_extend, 0),
 189   NAMED_PARAM (imm_offset, 0)
 190 };
 191
 192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 193 __extension__
 194 #endif
 195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
 196 {
 197 #if HAVE_DESIGNATED_INITIALIZERS
 198   .addr_scale_costs =
 199 #endif
 200     {
 201       NAMED_PARAM (qi, 0),
 202       NAMED_PARAM (hi, 1),
 203       NAMED_PARAM (si, 0),
 204       NAMED_PARAM (ti, 1),
 205     },
 206   NAMED_PARAM (pre_modify, 0),
 207   NAMED_PARAM (post_modify, 0),
 208   NAMED_PARAM (register_offset, 0),
 209   NAMED_PARAM (register_extend, 0),
 210   NAMED_PARAM (imm_offset, 0),
 211 };
 212
 213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 214 __extension__
 215 #endif
 216 static const struct cpu_regmove_cost generic_regmove_cost =
 217 {
 218   NAMED_PARAM (GP2GP, 1),
 219   NAMED_PARAM (GP2FP, 2),
 220   NAMED_PARAM (FP2GP, 2),
 221   /* We currently do not provide direct support for TFmode Q->Q move.
 222      Therefore we need to raise the cost above 2 in order to have
 223      reload handle the situation.  */
 224   NAMED_PARAM (FP2FP, 4)
 225 };
 226
 227 /* Generic costs for vector insn classes.  */
 228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 229 __extension__
 230 #endif
 231 static const struct cpu_vector_cost generic_vector_cost =
 232 {
 233   NAMED_PARAM (scalar_stmt_cost, 1),
 234   NAMED_PARAM (scalar_load_cost, 1),
 235   NAMED_PARAM (scalar_store_cost, 1),
 236   NAMED_PARAM (vec_stmt_cost, 1),
 237   NAMED_PARAM (vec_to_scalar_cost, 1),
 238   NAMED_PARAM (scalar_to_vec_cost, 1),
 239   NAMED_PARAM (vec_align_load_cost, 1),
 240   NAMED_PARAM (vec_unalign_load_cost, 1),
 241   NAMED_PARAM (vec_unalign_store_cost, 1),
 242   NAMED_PARAM (vec_store_cost, 1),
 243   NAMED_PARAM (cond_taken_branch_cost, 3),
 244   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 245 };
 246
 247 /* Generic costs for vector insn classes.  */
 248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 249 __extension__
 250 #endif
 251 static const struct cpu_vector_cost cortexa57_vector_cost =
 252 {
 253   NAMED_PARAM (scalar_stmt_cost, 1),
 254   NAMED_PARAM (scalar_load_cost, 4),
 255   NAMED_PARAM (scalar_store_cost, 1),
 256   NAMED_PARAM (vec_stmt_cost, 3),
 257   NAMED_PARAM (vec_to_scalar_cost, 8),
 258   NAMED_PARAM (scalar_to_vec_cost, 8),
 259   NAMED_PARAM (vec_align_load_cost, 5),
 260   NAMED_PARAM (vec_unalign_load_cost, 5),
 261   NAMED_PARAM (vec_unalign_store_cost, 1),
 262   NAMED_PARAM (vec_store_cost, 1),
 263   NAMED_PARAM (cond_taken_branch_cost, 1),
 264   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 265 };
 266
 267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 268 __extension__
 269 #endif
 270 static const struct tune_params generic_tunings =
 271 {
 272   &cortexa57_extra_costs,
 273   &generic_addrcost_table,
 274   &generic_regmove_cost,
 275   &generic_vector_cost,
 276   NAMED_PARAM (memmov_cost, 4),
 277   NAMED_PARAM (issue_rate, 2)
 278 };
 279
 280 static const struct tune_params cortexa53_tunings =
 281 {
 282   &cortexa53_extra_costs,
 283   &generic_addrcost_table,
 284   &generic_regmove_cost,
 285   &generic_vector_cost,
 286   NAMED_PARAM (memmov_cost, 4),
 287   NAMED_PARAM (issue_rate, 2)
 288 };
 289
 290 static const struct tune_params cortexa57_tunings =
 291 {
 292   &cortexa57_extra_costs,
 293   &cortexa57_addrcost_table,
 294   &generic_regmove_cost,
 295   &cortexa57_vector_cost,
 296   NAMED_PARAM (memmov_cost, 4),
 297   NAMED_PARAM (issue_rate, 3)
 298 };
 299
 300 /* A processor implementing AArch64.  */
 301 struct processor
 302 {
 303   const char *const name;
 304   enum aarch64_processor core;
 305   const char *arch;
 306   const unsigned long flags;
 307   const struct tune_params *const tune;
 308 };
 309
 310 /* Processor cores implementing AArch64.  */
 311 static const struct processor all_cores[] =
 312 {
 313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
 314   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
 315 #include "aarch64-cores.def"
 316 #undef AARCH64_CORE
 317   {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
 318   {NULL, aarch64_none, NULL, 0, NULL}
 319 };
 320
 321 /* Architectures implementing AArch64.  */
 322 static const struct processor all_architectures[] =
 323 {
 324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 325   {NAME, CORE, #ARCH, FLAGS, NULL},
 326 #include "aarch64-arches.def"
 327 #undef AARCH64_ARCH
 328   {NULL, aarch64_none, NULL, 0, NULL}
 329 };
 330
 331 /* Target specification.  These are populated as commandline arguments
 332    are processed, or NULL if not specified.  */
 333 static const struct processor *selected_arch;
 334 static const struct processor *selected_cpu;
 335 static const struct processor *selected_tune;
 336
 337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 338
 339 /* An ISA extension in the co-processor and main instruction set space.  */
 340 struct aarch64_option_extension
 341 {
 342   const char *const name;
 343   const unsigned long flags_on;
 344   const unsigned long flags_off;
 345 };
 346
 347 /* ISA extensions in AArch64.  */
 348 static const struct aarch64_option_extension all_extensions[] =
 349 {
 350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 351   {NAME, FLAGS_ON, FLAGS_OFF},
 352 #include "aarch64-option-extensions.def"
 353 #undef AARCH64_OPT_EXTENSION
 354   {NULL, 0, 0}
 355 };
 356
 357 /* Used to track the size of an address when generating a pre/post
 358    increment address.  */
 359 static enum machine_mode aarch64_memory_reference_mode;
 360
 361 /* Used to force GTY into this file.  */
 362 static GTY(()) int gty_dummy;
 363
 364 /* A table of valid AArch64 "bitmask immediate" values for
 365    logical instructions.  */
 366
 367 #define AARCH64_NUM_BITMASKS  5334
 368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 369
 370 typedef enum aarch64_cond_code
 371 {
 372   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 373   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 374   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 375 }
 376 aarch64_cc;
 377
 378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 379
 380 /* The condition codes of the processor, and the inverse function.  */
 381 static const char * const aarch64_condition_codes[] =
 382 {
 383   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 384   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 385 };
 386
 387 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 388 unsigned
 389 aarch64_dbx_register_number (unsigned regno)
 390 {
 391    if (GP_REGNUM_P (regno))
 392      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 393    else if (regno == SP_REGNUM)
 394      return AARCH64_DWARF_SP;
 395    else if (FP_REGNUM_P (regno))
 396      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 397
 398    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 399       equivalent DWARF register.  */
 400    return DWARF_FRAME_REGISTERS;
 401 }
 402
 403 /* Return TRUE if MODE is any of the large INT modes.  */
 404 static bool
 405 aarch64_vect_struct_mode_p (enum machine_mode mode)
 406 {
 407   return mode == OImode || mode == CImode || mode == XImode;
 408 }
 409
 410 /* Return TRUE if MODE is any of the vector modes.  */
 411 static bool
 412 aarch64_vector_mode_p (enum machine_mode mode)
 413 {
 414   return aarch64_vector_mode_supported_p (mode)
 415          || aarch64_vect_struct_mode_p (mode);
 416 }
 417
 418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 419 static bool
 420 aarch64_array_mode_supported_p (enum machine_mode mode,
 421                                 unsigned HOST_WIDE_INT nelems)
 422 {
 423   if (TARGET_SIMD
 424       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 425       && (nelems >= 2 && nelems <= 4))
 426     return true;
 427
 428   return false;
 429 }
 430
 431 /* Implement HARD_REGNO_NREGS.  */
 432
 433 int
 434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 435 {
 436   switch (aarch64_regno_regclass (regno))
 437     {
 438     case FP_REGS:
 439     case FP_LO_REGS:
 440       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 441     default:
 442       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 443     }
 444   gcc_unreachable ();
 445 }
 446
 447 /* Implement HARD_REGNO_MODE_OK.  */
 448
 449 int
 450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 451 {
 452   if (GET_MODE_CLASS (mode) == MODE_CC)
 453     return regno == CC_REGNUM;
 454
 455   if (regno == SP_REGNUM)
 456     /* The purpose of comparing with ptr_mode is to support the
 457        global register variable associated with the stack pointer
 458        register via the syntax of asm ("wsp") in ILP32.  */
 459     return mode == Pmode || mode == ptr_mode;
 460
 461   if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
 462     return mode == Pmode;
 463
 464   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 465     return 1;
 466
 467   if (FP_REGNUM_P (regno))
 468     {
 469       if (aarch64_vect_struct_mode_p (mode))
 470         return
 471           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 472       else
 473         return 1;
 474     }
 475
 476   return 0;
 477 }
 478
 479 /* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
 480 enum machine_mode
 481 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
 482                                      enum machine_mode mode)
 483 {
 484   /* Handle modes that fit within single registers.  */
 485   if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
 486     {
 487       if (GET_MODE_SIZE (mode) >= 4)
 488         return mode;
 489       else
 490         return SImode;
 491     }
 492   /* Fall back to generic for multi-reg and very large modes.  */
 493   else
 494     return choose_hard_reg_mode (regno, nregs, false);
 495 }
 496
 497 /* Return true if calls to DECL should be treated as
 498    long-calls (ie called via a register).  */
 499 static bool
 500 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 501 {
 502   return false;
 503 }
 504
 505 /* Return true if calls to symbol-ref SYM should be treated as
 506    long-calls (ie called via a register).  */
 507 bool
 508 aarch64_is_long_call_p (rtx sym)
 509 {
 510   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 511 }
 512
 513 /* Return true if the offsets to a zero/sign-extract operation
 514    represent an expression that matches an extend operation.  The
 515    operands represent the paramters from
 516
 517    (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 518 bool
 519 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 520                                 rtx extract_imm)
 521 {
 522   HOST_WIDE_INT mult_val, extract_val;
 523
 524   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 525     return false;
 526
 527   mult_val = INTVAL (mult_imm);
 528   extract_val = INTVAL (extract_imm);
 529
 530   if (extract_val > 8
 531       && extract_val < GET_MODE_BITSIZE (mode)
 532       && exact_log2 (extract_val & ~7) > 0
 533       && (extract_val & 7) <= 4
 534       && mult_val == (1 << (extract_val & 7)))
 535     return true;
 536
 537   return false;
 538 }
 539
 540 /* Emit an insn that's a simple single-set.  Both the operands must be
 541    known to be valid.  */
 542 inline static rtx
 543 emit_set_insn (rtx x, rtx y)
 544 {
 545   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 546 }
 547
 548 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 549    return the rtx for register 0 in the proper mode.  */
 550 rtx
 551 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 552 {
 553   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 554   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 555
 556   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 557   return cc_reg;
 558 }
 559
 560 /* Build the SYMBOL_REF for __tls_get_addr.  */
 561
 562 static GTY(()) rtx tls_get_addr_libfunc;
 563
 564 rtx
 565 aarch64_tls_get_addr (void)
 566 {
 567   if (!tls_get_addr_libfunc)
 568     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 569   return tls_get_addr_libfunc;
 570 }
 571
 572 /* Return the TLS model to use for ADDR.  */
 573
 574 static enum tls_model
 575 tls_symbolic_operand_type (rtx addr)
 576 {
 577   enum tls_model tls_kind = TLS_MODEL_NONE;
 578   rtx sym, addend;
 579
 580   if (GET_CODE (addr) == CONST)
 581     {
 582       split_const (addr, &sym, &addend);
 583       if (GET_CODE (sym) == SYMBOL_REF)
 584         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 585     }
 586   else if (GET_CODE (addr) == SYMBOL_REF)
 587     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 588
 589   return tls_kind;
 590 }
 591
 592 /* We'll allow lo_sum's in addresses in our legitimate addresses
 593    so that combine would take care of combining addresses where
 594    necessary, but for generation purposes, we'll generate the address
 595    as :
 596    RTL                               Absolute
 597    tmp = hi (symbol_ref);            adrp  x1, foo
 598    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
 599                                      nop
 600
 601    PIC                               TLS
 602    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 603    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 604                                      bl   __tls_get_addr
 605                                      nop
 606
 607    Load TLS symbol, depending on TLS mechanism and TLS access model.
 608
 609    Global Dynamic - Traditional TLS:
 610    adrp tmp, :tlsgd:imm
 611    add  dest, tmp, #:tlsgd_lo12:imm
 612    bl   __tls_get_addr
 613
 614    Global Dynamic - TLS Descriptors:
 615    adrp dest, :tlsdesc:imm
 616    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 617    add  dest, dest, #:tlsdesc_lo12:imm
 618    blr  tmp
 619    mrs  tp, tpidr_el0
 620    add  dest, dest, tp
 621
 622    Initial Exec:
 623    mrs  tp, tpidr_el0
 624    adrp tmp, :gottprel:imm
 625    ldr  dest, [tmp, #:gottprel_lo12:imm]
 626    add  dest, dest, tp
 627
 628    Local Exec:
 629    mrs  tp, tpidr_el0
 630    add  t0, tp, #:tprel_hi12:imm
 631    add  t0, #:tprel_lo12_nc:imm
 632 */
 633
 634 static void
 635 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 636                                    enum aarch64_symbol_type type)
 637 {
 638   switch (type)
 639     {
 640     case SYMBOL_SMALL_ABSOLUTE:
 641       {
 642         /* In ILP32, the mode of dest can be either SImode or DImode.  */
 643         rtx tmp_reg = dest;
 644         enum machine_mode mode = GET_MODE (dest);
 645
 646         gcc_assert (mode == Pmode || mode == ptr_mode);
 647
 648         if (can_create_pseudo_p ())
 649           tmp_reg = gen_reg_rtx (mode);
 650
 651         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 652         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 653         return;
 654       }
 655
 656     case SYMBOL_TINY_ABSOLUTE:
 657       emit_insn (gen_rtx_SET (Pmode, dest, imm));
 658       return;
 659
 660     case SYMBOL_SMALL_GOT:
 661       {
 662         /* In ILP32, the mode of dest can be either SImode or DImode,
 663            while the got entry is always of SImode size.  The mode of
 664            dest depends on how dest is used: if dest is assigned to a
 665            pointer (e.g. in the memory), it has SImode; it may have
 666            DImode if dest is dereferenced to access the memeory.
 667            This is why we have to handle three different ldr_got_small
 668            patterns here (two patterns for ILP32).  */
 669         rtx tmp_reg = dest;
 670         enum machine_mode mode = GET_MODE (dest);
 671
 672         if (can_create_pseudo_p ())
 673           tmp_reg = gen_reg_rtx (mode);
 674
 675         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 676         if (mode == ptr_mode)
 677           {
 678             if (mode == DImode)
 679               emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
 680             else
 681               emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
 682           }
 683         else
 684           {
 685             gcc_assert (mode == Pmode);
 686             emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
 687           }
 688
 689         return;
 690       }
 691
 692     case SYMBOL_SMALL_TLSGD:
 693       {
 694         rtx insns;
 695         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 696
 697         start_sequence ();
 698         emit_call_insn (gen_tlsgd_small (result, imm));
 699         insns = get_insns ();
 700         end_sequence ();
 701
 702         RTL_CONST_CALL_P (insns) = 1;
 703         emit_libcall_block (insns, dest, result, imm);
 704         return;
 705       }
 706
 707     case SYMBOL_SMALL_TLSDESC:
 708       {
 709         enum machine_mode mode = GET_MODE (dest);
 710         rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
 711         rtx tp;
 712
 713         gcc_assert (mode == Pmode || mode == ptr_mode);
 714
 715         /* In ILP32, the got entry is always of SImode size.  Unlike
 716            small GOT, the dest is fixed at reg 0.  */
 717         if (TARGET_ILP32)
 718           emit_insn (gen_tlsdesc_small_si (imm));
 719         else
 720           emit_insn (gen_tlsdesc_small_di (imm));
 721         tp = aarch64_load_tp (NULL);
 722
 723         if (mode != Pmode)
 724           tp = gen_lowpart (mode, tp);
 725
 726         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
 727         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 728         return;
 729       }
 730
 731     case SYMBOL_SMALL_GOTTPREL:
 732       {
 733         /* In ILP32, the mode of dest can be either SImode or DImode,
 734            while the got entry is always of SImode size.  The mode of
 735            dest depends on how dest is used: if dest is assigned to a
 736            pointer (e.g. in the memory), it has SImode; it may have
 737            DImode if dest is dereferenced to access the memeory.
 738            This is why we have to handle three different tlsie_small
 739            patterns here (two patterns for ILP32).  */
 740         enum machine_mode mode = GET_MODE (dest);
 741         rtx tmp_reg = gen_reg_rtx (mode);
 742         rtx tp = aarch64_load_tp (NULL);
 743
 744         if (mode == ptr_mode)
 745           {
 746             if (mode == DImode)
 747               emit_insn (gen_tlsie_small_di (tmp_reg, imm));
 748             else
 749               {
 750                 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
 751                 tp = gen_lowpart (mode, tp);
 752               }
 753           }
 754         else
 755           {
 756             gcc_assert (mode == Pmode);
 757             emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
 758           }
 759
 760         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
 761         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 762         return;
 763       }
 764
 765     case SYMBOL_SMALL_TPREL:
 766       {
 767         rtx tp = aarch64_load_tp (NULL);
 768         emit_insn (gen_tlsle_small (dest, tp, imm));
 769         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 770         return;
 771       }
 772
 773     case SYMBOL_TINY_GOT:
 774       emit_insn (gen_ldr_got_tiny (dest, imm));
 775       return;
 776
 777     default:
 778       gcc_unreachable ();
 779     }
 780 }
 781
 782 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 783    handle all moves if !can_create_pseudo_p ().  The distinction is
 784    important because, unlike emit_move_insn, the move expanders know
 785    how to force Pmode objects into the constant pool even when the
 786    constant pool address is not itself legitimate.  */
 787 static rtx
 788 aarch64_emit_move (rtx dest, rtx src)
 789 {
 790   return (can_create_pseudo_p ()
 791           ? emit_move_insn (dest, src)
 792           : emit_move_insn_1 (dest, src));
 793 }
 794
 795 /* Split a 128-bit move operation into two 64-bit move operations,
 796    taking care to handle partial overlap of register to register
 797    copies.  Special cases are needed when moving between GP regs and
 798    FP regs.  SRC can be a register, constant or memory; DST a register
 799    or memory.  If either operand is memory it must not have any side
 800    effects.  */
 801 void
 802 aarch64_split_128bit_move (rtx dst, rtx src)
 803 {
 804   rtx dst_lo, dst_hi;
 805   rtx src_lo, src_hi;
 806
 807   enum machine_mode mode = GET_MODE (dst);
 808
 809   gcc_assert (mode == TImode || mode == TFmode);
 810   gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
 811   gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
 812
 813   if (REG_P (dst) && REG_P (src))
 814     {
 815       int src_regno = REGNO (src);
 816       int dst_regno = REGNO (dst);
 817
 818       /* Handle FP <-> GP regs.  */
 819       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 820         {
 821           src_lo = gen_lowpart (word_mode, src);
 822           src_hi = gen_highpart (word_mode, src);
 823
 824           if (mode == TImode)
 825             {
 826               emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
 827               emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
 828             }
 829           else
 830             {
 831               emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
 832               emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
 833             }
 834           return;
 835         }
 836       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 837         {
 838           dst_lo = gen_lowpart (word_mode, dst);
 839           dst_hi = gen_highpart (word_mode, dst);
 840
 841           if (mode == TImode)
 842             {
 843               emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
 844               emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
 845             }
 846           else
 847             {
 848               emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
 849               emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
 850             }
 851           return;
 852         }
 853     }
 854
 855   dst_lo = gen_lowpart (word_mode, dst);
 856   dst_hi = gen_highpart (word_mode, dst);
 857   src_lo = gen_lowpart (word_mode, src);
 858   src_hi = gen_highpart_mode (word_mode, mode, src);
 859
 860   /* At most one pairing may overlap.  */
 861   if (reg_overlap_mentioned_p (dst_lo, src_hi))
 862     {
 863       aarch64_emit_move (dst_hi, src_hi);
 864       aarch64_emit_move (dst_lo, src_lo);
 865     }
 866   else
 867     {
 868       aarch64_emit_move (dst_lo, src_lo);
 869       aarch64_emit_move (dst_hi, src_hi);
 870     }
 871 }
 872
 873 bool
 874 aarch64_split_128bit_move_p (rtx dst, rtx src)
 875 {
 876   return (! REG_P (src)
 877           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 878 }
 879
 880 /* Split a complex SIMD combine.  */
 881
 882 void
 883 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
 884 {
 885   enum machine_mode src_mode = GET_MODE (src1);
 886   enum machine_mode dst_mode = GET_MODE (dst);
 887
 888   gcc_assert (VECTOR_MODE_P (dst_mode));
 889
 890   if (REG_P (dst) && REG_P (src1) && REG_P (src2))
 891     {
 892       rtx (*gen) (rtx, rtx, rtx);
 893
 894       switch (src_mode)
 895         {
 896         case V8QImode:
 897           gen = gen_aarch64_simd_combinev8qi;
 898           break;
 899         case V4HImode:
 900           gen = gen_aarch64_simd_combinev4hi;
 901           break;
 902         case V2SImode:
 903           gen = gen_aarch64_simd_combinev2si;
 904           break;
 905         case V2SFmode:
 906           gen = gen_aarch64_simd_combinev2sf;
 907           break;
 908         case DImode:
 909           gen = gen_aarch64_simd_combinedi;
 910           break;
 911         case DFmode:
 912           gen = gen_aarch64_simd_combinedf;
 913           break;
 914         default:
 915           gcc_unreachable ();
 916         }
 917
 918       emit_insn (gen (dst, src1, src2));
 919       return;
 920     }
 921 }
 922
 923 /* Split a complex SIMD move.  */
 924
 925 void
 926 aarch64_split_simd_move (rtx dst, rtx src)
 927 {
 928   enum machine_mode src_mode = GET_MODE (src);
 929   enum machine_mode dst_mode = GET_MODE (dst);
 930
 931   gcc_assert (VECTOR_MODE_P (dst_mode));
 932
 933   if (REG_P (dst) && REG_P (src))
 934     {
 935       rtx (*gen) (rtx, rtx);
 936
 937       gcc_assert (VECTOR_MODE_P (src_mode));
 938
 939       switch (src_mode)
 940         {
 941         case V16QImode:
 942           gen = gen_aarch64_split_simd_movv16qi;
 943           break;
 944         case V8HImode:
 945           gen = gen_aarch64_split_simd_movv8hi;
 946           break;
 947         case V4SImode:
 948           gen = gen_aarch64_split_simd_movv4si;
 949           break;
 950         case V2DImode:
 951           gen = gen_aarch64_split_simd_movv2di;
 952           break;
 953         case V4SFmode:
 954           gen = gen_aarch64_split_simd_movv4sf;
 955           break;
 956         case V2DFmode:
 957           gen = gen_aarch64_split_simd_movv2df;
 958           break;
 959         default:
 960           gcc_unreachable ();
 961         }
 962
 963       emit_insn (gen (dst, src));
 964       return;
 965     }
 966 }
 967
 968 static rtx
 969 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
 970 {
 971   if (can_create_pseudo_p ())
 972     return force_reg (mode, value);
 973   else
 974     {
 975       x = aarch64_emit_move (x, value);
 976       return x;
 977     }
 978 }
 979
 980
 981 static rtx
 982 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 983 {
 984   if (!aarch64_plus_immediate (GEN_INT (offset), mode))
 985     {
 986       rtx high;
 987       /* Load the full offset into a register.  This
 988          might be improvable in the future.  */
 989       high = GEN_INT (offset);
 990       offset = 0;
 991       high = aarch64_force_temporary (mode, temp, high);
 992       reg = aarch64_force_temporary (mode, temp,
 993                                      gen_rtx_PLUS (mode, high, reg));
 994     }
 995   return plus_constant (mode, reg, offset);
 996 }
 997
 998 void
 999 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1000 {
1001   enum machine_mode mode = GET_MODE (dest);
1002   unsigned HOST_WIDE_INT mask;
1003   int i;
1004   bool first;
1005   unsigned HOST_WIDE_INT val;
1006   bool subtargets;
1007   rtx subtarget;
1008   int one_match, zero_match;
1009
1010   gcc_assert (mode == SImode || mode == DImode);
1011
1012   /* Check on what type of symbol it is.  */
1013   if (GET_CODE (imm) == SYMBOL_REF
1014       || GET_CODE (imm) == LABEL_REF
1015       || GET_CODE (imm) == CONST)
1016     {
1017       rtx mem, base, offset;
1018       enum aarch64_symbol_type sty;
1019
1020       /* If we have (const (plus symbol offset)), separate out the offset
1021          before we start classifying the symbol.  */
1022       split_const (imm, &base, &offset);
1023
1024       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1025       switch (sty)
1026         {
1027         case SYMBOL_FORCE_TO_MEM:
1028           if (offset != const0_rtx
1029               && targetm.cannot_force_const_mem (mode, imm))
1030             {
1031               gcc_assert (can_create_pseudo_p ());
1032               base = aarch64_force_temporary (mode, dest, base);
1033               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1034               aarch64_emit_move (dest, base);
1035               return;
1036             }
1037           mem = force_const_mem (ptr_mode, imm);
1038           gcc_assert (mem);
1039           if (mode != ptr_mode)
1040             mem = gen_rtx_ZERO_EXTEND (mode, mem);
1041           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1042           return;
1043
1044         case SYMBOL_SMALL_TLSGD:
1045         case SYMBOL_SMALL_TLSDESC:
1046         case SYMBOL_SMALL_GOTTPREL:
1047         case SYMBOL_SMALL_GOT:
1048         case SYMBOL_TINY_GOT:
1049           if (offset != const0_rtx)
1050             {
1051               gcc_assert(can_create_pseudo_p ());
1052               base = aarch64_force_temporary (mode, dest, base);
1053               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1054               aarch64_emit_move (dest, base);
1055               return;
1056             }
1057           /* FALLTHRU */
1058
1059         case SYMBOL_SMALL_TPREL:
1060         case SYMBOL_SMALL_ABSOLUTE:
1061         case SYMBOL_TINY_ABSOLUTE:
1062           aarch64_load_symref_appropriately (dest, imm, sty);
1063           return;
1064
1065         default:
1066           gcc_unreachable ();
1067         }
1068     }
1069
1070   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1071     {
1072       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1073       return;
1074     }
1075
1076   if (!CONST_INT_P (imm))
1077     {
1078       if (GET_CODE (imm) == HIGH)
1079         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1080       else
1081         {
1082           rtx mem = force_const_mem (mode, imm);
1083           gcc_assert (mem);
1084           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1085         }
1086
1087       return;
1088     }
1089
1090   if (mode == SImode)
1091     {
1092       /* We know we can't do this in 1 insn, and we must be able to do it
1093          in two; so don't mess around looking for sequences that don't buy
1094          us anything.  */
1095       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1096       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1097                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1098       return;
1099     }
1100
1101   /* Remaining cases are all for DImode.  */
1102
1103   val = INTVAL (imm);
1104   subtargets = optimize && can_create_pseudo_p ();
1105
1106   one_match = 0;
1107   zero_match = 0;
1108   mask = 0xffff;
1109
1110   for (i = 0; i < 64; i += 16, mask <<= 16)
1111     {
1112       if ((val & mask) == 0)
1113         zero_match++;
1114       else if ((val & mask) == mask)
1115         one_match++;
1116     }
1117
1118   if (one_match == 2)
1119     {
1120       mask = 0xffff;
1121       for (i = 0; i < 64; i += 16, mask <<= 16)
1122         {
1123           if ((val & mask) != mask)
1124             {
1125               emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1126               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1127                                          GEN_INT ((val >> i) & 0xffff)));
1128               return;
1129             }
1130         }
1131       gcc_unreachable ();
1132     }
1133
1134   if (zero_match == 2)
1135     goto simple_sequence;
1136
1137   mask = 0x0ffff0000UL;
1138   for (i = 16; i < 64; i += 16, mask <<= 16)
1139     {
1140       HOST_WIDE_INT comp = mask & ~(mask - 1);
1141
1142       if (aarch64_uimm12_shift (val - (val & mask)))
1143         {
1144           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1145
1146           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1147           emit_insn (gen_adddi3 (dest, subtarget,
1148                                  GEN_INT (val - (val & mask))));
1149           return;
1150         }
1151       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1152         {
1153           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1154
1155           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1156                                   GEN_INT ((val + comp) & mask)));
1157           emit_insn (gen_adddi3 (dest, subtarget,
1158                                  GEN_INT (val - ((val + comp) & mask))));
1159           return;
1160         }
1161       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1162         {
1163           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1164
1165           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1166                                   GEN_INT ((val - comp) | ~mask)));
1167           emit_insn (gen_adddi3 (dest, subtarget,
1168                                  GEN_INT (val - ((val - comp) | ~mask))));
1169           return;
1170         }
1171       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1172         {
1173           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1174
1175           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176                                   GEN_INT (val | ~mask)));
1177           emit_insn (gen_adddi3 (dest, subtarget,
1178                                  GEN_INT (val - (val | ~mask))));
1179           return;
1180         }
1181     }
1182
1183   /* See if we can do it by arithmetically combining two
1184      immediates.  */
1185   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1186     {
1187       int j;
1188       mask = 0xffff;
1189
1190       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1191           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1192         {
1193           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1194           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1195                                   GEN_INT (aarch64_bitmasks[i])));
1196           emit_insn (gen_adddi3 (dest, subtarget,
1197                                  GEN_INT (val - aarch64_bitmasks[i])));
1198           return;
1199         }
1200
1201       for (j = 0; j < 64; j += 16, mask <<= 16)
1202         {
1203           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1204             {
1205               emit_insn (gen_rtx_SET (VOIDmode, dest,
1206                                       GEN_INT (aarch64_bitmasks[i])));
1207               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1208                                          GEN_INT ((val >> j) & 0xffff)));
1209               return;
1210             }
1211         }
1212     }
1213
1214   /* See if we can do it by logically combining two immediates.  */
1215   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1216     {
1217       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1218         {
1219           int j;
1220
1221           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1222             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1223               {
1224                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1225                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1226                                         GEN_INT (aarch64_bitmasks[i])));
1227                 emit_insn (gen_iordi3 (dest, subtarget,
1228                                        GEN_INT (aarch64_bitmasks[j])));
1229                 return;
1230               }
1231         }
1232       else if ((val & aarch64_bitmasks[i]) == val)
1233         {
1234           int j;
1235
1236           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1237             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1238               {
1239
1240                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1241                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1242                                         GEN_INT (aarch64_bitmasks[j])));
1243                 emit_insn (gen_anddi3 (dest, subtarget,
1244                                        GEN_INT (aarch64_bitmasks[i])));
1245                 return;
1246               }
1247         }
1248     }
1249
1250  simple_sequence:
1251   first = true;
1252   mask = 0xffff;
1253   for (i = 0; i < 64; i += 16, mask <<= 16)
1254     {
1255       if ((val & mask) != 0)
1256         {
1257           if (first)
1258             {
1259               emit_insn (gen_rtx_SET (VOIDmode, dest,
1260                                       GEN_INT (val & mask)));
1261               first = false;
1262             }
1263           else
1264             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1265                                        GEN_INT ((val >> i) & 0xffff)));
1266         }
1267     }
1268 }
1269
1270 static bool
1271 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1272 {
1273   /* Indirect calls are not currently supported.  */
1274   if (decl == NULL)
1275     return false;
1276
1277   /* Cannot tail-call to long-calls, since these are outside of the
1278      range of a branch instruction (we could handle this if we added
1279      support for indirect tail-calls.  */
1280   if (aarch64_decl_is_long_call_p (decl))
1281     return false;
1282
1283   return true;
1284 }
1285
1286 /* Implement TARGET_PASS_BY_REFERENCE.  */
1287
1288 static bool
1289 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1290                            enum machine_mode mode,
1291                            const_tree type,
1292                            bool named ATTRIBUTE_UNUSED)
1293 {
1294   HOST_WIDE_INT size;
1295   enum machine_mode dummymode;
1296   int nregs;
1297
1298   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
1299   size = (mode == BLKmode && type)
1300     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1301
1302   /* Aggregates are passed by reference based on their size.  */
1303   if (type && AGGREGATE_TYPE_P (type))
1304     {
1305       size = int_size_in_bytes (type);
1306     }
1307
1308   /* Variable sized arguments are always returned by reference.  */
1309   if (size < 0)
1310     return true;
1311
1312   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1313   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1314                                                &dummymode, &nregs,
1315                                                NULL))
1316     return false;
1317
1318   /* Arguments which are variable sized or larger than 2 registers are
1319      passed by reference unless they are a homogenous floating point
1320      aggregate.  */
1321   return size > 2 * UNITS_PER_WORD;
1322 }
1323
1324 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1325 static bool
1326 aarch64_return_in_msb (const_tree valtype)
1327 {
1328   enum machine_mode dummy_mode;
1329   int dummy_int;
1330
1331   /* Never happens in little-endian mode.  */
1332   if (!BYTES_BIG_ENDIAN)
1333     return false;
1334
1335   /* Only composite types smaller than or equal to 16 bytes can
1336      be potentially returned in registers.  */
1337   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1338       || int_size_in_bytes (valtype) <= 0
1339       || int_size_in_bytes (valtype) > 16)
1340     return false;
1341
1342   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1343      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1344      is always passed/returned in the least significant bits of fp/simd
1345      register(s).  */
1346   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1347                                                &dummy_mode, &dummy_int, NULL))
1348     return false;
1349
1350   return true;
1351 }
1352
1353 /* Implement TARGET_FUNCTION_VALUE.
1354    Define how to find the value returned by a function.  */
1355
1356 static rtx
1357 aarch64_function_value (const_tree type, const_tree func,
1358                         bool outgoing ATTRIBUTE_UNUSED)
1359 {
1360   enum machine_mode mode;
1361   int unsignedp;
1362   int count;
1363   enum machine_mode ag_mode;
1364
1365   mode = TYPE_MODE (type);
1366   if (INTEGRAL_TYPE_P (type))
1367     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1368
1369   if (aarch64_return_in_msb (type))
1370     {
1371       HOST_WIDE_INT size = int_size_in_bytes (type);
1372
1373       if (size % UNITS_PER_WORD != 0)
1374         {
1375           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1376           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1377         }
1378     }
1379
1380   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1381                                                &ag_mode, &count, NULL))
1382     {
1383       if (!aarch64_composite_type_p (type, mode))
1384         {
1385           gcc_assert (count == 1 && mode == ag_mode);
1386           return gen_rtx_REG (mode, V0_REGNUM);
1387         }
1388       else
1389         {
1390           int i;
1391           rtx par;
1392
1393           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1394           for (i = 0; i < count; i++)
1395             {
1396               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1397               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1398                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1399               XVECEXP (par, 0, i) = tmp;
1400             }
1401           return par;
1402         }
1403     }
1404   else
1405     return gen_rtx_REG (mode, R0_REGNUM);
1406 }
1407
1408 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1409    Return true if REGNO is the number of a hard register in which the values
1410    of called function may come back.  */
1411
1412 static bool
1413 aarch64_function_value_regno_p (const unsigned int regno)
1414 {
1415   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1416      of 16-byte return values are: 128-bit integers and 16-byte small
1417      structures (excluding homogeneous floating-point aggregates).  */
1418   if (regno == R0_REGNUM || regno == R1_REGNUM)
1419     return true;
1420
1421   /* Up to four fp/simd registers can return a function value, e.g. a
1422      homogeneous floating-point aggregate having four members.  */
1423   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1424     return !TARGET_GENERAL_REGS_ONLY;
1425
1426   return false;
1427 }
1428
1429 /* Implement TARGET_RETURN_IN_MEMORY.
1430
1431    If the type T of the result of a function is such that
1432      void func (T arg)
1433    would require that arg be passed as a value in a register (or set of
1434    registers) according to the parameter passing rules, then the result
1435    is returned in the same registers as would be used for such an
1436    argument.  */
1437
1438 static bool
1439 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1440 {
1441   HOST_WIDE_INT size;
1442   enum machine_mode ag_mode;
1443   int count;
1444
1445   if (!AGGREGATE_TYPE_P (type)
1446       && TREE_CODE (type) != COMPLEX_TYPE
1447       && TREE_CODE (type) != VECTOR_TYPE)
1448     /* Simple scalar types always returned in registers.  */
1449     return false;
1450
1451   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1452                                                type,
1453                                                &ag_mode,
1454                                                &count,
1455                                                NULL))
1456     return false;
1457
1458   /* Types larger than 2 registers returned in memory.  */
1459   size = int_size_in_bytes (type);
1460   return (size < 0 || size > 2 * UNITS_PER_WORD);
1461 }
1462
1463 static bool
1464 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1465                                const_tree type, int *nregs)
1466 {
1467   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1468   return aarch64_vfp_is_call_or_return_candidate (mode,
1469                                                   type,
1470                                                   &pcum->aapcs_vfp_rmode,
1471                                                   nregs,
1472                                                   NULL);
1473 }
1474
1475 /* Given MODE and TYPE of a function argument, return the alignment in
1476    bits.  The idea is to suppress any stronger alignment requested by
1477    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1478    This is a helper function for local use only.  */
1479
1480 static unsigned int
1481 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1482 {
1483   unsigned int alignment;
1484
1485   if (type)
1486     {
1487       if (!integer_zerop (TYPE_SIZE (type)))
1488         {
1489           if (TYPE_MODE (type) == mode)
1490             alignment = TYPE_ALIGN (type);
1491           else
1492             alignment = GET_MODE_ALIGNMENT (mode);
1493         }
1494       else
1495         alignment = 0;
1496     }
1497   else
1498     alignment = GET_MODE_ALIGNMENT (mode);
1499
1500   return alignment;
1501 }
1502
1503 /* Layout a function argument according to the AAPCS64 rules.  The rule
1504    numbers refer to the rule numbers in the AAPCS64.  */
1505
1506 static void
1507 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1508                     const_tree type,
1509                     bool named ATTRIBUTE_UNUSED)
1510 {
1511   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1512   int ncrn, nvrn, nregs;
1513   bool allocate_ncrn, allocate_nvrn;
1514   HOST_WIDE_INT size;
1515
1516   /* We need to do this once per argument.  */
1517   if (pcum->aapcs_arg_processed)
1518     return;
1519
1520   pcum->aapcs_arg_processed = true;
1521
1522   /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
1523   size
1524     = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1525                         UNITS_PER_WORD);
1526
1527   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1528   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1529                                                  mode,
1530                                                  type,
1531                                                  &nregs);
1532
1533   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1534      The following code thus handles passing by SIMD/FP registers first.  */
1535
1536   nvrn = pcum->aapcs_nvrn;
1537
1538   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1539      and homogenous short-vector aggregates (HVA).  */
1540   if (allocate_nvrn)
1541     {
1542       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1543         {
1544           pcum->aapcs_nextnvrn = nvrn + nregs;
1545           if (!aarch64_composite_type_p (type, mode))
1546             {
1547               gcc_assert (nregs == 1);
1548               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1549             }
1550           else
1551             {
1552               rtx par;
1553               int i;
1554               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1555               for (i = 0; i < nregs; i++)
1556                 {
1557                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1558                                          V0_REGNUM + nvrn + i);
1559                   tmp = gen_rtx_EXPR_LIST
1560                     (VOIDmode, tmp,
1561                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1562                   XVECEXP (par, 0, i) = tmp;
1563                 }
1564               pcum->aapcs_reg = par;
1565             }
1566           return;
1567         }
1568       else
1569         {
1570           /* C.3 NSRN is set to 8.  */
1571           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1572           goto on_stack;
1573         }
1574     }
1575
1576   ncrn = pcum->aapcs_ncrn;
1577   nregs = size / UNITS_PER_WORD;
1578
1579   /* C6 - C9.  though the sign and zero extension semantics are
1580      handled elsewhere.  This is the case where the argument fits
1581      entirely general registers.  */
1582   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1583     {
1584       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1585
1586       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1587
1588       /* C.8 if the argument has an alignment of 16 then the NGRN is
1589          rounded up to the next even number.  */
1590       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1591         {
1592           ++ncrn;
1593           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1594         }
1595       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1596          A reg is still generated for it, but the caller should be smart
1597          enough not to use it.  */
1598       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1599         {
1600           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1601         }
1602       else
1603         {
1604           rtx par;
1605           int i;
1606
1607           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1608           for (i = 0; i < nregs; i++)
1609             {
1610               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1611               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1612                                        GEN_INT (i * UNITS_PER_WORD));
1613               XVECEXP (par, 0, i) = tmp;
1614             }
1615           pcum->aapcs_reg = par;
1616         }
1617
1618       pcum->aapcs_nextncrn = ncrn + nregs;
1619       return;
1620     }
1621
1622   /* C.11  */
1623   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1624
1625   /* The argument is passed on stack; record the needed number of words for
1626      this argument and align the total size if necessary.  */
1627 on_stack:
1628   pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1629   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1630     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1631                                                16 / UNITS_PER_WORD);
1632   return;
1633 }
1634
1635 /* Implement TARGET_FUNCTION_ARG.  */
1636
1637 static rtx
1638 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1639                       const_tree type, bool named)
1640 {
1641   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1642   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1643
1644   if (mode == VOIDmode)
1645     return NULL_RTX;
1646
1647   aarch64_layout_arg (pcum_v, mode, type, named);
1648   return pcum->aapcs_reg;
1649 }
1650
1651 void
1652 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1653                            const_tree fntype ATTRIBUTE_UNUSED,
1654                            rtx libname ATTRIBUTE_UNUSED,
1655                            const_tree fndecl ATTRIBUTE_UNUSED,
1656                            unsigned n_named ATTRIBUTE_UNUSED)
1657 {
1658   pcum->aapcs_ncrn = 0;
1659   pcum->aapcs_nvrn = 0;
1660   pcum->aapcs_nextncrn = 0;
1661   pcum->aapcs_nextnvrn = 0;
1662   pcum->pcs_variant = ARM_PCS_AAPCS64;
1663   pcum->aapcs_reg = NULL_RTX;
1664   pcum->aapcs_arg_processed = false;
1665   pcum->aapcs_stack_words = 0;
1666   pcum->aapcs_stack_size = 0;
1667
1668   return;
1669 }
1670
1671 static void
1672 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1673                               enum machine_mode mode,
1674                               const_tree type,
1675                               bool named)
1676 {
1677   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1678   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1679     {
1680       aarch64_layout_arg (pcum_v, mode, type, named);
1681       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1682                   != (pcum->aapcs_stack_words != 0));
1683       pcum->aapcs_arg_processed = false;
1684       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1685       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1686       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1687       pcum->aapcs_stack_words = 0;
1688       pcum->aapcs_reg = NULL_RTX;
1689     }
1690 }
1691
1692 bool
1693 aarch64_function_arg_regno_p (unsigned regno)
1694 {
1695   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1696           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1697 }
1698
1699 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1700    PARM_BOUNDARY bits of alignment, but will be given anything up
1701    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1702    that both before and after the layout of each argument, the Next
1703    Stacked Argument Address (NSAA) will have a minimum alignment of
1704    8 bytes.  */
1705
1706 static unsigned int
1707 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1708 {
1709   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1710
1711   if (alignment < PARM_BOUNDARY)
1712     alignment = PARM_BOUNDARY;
1713   if (alignment > STACK_BOUNDARY)
1714     alignment = STACK_BOUNDARY;
1715   return alignment;
1716 }
1717
1718 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1719
1720    Return true if an argument passed on the stack should be padded upwards,
1721    i.e. if the least-significant byte of the stack slot has useful data.
1722
1723    Small aggregate types are placed in the lowest memory address.
1724
1725    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1726
1727 bool
1728 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1729 {
1730   /* On little-endian targets, the least significant byte of every stack
1731      argument is passed at the lowest byte address of the stack slot.  */
1732   if (!BYTES_BIG_ENDIAN)
1733     return true;
1734
1735   /* Otherwise, integral, floating-point and pointer types are padded downward:
1736      the least significant byte of a stack argument is passed at the highest
1737      byte address of the stack slot.  */
1738   if (type
1739       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1740          || POINTER_TYPE_P (type))
1741       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1742     return false;
1743
1744   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1745   return true;
1746 }
1747
1748 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1749
1750    It specifies padding for the last (may also be the only)
1751    element of a block move between registers and memory.  If
1752    assuming the block is in the memory, padding upward means that
1753    the last element is padded after its highest significant byte,
1754    while in downward padding, the last element is padded at the
1755    its least significant byte side.
1756
1757    Small aggregates and small complex types are always padded
1758    upwards.
1759
1760    We don't need to worry about homogeneous floating-point or
1761    short-vector aggregates; their move is not affected by the
1762    padding direction determined here.  Regardless of endianness,
1763    each element of such an aggregate is put in the least
1764    significant bits of a fp/simd register.
1765
1766    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1767    register has useful data, and return the opposite if the most
1768    significant byte does.  */
1769
1770 bool
1771 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1772                      bool first ATTRIBUTE_UNUSED)
1773 {
1774
1775   /* Small composite types are always padded upward.  */
1776   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1777     {
1778       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1779                             : GET_MODE_SIZE (mode));
1780       if (size < 2 * UNITS_PER_WORD)
1781         return true;
1782     }
1783
1784   /* Otherwise, use the default padding.  */
1785   return !BYTES_BIG_ENDIAN;
1786 }
1787
1788 static enum machine_mode
1789 aarch64_libgcc_cmp_return_mode (void)
1790 {
1791   return SImode;
1792 }
1793
1794 static bool
1795 aarch64_frame_pointer_required (void)
1796 {
1797   /* If the function contains dynamic stack allocations, we need to
1798      use the frame pointer to access the static parts of the frame.  */
1799   if (cfun->calls_alloca)
1800     return true;
1801
1802   /* In aarch64_override_options_after_change
1803      flag_omit_leaf_frame_pointer turns off the frame pointer by
1804      default.  Turn it back on now if we've not got a leaf
1805      function.  */
1806   if (flag_omit_leaf_frame_pointer
1807       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1808     return true;
1809
1810   return false;
1811 }
1812
1813 /* Mark the registers that need to be saved by the callee and calculate
1814    the size of the callee-saved registers area and frame record (both FP
1815    and LR may be omitted).  */
1816 static void
1817 aarch64_layout_frame (void)
1818 {
1819   HOST_WIDE_INT offset = 0;
1820   int regno;
1821
1822   if (reload_completed && cfun->machine->frame.laid_out)
1823     return;
1824
1825   /* First mark all the registers that really need to be saved...  */
1826   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1827     cfun->machine->frame.reg_offset[regno] = -1;
1828
1829   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1830     cfun->machine->frame.reg_offset[regno] = -1;
1831
1832   /* ... that includes the eh data registers (if needed)...  */
1833   if (crtl->calls_eh_return)
1834     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1835       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1836
1837   /* ... and any callee saved register that dataflow says is live.  */
1838   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1839     if (df_regs_ever_live_p (regno)
1840         && !call_used_regs[regno])
1841       cfun->machine->frame.reg_offset[regno] = 0;
1842
1843   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1844     if (df_regs_ever_live_p (regno)
1845         && !call_used_regs[regno])
1846       cfun->machine->frame.reg_offset[regno] = 0;
1847
1848   if (frame_pointer_needed)
1849     {
1850       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1851       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1852       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1853     }
1854
1855   /* Now assign stack slots for them.  */
1856   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1857     if (cfun->machine->frame.reg_offset[regno] != -1)
1858       {
1859         cfun->machine->frame.reg_offset[regno] = offset;
1860         offset += UNITS_PER_WORD;
1861       }
1862
1863   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1864     if (cfun->machine->frame.reg_offset[regno] != -1)
1865       {
1866         cfun->machine->frame.reg_offset[regno] = offset;
1867         offset += UNITS_PER_WORD;
1868       }
1869
1870   if (frame_pointer_needed)
1871     {
1872       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1873       offset += UNITS_PER_WORD;
1874     }
1875
1876   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1877     {
1878       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1879       offset += UNITS_PER_WORD;
1880     }
1881
1882   cfun->machine->frame.padding0 =
1883     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1884   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1885
1886   cfun->machine->frame.saved_regs_size = offset;
1887   cfun->machine->frame.laid_out = true;
1888 }
1889
1890 /* Make the last instruction frame-related and note that it performs
1891    the operation described by FRAME_PATTERN.  */
1892
1893 static void
1894 aarch64_set_frame_expr (rtx frame_pattern)
1895 {
1896   rtx insn;
1897
1898   insn = get_last_insn ();
1899   RTX_FRAME_RELATED_P (insn) = 1;
1900   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1901   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1902                                       frame_pattern,
1903                                       REG_NOTES (insn));
1904 }
1905
1906 static bool
1907 aarch64_register_saved_on_entry (int regno)
1908 {
1909   return cfun->machine->frame.reg_offset[regno] != -1;
1910 }
1911
1912
1913 static void
1914 aarch64_save_or_restore_fprs (int start_offset, int increment,
1915                               bool restore, rtx base_rtx)
1916
1917 {
1918   unsigned regno;
1919   unsigned regno2;
1920   rtx insn;
1921   rtx (*gen_mem_ref)(enum machine_mode, rtx)
1922     = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1923
1924   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1925     {
1926       if (aarch64_register_saved_on_entry (regno))
1927         {
1928           rtx mem;
1929           mem = gen_mem_ref (DFmode,
1930                              plus_constant (Pmode,
1931                                             base_rtx,
1932                                             start_offset));
1933
1934           for (regno2 = regno + 1;
1935                regno2 <= V31_REGNUM
1936                  && !aarch64_register_saved_on_entry (regno2);
1937                regno2++)
1938             {
1939               /* Empty loop.  */
1940             }
1941
1942           if (regno2 <= V31_REGNUM &&
1943               aarch64_register_saved_on_entry (regno2))
1944             {
1945               rtx mem2;
1946
1947               /* Next highest register to be saved.  */
1948               mem2 = gen_mem_ref (DFmode,
1949                                   plus_constant
1950                                   (Pmode,
1951                                    base_rtx,
1952                                    start_offset + increment));
1953               if (restore == false)
1954                 {
1955                   insn = emit_insn
1956                     ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1957                                         mem2, gen_rtx_REG (DFmode, regno2)));
1958
1959                 }
1960               else
1961                 {
1962                   insn = emit_insn
1963                     ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1964                                        gen_rtx_REG (DFmode, regno2), mem2));
1965
1966                   add_reg_note (insn, REG_CFA_RESTORE,
1967                                 gen_rtx_REG (DFmode, regno));
1968                   add_reg_note (insn, REG_CFA_RESTORE,
1969                                 gen_rtx_REG (DFmode, regno2));
1970                 }
1971
1972               /* The first part of a frame-related parallel insn is
1973                  always assumed to be relevant to the frame
1974                  calculations; subsequent parts, are only
1975                  frame-related if explicitly marked.  */
1976               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1977               regno = regno2;
1978               start_offset += increment * 2;
1979             }
1980           else
1981             {
1982               if (restore == false)
1983                 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1984               else
1985                 {
1986                   insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1987                   add_reg_note (insn, REG_CFA_RESTORE,
1988                                 gen_rtx_REG (DFmode, regno));
1989                 }
1990               start_offset += increment;
1991             }
1992           RTX_FRAME_RELATED_P (insn) = 1;
1993         }
1994     }
1995 }
1996
1997
1998 /* offset from the stack pointer of where the saves and
1999    restore's have to happen.  */
2000 static void
2001 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
2002                                                bool restore)
2003 {
2004   rtx insn;
2005   rtx base_rtx = stack_pointer_rtx;
2006   HOST_WIDE_INT start_offset = offset;
2007   HOST_WIDE_INT increment = UNITS_PER_WORD;
2008   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
2009   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2010   unsigned regno;
2011   unsigned regno2;
2012
2013   for (regno = R0_REGNUM; regno <= limit; regno++)
2014     {
2015       if (aarch64_register_saved_on_entry (regno))
2016         {
2017           rtx mem;
2018           mem = gen_mem_ref (Pmode,
2019                              plus_constant (Pmode,
2020                                             base_rtx,
2021                                             start_offset));
2022
2023           for (regno2 = regno + 1;
2024                regno2 <= limit
2025                  && !aarch64_register_saved_on_entry (regno2);
2026                regno2++)
2027             {
2028               /* Empty loop.  */
2029             }
2030           if (regno2 <= limit &&
2031               aarch64_register_saved_on_entry (regno2))
2032             {
2033               rtx mem2;
2034
2035               /* Next highest register to be saved.  */
2036               mem2 = gen_mem_ref (Pmode,
2037                                   plus_constant
2038                                   (Pmode,
2039                                    base_rtx,
2040                                    start_offset + increment));
2041               if (restore == false)
2042                 {
2043                   insn = emit_insn
2044                     ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2045                                         mem2, gen_rtx_REG (DImode, regno2)));
2046
2047                 }
2048               else
2049                 {
2050                   insn = emit_insn
2051                     ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2052                                      gen_rtx_REG (DImode, regno2), mem2));
2053
2054                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2055                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2056                 }
2057
2058               /* The first part of a frame-related parallel insn is
2059                  always assumed to be relevant to the frame
2060                  calculations; subsequent parts, are only
2061                  frame-related if explicitly marked.  */
2062               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2063               regno = regno2;
2064               start_offset += increment * 2;
2065             }
2066           else
2067             {
2068               if (restore == false)
2069                 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2070               else
2071                 {
2072                   insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2073                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2074                 }
2075               start_offset += increment;
2076             }
2077           RTX_FRAME_RELATED_P (insn) = 1;
2078         }
2079     }
2080
2081   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2082 }
2083
2084 /* AArch64 stack frames generated by this compiler look like:
2085
2086         +-------------------------------+
2087         |                               |
2088         |  incoming stack arguments     |
2089         |                               |
2090         +-------------------------------+
2091         |                               | <-- incoming stack pointer (aligned)
2092         |  callee-allocated save area   |
2093         |  for register varargs         |
2094         |                               |
2095         +-------------------------------+
2096         |  local variables              | <-- frame_pointer_rtx
2097         |                               |
2098         +-------------------------------+
2099         |  padding0                     | \
2100         +-------------------------------+  |
2101         |  callee-saved registers       |  | frame.saved_regs_size
2102         +-------------------------------+  |
2103         |  LR'                          |  |
2104         +-------------------------------+  |
2105         |  FP'                          | / <- hard_frame_pointer_rtx (aligned)
2106         +-------------------------------+
2107         |  dynamic allocation           |
2108         +-------------------------------+
2109         |  padding                      |
2110         +-------------------------------+
2111         |  outgoing stack arguments     | <-- arg_pointer
2112         |                               |
2113         +-------------------------------+
2114         |                               | <-- stack_pointer_rtx (aligned)
2115
2116    Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2117    but leave frame_pointer_rtx and hard_frame_pointer_rtx
2118    unchanged.  */
2119
2120 /* Generate the prologue instructions for entry into a function.
2121    Establish the stack frame by decreasing the stack pointer with a
2122    properly calculated size and, if necessary, create a frame record
2123    filled with the values of LR and previous frame pointer.  The
2124    current FP is also set up if it is in use.  */
2125
2126 void
2127 aarch64_expand_prologue (void)
2128 {
2129   /* sub sp, sp, #<frame_size>
2130      stp {fp, lr}, [sp, #<frame_size> - 16]
2131      add fp, sp, #<frame_size> - hardfp_offset
2132      stp {cs_reg}, [fp, #-16] etc.
2133
2134      sub sp, sp, <final_adjustment_if_any>
2135   */
2136   HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
2137   HOST_WIDE_INT frame_size, offset;
2138   HOST_WIDE_INT fp_offset;              /* FP offset from SP */
2139   rtx insn;
2140
2141   aarch64_layout_frame ();
2142   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2143   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2144               && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2145   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2146                 + crtl->outgoing_args_size);
2147   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2148                                           STACK_BOUNDARY / BITS_PER_UNIT);
2149
2150   if (flag_stack_usage_info)
2151     current_function_static_stack_size = frame_size;
2152
2153   fp_offset = (offset
2154                - original_frame_size
2155                - cfun->machine->frame.saved_regs_size);
2156
2157   /* Store pairs and load pairs have a range only -512 to 504.  */
2158   if (offset >= 512)
2159     {
2160       /* When the frame has a large size, an initial decrease is done on
2161          the stack pointer to jump over the callee-allocated save area for
2162          register varargs, the local variable area and/or the callee-saved
2163          register area.  This will allow the pre-index write-back
2164          store pair instructions to be used for setting up the stack frame
2165          efficiently.  */
2166       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2167       if (offset >= 512)
2168         offset = cfun->machine->frame.saved_regs_size;
2169
2170       frame_size -= (offset + crtl->outgoing_args_size);
2171       fp_offset = 0;
2172
2173       if (frame_size >= 0x1000000)
2174         {
2175           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2176           emit_move_insn (op0, GEN_INT (-frame_size));
2177           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2178           aarch64_set_frame_expr (gen_rtx_SET
2179                                   (Pmode, stack_pointer_rtx,
2180                                    plus_constant (Pmode,
2181                                                   stack_pointer_rtx,
2182                                                   -frame_size)));
2183         }
2184       else if (frame_size > 0)
2185         {
2186           if ((frame_size & 0xfff) != frame_size)
2187             {
2188               insn = emit_insn (gen_add2_insn
2189                                 (stack_pointer_rtx,
2190                                  GEN_INT (-(frame_size
2191                                             & ~(HOST_WIDE_INT)0xfff))));
2192               RTX_FRAME_RELATED_P (insn) = 1;
2193             }
2194           if ((frame_size & 0xfff) != 0)
2195             {
2196               insn = emit_insn (gen_add2_insn
2197                                 (stack_pointer_rtx,
2198                                  GEN_INT (-(frame_size
2199                                             & (HOST_WIDE_INT)0xfff))));
2200               RTX_FRAME_RELATED_P (insn) = 1;
2201             }
2202         }
2203     }
2204   else
2205     frame_size = -1;
2206
2207   if (offset > 0)
2208     {
2209       /* Save the frame pointer and lr if the frame pointer is needed
2210          first.  Make the frame pointer point to the location of the
2211          old frame pointer on the stack.  */
2212       if (frame_pointer_needed)
2213         {
2214           rtx mem_fp, mem_lr;
2215
2216           if (fp_offset)
2217             {
2218               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2219                                                GEN_INT (-offset)));
2220               RTX_FRAME_RELATED_P (insn) = 1;
2221               aarch64_set_frame_expr (gen_rtx_SET
2222                                       (Pmode, stack_pointer_rtx,
2223                                        gen_rtx_MINUS (Pmode,
2224                                                       stack_pointer_rtx,
2225                                                       GEN_INT (offset))));
2226               mem_fp = gen_frame_mem (DImode,
2227                                       plus_constant (Pmode,
2228                                                      stack_pointer_rtx,
2229                                                      fp_offset));
2230               mem_lr = gen_frame_mem (DImode,
2231                                       plus_constant (Pmode,
2232                                                      stack_pointer_rtx,
2233                                                      fp_offset
2234                                                      + UNITS_PER_WORD));
2235               insn = emit_insn (gen_store_pairdi (mem_fp,
2236                                                   hard_frame_pointer_rtx,
2237                                                   mem_lr,
2238                                                   gen_rtx_REG (DImode,
2239                                                                LR_REGNUM)));
2240             }
2241           else
2242             {
2243               insn = emit_insn (gen_storewb_pairdi_di
2244                                 (stack_pointer_rtx, stack_pointer_rtx,
2245                                  hard_frame_pointer_rtx,
2246                                  gen_rtx_REG (DImode, LR_REGNUM),
2247                                  GEN_INT (-offset),
2248                                  GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2249               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2250             }
2251
2252           /* The first part of a frame-related parallel insn is always
2253              assumed to be relevant to the frame calculations;
2254              subsequent parts, are only frame-related if explicitly
2255              marked.  */
2256           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257           RTX_FRAME_RELATED_P (insn) = 1;
2258
2259           /* Set up frame pointer to point to the location of the
2260              previous frame pointer on the stack.  */
2261           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2262                                            stack_pointer_rtx,
2263                                            GEN_INT (fp_offset)));
2264           aarch64_set_frame_expr (gen_rtx_SET
2265                                   (Pmode, hard_frame_pointer_rtx,
2266                                    plus_constant (Pmode,
2267                                                   stack_pointer_rtx,
2268                                                   fp_offset)));
2269           RTX_FRAME_RELATED_P (insn) = 1;
2270           insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2271                                            hard_frame_pointer_rtx));
2272         }
2273       else
2274         {
2275           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2276                                            GEN_INT (-offset)));
2277           RTX_FRAME_RELATED_P (insn) = 1;
2278         }
2279
2280       aarch64_save_or_restore_callee_save_registers
2281         (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2282     }
2283
2284   /* when offset >= 512,
2285      sub sp, sp, #<outgoing_args_size> */
2286   if (frame_size > -1)
2287     {
2288       if (crtl->outgoing_args_size > 0)
2289         {
2290           insn = emit_insn (gen_add2_insn
2291                             (stack_pointer_rtx,
2292                              GEN_INT (- crtl->outgoing_args_size)));
2293           RTX_FRAME_RELATED_P (insn) = 1;
2294         }
2295     }
2296 }
2297
2298 /* Generate the epilogue instructions for returning from a function.  */
2299 void
2300 aarch64_expand_epilogue (bool for_sibcall)
2301 {
2302   HOST_WIDE_INT original_frame_size, frame_size, offset;
2303   HOST_WIDE_INT fp_offset;
2304   rtx insn;
2305   rtx cfa_reg;
2306
2307   aarch64_layout_frame ();
2308   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2309   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2310                 + crtl->outgoing_args_size);
2311   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2312                                           STACK_BOUNDARY / BITS_PER_UNIT);
2313
2314   fp_offset = (offset
2315                - original_frame_size
2316                - cfun->machine->frame.saved_regs_size);
2317
2318   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2319
2320   /* Store pairs and load pairs have a range only -512 to 504.  */
2321   if (offset >= 512)
2322     {
2323       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2324       if (offset >= 512)
2325         offset = cfun->machine->frame.saved_regs_size;
2326
2327       frame_size -= (offset + crtl->outgoing_args_size);
2328       fp_offset = 0;
2329       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2330         {
2331           insn = emit_insn (gen_add2_insn
2332                             (stack_pointer_rtx,
2333                              GEN_INT (crtl->outgoing_args_size)));
2334           RTX_FRAME_RELATED_P (insn) = 1;
2335         }
2336     }
2337   else
2338     frame_size = -1;
2339
2340   /* If there were outgoing arguments or we've done dynamic stack
2341      allocation, then restore the stack pointer from the frame
2342      pointer.  This is at most one insn and more efficient than using
2343      GCC's internal mechanism.  */
2344   if (frame_pointer_needed
2345       && (crtl->outgoing_args_size || cfun->calls_alloca))
2346     {
2347       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2348                                        hard_frame_pointer_rtx,
2349                                        GEN_INT (- fp_offset)));
2350       RTX_FRAME_RELATED_P (insn) = 1;
2351       /* As SP is set to (FP - fp_offset), according to the rules in
2352          dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2353          from the value of SP from now on.  */
2354       cfa_reg = stack_pointer_rtx;
2355     }
2356
2357   aarch64_save_or_restore_callee_save_registers
2358     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2359
2360   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2361   if (offset > 0)
2362     {
2363       if (frame_pointer_needed)
2364         {
2365           rtx mem_fp, mem_lr;
2366
2367           if (fp_offset)
2368             {
2369               mem_fp = gen_frame_mem (DImode,
2370                                       plus_constant (Pmode,
2371                                                      stack_pointer_rtx,
2372                                                      fp_offset));
2373               mem_lr = gen_frame_mem (DImode,
2374                                       plus_constant (Pmode,
2375                                                      stack_pointer_rtx,
2376                                                      fp_offset
2377                                                      + UNITS_PER_WORD));
2378               insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2379                                                  mem_fp,
2380                                                  gen_rtx_REG (DImode,
2381                                                               LR_REGNUM),
2382                                                  mem_lr));
2383             }
2384           else
2385             {
2386               insn = emit_insn (gen_loadwb_pairdi_di
2387                                 (stack_pointer_rtx,
2388                                  stack_pointer_rtx,
2389                                  hard_frame_pointer_rtx,
2390                                  gen_rtx_REG (DImode, LR_REGNUM),
2391                                  GEN_INT (offset),
2392                                  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2393               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2394               add_reg_note (insn, REG_CFA_ADJUST_CFA,
2395                             (gen_rtx_SET (Pmode, stack_pointer_rtx,
2396                                           plus_constant (Pmode, cfa_reg,
2397                                                          offset))));
2398             }
2399
2400           /* The first part of a frame-related parallel insn
2401              is always assumed to be relevant to the frame
2402              calculations; subsequent parts, are only
2403              frame-related if explicitly marked.  */
2404           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2405           RTX_FRAME_RELATED_P (insn) = 1;
2406           add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2407           add_reg_note (insn, REG_CFA_RESTORE,
2408                         gen_rtx_REG (DImode, LR_REGNUM));
2409
2410           if (fp_offset)
2411             {
2412               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2413                                                GEN_INT (offset)));
2414               RTX_FRAME_RELATED_P (insn) = 1;
2415             }
2416         }
2417       else
2418         {
2419           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2420                                            GEN_INT (offset)));
2421           RTX_FRAME_RELATED_P (insn) = 1;
2422         }
2423     }
2424
2425   /* Stack adjustment for exception handler.  */
2426   if (crtl->calls_eh_return)
2427     {
2428       /* We need to unwind the stack by the offset computed by
2429          EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2430          based on SP.  Ideally we would update the SP and define the
2431          CFA along the lines of:
2432
2433          SP = SP + EH_RETURN_STACKADJ_RTX
2434          (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2435
2436          However the dwarf emitter only understands a constant
2437          register offset.
2438
2439          The solution chosen here is to use the otherwise unused IP0
2440          as a temporary register to hold the current SP value.  The
2441          CFA is described using IP0 then SP is modified.  */
2442
2443       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2444
2445       insn = emit_move_insn (ip0, stack_pointer_rtx);
2446       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2447       RTX_FRAME_RELATED_P (insn) = 1;
2448
2449       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2450
2451       /* Ensure the assignment to IP0 does not get optimized away.  */
2452       emit_use (ip0);
2453     }
2454
2455   if (frame_size > -1)
2456     {
2457       if (frame_size >= 0x1000000)
2458         {
2459           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2460           emit_move_insn (op0, GEN_INT (frame_size));
2461           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2462           aarch64_set_frame_expr (gen_rtx_SET
2463                                   (Pmode, stack_pointer_rtx,
2464                                    plus_constant (Pmode,
2465                                                   stack_pointer_rtx,
2466                                                   frame_size)));
2467         }
2468       else if (frame_size > 0)
2469         {
2470           if ((frame_size & 0xfff) != 0)
2471             {
2472               insn = emit_insn (gen_add2_insn
2473                                 (stack_pointer_rtx,
2474                                  GEN_INT ((frame_size
2475                                            & (HOST_WIDE_INT) 0xfff))));
2476               RTX_FRAME_RELATED_P (insn) = 1;
2477             }
2478           if ((frame_size & 0xfff) != frame_size)
2479             {
2480               insn = emit_insn (gen_add2_insn
2481                                 (stack_pointer_rtx,
2482                                  GEN_INT ((frame_size
2483                                            & ~ (HOST_WIDE_INT) 0xfff))));
2484               RTX_FRAME_RELATED_P (insn) = 1;
2485             }
2486         }
2487
2488         aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2489                                              plus_constant (Pmode,
2490                                                             stack_pointer_rtx,
2491                                                             offset)));
2492     }
2493
2494   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2495   if (!for_sibcall)
2496     emit_jump_insn (ret_rtx);
2497 }
2498
2499 /* Return the place to copy the exception unwinding return address to.
2500    This will probably be a stack slot, but could (in theory be the
2501    return register).  */
2502 rtx
2503 aarch64_final_eh_return_addr (void)
2504 {
2505   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2506   aarch64_layout_frame ();
2507   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2508   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2509                 + crtl->outgoing_args_size);
2510   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2511                                           STACK_BOUNDARY / BITS_PER_UNIT);
2512   fp_offset = offset
2513     - original_frame_size
2514     - cfun->machine->frame.saved_regs_size;
2515
2516   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2517     return gen_rtx_REG (DImode, LR_REGNUM);
2518
2519   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2520      result in a store to save LR introduced by builtin_eh_return () being
2521      incorrectly deleted because the alias is not detected.
2522      So in the calculation of the address to copy the exception unwinding
2523      return address to, we note 2 cases.
2524      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2525      we return a SP-relative location since all the addresses are SP-relative
2526      in this case.  This prevents the store from being optimized away.
2527      If the fp_offset is not 0, then the addresses will be FP-relative and
2528      therefore we return a FP-relative location.  */
2529
2530   if (frame_pointer_needed)
2531     {
2532       if (fp_offset)
2533         return gen_frame_mem (DImode,
2534                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2535       else
2536         return gen_frame_mem (DImode,
2537                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2538     }
2539
2540   /* If FP is not needed, we calculate the location of LR, which would be
2541      at the top of the saved registers block.  */
2542
2543   return gen_frame_mem (DImode,
2544                         plus_constant (Pmode,
2545                                        stack_pointer_rtx,
2546                                        fp_offset
2547                                        + cfun->machine->frame.saved_regs_size
2548                                        - 2 * UNITS_PER_WORD));
2549 }
2550
2551 /* Possibly output code to build up a constant in a register.  For
2552    the benefit of the costs infrastructure, returns the number of
2553    instructions which would be emitted.  GENERATE inhibits or
2554    enables code generation.  */
2555
2556 static int
2557 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2558 {
2559   int insns = 0;
2560
2561   if (aarch64_bitmask_imm (val, DImode))
2562     {
2563       if (generate)
2564         emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2565       insns = 1;
2566     }
2567   else
2568     {
2569       int i;
2570       int ncount = 0;
2571       int zcount = 0;
2572       HOST_WIDE_INT valp = val >> 16;
2573       HOST_WIDE_INT valm;
2574       HOST_WIDE_INT tval;
2575
2576       for (i = 16; i < 64; i += 16)
2577         {
2578           valm = (valp & 0xffff);
2579
2580           if (valm != 0)
2581             ++ zcount;
2582
2583           if (valm != 0xffff)
2584             ++ ncount;
2585
2586           valp >>= 16;
2587         }
2588
2589       /* zcount contains the number of additional MOVK instructions
2590          required if the constant is built up with an initial MOVZ instruction,
2591          while ncount is the number of MOVK instructions required if starting
2592          with a MOVN instruction.  Choose the sequence that yields the fewest
2593          number of instructions, preferring MOVZ instructions when they are both
2594          the same.  */
2595       if (ncount < zcount)
2596         {
2597           if (generate)
2598             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2599                             GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2600           tval = 0xffff;
2601           insns++;
2602         }
2603       else
2604         {
2605           if (generate)
2606             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2607                             GEN_INT (val & 0xffff));
2608           tval = 0;
2609           insns++;
2610         }
2611
2612       val >>= 16;
2613
2614       for (i = 16; i < 64; i += 16)
2615         {
2616           if ((val & 0xffff) != tval)
2617             {
2618               if (generate)
2619                 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2620                                            GEN_INT (i),
2621                                            GEN_INT (val & 0xffff)));
2622               insns++;
2623             }
2624           val >>= 16;
2625         }
2626     }
2627   return insns;
2628 }
2629
2630 static void
2631 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2632 {
2633   HOST_WIDE_INT mdelta = delta;
2634   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2635   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2636
2637   if (mdelta < 0)
2638     mdelta = -mdelta;
2639
2640   if (mdelta >= 4096 * 4096)
2641     {
2642       (void) aarch64_build_constant (scratchreg, delta, true);
2643       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2644     }
2645   else if (mdelta > 0)
2646     {
2647       if (mdelta >= 4096)
2648         {
2649           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2650           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2651           if (delta < 0)
2652             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2653                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2654           else
2655             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2656                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2657         }
2658       if (mdelta % 4096 != 0)
2659         {
2660           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2661           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2662                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2663         }
2664     }
2665 }
2666
2667 /* Output code to add DELTA to the first argument, and then jump
2668    to FUNCTION.  Used for C++ multiple inheritance.  */
2669 static void
2670 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2671                          HOST_WIDE_INT delta,
2672                          HOST_WIDE_INT vcall_offset,
2673                          tree function)
2674 {
2675   /* The this pointer is always in x0.  Note that this differs from
2676      Arm where the this pointer maybe bumped to r1 if r0 is required
2677      to return a pointer to an aggregate.  On AArch64 a result value
2678      pointer will be in x8.  */
2679   int this_regno = R0_REGNUM;
2680   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2681
2682   reload_completed = 1;
2683   emit_note (NOTE_INSN_PROLOGUE_END);
2684
2685   if (vcall_offset == 0)
2686     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2687   else
2688     {
2689       gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2690
2691       this_rtx = gen_rtx_REG (Pmode, this_regno);
2692       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2693       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2694
2695       addr = this_rtx;
2696       if (delta != 0)
2697         {
2698           if (delta >= -256 && delta < 256)
2699             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2700                                        plus_constant (Pmode, this_rtx, delta));
2701           else
2702             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2703         }
2704
2705       if (Pmode == ptr_mode)
2706         aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2707       else
2708         aarch64_emit_move (temp0,
2709                            gen_rtx_ZERO_EXTEND (Pmode,
2710                                                 gen_rtx_MEM (ptr_mode, addr)));
2711
2712       if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2713           addr = plus_constant (Pmode, temp0, vcall_offset);
2714       else
2715         {
2716           (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2717           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2718         }
2719
2720       if (Pmode == ptr_mode)
2721         aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2722       else
2723         aarch64_emit_move (temp1,
2724                            gen_rtx_SIGN_EXTEND (Pmode,
2725                                                 gen_rtx_MEM (ptr_mode, addr)));
2726
2727       emit_insn (gen_add2_insn (this_rtx, temp1));
2728     }
2729
2730   /* Generate a tail call to the target function.  */
2731   if (!TREE_USED (function))
2732     {
2733       assemble_external (function);
2734       TREE_USED (function) = 1;
2735     }
2736   funexp = XEXP (DECL_RTL (function), 0);
2737   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2738   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2739   SIBLING_CALL_P (insn) = 1;
2740
2741   insn = get_insns ();
2742   shorten_branches (insn);
2743   final_start_function (insn, file, 1);
2744   final (insn, file, 1);
2745   final_end_function ();
2746
2747   /* Stop pretending to be a post-reload pass.  */
2748   reload_completed = 0;
2749 }
2750
2751 static int
2752 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2753 {
2754   if (GET_CODE (*x) == SYMBOL_REF)
2755     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2756
2757   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2758      TLS offsets, not real symbol references.  */
2759   if (GET_CODE (*x) == UNSPEC
2760       && XINT (*x, 1) == UNSPEC_TLS)
2761     return -1;
2762
2763   return 0;
2764 }
2765
2766 static bool
2767 aarch64_tls_referenced_p (rtx x)
2768 {
2769   if (!TARGET_HAVE_TLS)
2770     return false;
2771
2772   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2773 }
2774
2775
2776 static int
2777 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2778 {
2779   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2780   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2781
2782   if (*imm1 < *imm2)
2783     return -1;
2784   if (*imm1 > *imm2)
2785     return +1;
2786   return 0;
2787 }
2788
2789
2790 static void
2791 aarch64_build_bitmask_table (void)
2792 {
2793   unsigned HOST_WIDE_INT mask, imm;
2794   unsigned int log_e, e, s, r;
2795   unsigned int nimms = 0;
2796
2797   for (log_e = 1; log_e <= 6; log_e++)
2798     {
2799       e = 1 << log_e;
2800       if (e == 64)
2801         mask = ~(HOST_WIDE_INT) 0;
2802       else
2803         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2804       for (s = 1; s < e; s++)
2805         {
2806           for (r = 0; r < e; r++)
2807             {
2808               /* set s consecutive bits to 1 (s < 64) */
2809               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2810               /* rotate right by r */
2811               if (r != 0)
2812                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2813               /* replicate the constant depending on SIMD size */
2814               switch (log_e) {
2815               case 1: imm |= (imm <<  2);
2816               case 2: imm |= (imm <<  4);
2817               case 3: imm |= (imm <<  8);
2818               case 4: imm |= (imm << 16);
2819               case 5: imm |= (imm << 32);
2820               case 6:
2821                 break;
2822               default:
2823                 gcc_unreachable ();
2824               }
2825               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2826               aarch64_bitmasks[nimms++] = imm;
2827             }
2828         }
2829     }
2830
2831   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2832   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2833          aarch64_bitmasks_cmp);
2834 }
2835
2836
2837 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2838    a left shift of 0 or 12 bits.  */
2839 bool
2840 aarch64_uimm12_shift (HOST_WIDE_INT val)
2841 {
2842   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2843           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2844           );
2845 }
2846
2847
2848 /* Return true if val is an immediate that can be loaded into a
2849    register by a MOVZ instruction.  */
2850 static bool
2851 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2852 {
2853   if (GET_MODE_SIZE (mode) > 4)
2854     {
2855       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2856           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2857         return 1;
2858     }
2859   else
2860     {
2861       /* Ignore sign extension.  */
2862       val &= (HOST_WIDE_INT) 0xffffffff;
2863     }
2864   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2865           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2866 }
2867
2868
2869 /* Return true if val is a valid bitmask immediate.  */
2870 bool
2871 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2872 {
2873   if (GET_MODE_SIZE (mode) < 8)
2874     {
2875       /* Replicate bit pattern.  */
2876       val &= (HOST_WIDE_INT) 0xffffffff;
2877       val |= val << 32;
2878     }
2879   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2880                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2881 }
2882
2883
2884 /* Return true if val is an immediate that can be loaded into a
2885    register in a single instruction.  */
2886 bool
2887 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2888 {
2889   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2890     return 1;
2891   return aarch64_bitmask_imm (val, mode);
2892 }
2893
2894 static bool
2895 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2896 {
2897   rtx base, offset;
2898
2899   if (GET_CODE (x) == HIGH)
2900     return true;
2901
2902   split_const (x, &base, &offset);
2903   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2904     {
2905       if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2906           != SYMBOL_FORCE_TO_MEM)
2907         return true;
2908       else
2909         /* Avoid generating a 64-bit relocation in ILP32; leave
2910            to aarch64_expand_mov_immediate to handle it properly.  */
2911         return mode != ptr_mode;
2912     }
2913
2914   return aarch64_tls_referenced_p (x);
2915 }
2916
2917 /* Return true if register REGNO is a valid index register.
2918    STRICT_P is true if REG_OK_STRICT is in effect.  */
2919
2920 bool
2921 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2922 {
2923   if (!HARD_REGISTER_NUM_P (regno))
2924     {
2925       if (!strict_p)
2926         return true;
2927
2928       if (!reg_renumber)
2929         return false;
2930
2931       regno = reg_renumber[regno];
2932     }
2933   return GP_REGNUM_P (regno);
2934 }
2935
2936 /* Return true if register REGNO is a valid base register for mode MODE.
2937    STRICT_P is true if REG_OK_STRICT is in effect.  */
2938
2939 bool
2940 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2941 {
2942   if (!HARD_REGISTER_NUM_P (regno))
2943     {
2944       if (!strict_p)
2945         return true;
2946
2947       if (!reg_renumber)
2948         return false;
2949
2950       regno = reg_renumber[regno];
2951     }
2952
2953   /* The fake registers will be eliminated to either the stack or
2954      hard frame pointer, both of which are usually valid base registers.
2955      Reload deals with the cases where the eliminated form isn't valid.  */
2956   return (GP_REGNUM_P (regno)
2957           || regno == SP_REGNUM
2958           || regno == FRAME_POINTER_REGNUM
2959           || regno == ARG_POINTER_REGNUM);
2960 }
2961
2962 /* Return true if X is a valid base register for mode MODE.
2963    STRICT_P is true if REG_OK_STRICT is in effect.  */
2964
2965 static bool
2966 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2967 {
2968   if (!strict_p && GET_CODE (x) == SUBREG)
2969     x = SUBREG_REG (x);
2970
2971   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2972 }
2973
2974 /* Return true if address offset is a valid index.  If it is, fill in INFO
2975    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2976
2977 static bool
2978 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2979                         enum machine_mode mode, bool strict_p)
2980 {
2981   enum aarch64_address_type type;
2982   rtx index;
2983   int shift;
2984
2985   /* (reg:P) */
2986   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2987       && GET_MODE (x) == Pmode)
2988     {
2989       type = ADDRESS_REG_REG;
2990       index = x;
2991       shift = 0;
2992     }
2993   /* (sign_extend:DI (reg:SI)) */
2994   else if ((GET_CODE (x) == SIGN_EXTEND
2995             || GET_CODE (x) == ZERO_EXTEND)
2996            && GET_MODE (x) == DImode
2997            && GET_MODE (XEXP (x, 0)) == SImode)
2998     {
2999       type = (GET_CODE (x) == SIGN_EXTEND)
3000         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3001       index = XEXP (x, 0);
3002       shift = 0;
3003     }
3004   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3005   else if (GET_CODE (x) == MULT
3006            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3007                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3008            && GET_MODE (XEXP (x, 0)) == DImode
3009            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3010            && CONST_INT_P (XEXP (x, 1)))
3011     {
3012       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3013         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3014       index = XEXP (XEXP (x, 0), 0);
3015       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3016     }
3017   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3018   else if (GET_CODE (x) == ASHIFT
3019            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3020                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3021            && GET_MODE (XEXP (x, 0)) == DImode
3022            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3023            && CONST_INT_P (XEXP (x, 1)))
3024     {
3025       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3026         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3027       index = XEXP (XEXP (x, 0), 0);
3028       shift = INTVAL (XEXP (x, 1));
3029     }
3030   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3031   else if ((GET_CODE (x) == SIGN_EXTRACT
3032             || GET_CODE (x) == ZERO_EXTRACT)
3033            && GET_MODE (x) == DImode
3034            && GET_CODE (XEXP (x, 0)) == MULT
3035            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3037     {
3038       type = (GET_CODE (x) == SIGN_EXTRACT)
3039         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3040       index = XEXP (XEXP (x, 0), 0);
3041       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3042       if (INTVAL (XEXP (x, 1)) != 32 + shift
3043           || INTVAL (XEXP (x, 2)) != 0)
3044         shift = -1;
3045     }
3046   /* (and:DI (mult:DI (reg:DI) (const_int scale))
3047      (const_int 0xffffffff<<shift)) */
3048   else if (GET_CODE (x) == AND
3049            && GET_MODE (x) == DImode
3050            && GET_CODE (XEXP (x, 0)) == MULT
3051            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3052            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3053            && CONST_INT_P (XEXP (x, 1)))
3054     {
3055       type = ADDRESS_REG_UXTW;
3056       index = XEXP (XEXP (x, 0), 0);
3057       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3058       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3059         shift = -1;
3060     }
3061   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3062   else if ((GET_CODE (x) == SIGN_EXTRACT
3063             || GET_CODE (x) == ZERO_EXTRACT)
3064            && GET_MODE (x) == DImode
3065            && GET_CODE (XEXP (x, 0)) == ASHIFT
3066            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3067            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3068     {
3069       type = (GET_CODE (x) == SIGN_EXTRACT)
3070         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3071       index = XEXP (XEXP (x, 0), 0);
3072       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3073       if (INTVAL (XEXP (x, 1)) != 32 + shift
3074           || INTVAL (XEXP (x, 2)) != 0)
3075         shift = -1;
3076     }
3077   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3078      (const_int 0xffffffff<<shift)) */
3079   else if (GET_CODE (x) == AND
3080            && GET_MODE (x) == DImode
3081            && GET_CODE (XEXP (x, 0)) == ASHIFT
3082            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3083            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3084            && CONST_INT_P (XEXP (x, 1)))
3085     {
3086       type = ADDRESS_REG_UXTW;
3087       index = XEXP (XEXP (x, 0), 0);
3088       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3089       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3090         shift = -1;
3091     }
3092   /* (mult:P (reg:P) (const_int scale)) */
3093   else if (GET_CODE (x) == MULT
3094            && GET_MODE (x) == Pmode
3095            && GET_MODE (XEXP (x, 0)) == Pmode
3096            && CONST_INT_P (XEXP (x, 1)))
3097     {
3098       type = ADDRESS_REG_REG;
3099       index = XEXP (x, 0);
3100       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3101     }
3102   /* (ashift:P (reg:P) (const_int shift)) */
3103   else if (GET_CODE (x) == ASHIFT
3104            && GET_MODE (x) == Pmode
3105            && GET_MODE (XEXP (x, 0)) == Pmode
3106            && CONST_INT_P (XEXP (x, 1)))
3107     {
3108       type = ADDRESS_REG_REG;
3109       index = XEXP (x, 0);
3110       shift = INTVAL (XEXP (x, 1));
3111     }
3112   else
3113     return false;
3114
3115   if (GET_CODE (index) == SUBREG)
3116     index = SUBREG_REG (index);
3117
3118   if ((shift == 0 ||
3119        (shift > 0 && shift <= 3
3120         && (1 << shift) == GET_MODE_SIZE (mode)))
3121       && REG_P (index)
3122       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3123     {
3124       info->type = type;
3125       info->offset = index;
3126       info->shift = shift;
3127       return true;
3128     }
3129
3130   return false;
3131 }
3132
3133 static inline bool
3134 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3135 {
3136   return (offset >= -64 * GET_MODE_SIZE (mode)
3137           && offset < 64 * GET_MODE_SIZE (mode)
3138           && offset % GET_MODE_SIZE (mode) == 0);
3139 }
3140
3141 static inline bool
3142 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3143                                HOST_WIDE_INT offset)
3144 {
3145   return offset >= -256 && offset < 256;
3146 }
3147
3148 static inline bool
3149 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3150 {
3151   return (offset >= 0
3152           && offset < 4096 * GET_MODE_SIZE (mode)
3153           && offset % GET_MODE_SIZE (mode) == 0);
3154 }
3155
3156 /* Return true if X is a valid address for machine mode MODE.  If it is,
3157    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
3158    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
3159
3160 static bool
3161 aarch64_classify_address (struct aarch64_address_info *info,
3162                           rtx x, enum machine_mode mode,
3163                           RTX_CODE outer_code, bool strict_p)
3164 {
3165   enum rtx_code code = GET_CODE (x);
3166   rtx op0, op1;
3167   bool allow_reg_index_p =
3168     outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3169                                || aarch64_vector_mode_supported_p (mode));
3170   /* Don't support anything other than POST_INC or REG addressing for
3171      AdvSIMD.  */
3172   if (aarch64_vect_struct_mode_p (mode)
3173       && (code != POST_INC && code != REG))
3174     return false;
3175
3176   switch (code)
3177     {
3178     case REG:
3179     case SUBREG:
3180       info->type = ADDRESS_REG_IMM;
3181       info->base = x;
3182       info->offset = const0_rtx;
3183       return aarch64_base_register_rtx_p (x, strict_p);
3184
3185     case PLUS:
3186       op0 = XEXP (x, 0);
3187       op1 = XEXP (x, 1);
3188       if (GET_MODE_SIZE (mode) != 0
3189           && CONST_INT_P (op1)
3190           && aarch64_base_register_rtx_p (op0, strict_p))
3191         {
3192           HOST_WIDE_INT offset = INTVAL (op1);
3193
3194           info->type = ADDRESS_REG_IMM;
3195           info->base = op0;
3196           info->offset = op1;
3197
3198           /* TImode and TFmode values are allowed in both pairs of X
3199              registers and individual Q registers.  The available
3200              address modes are:
3201              X,X: 7-bit signed scaled offset
3202              Q:   9-bit signed offset
3203              We conservatively require an offset representable in either mode.
3204            */
3205           if (mode == TImode || mode == TFmode)
3206             return (offset_7bit_signed_scaled_p (mode, offset)
3207                     && offset_9bit_signed_unscaled_p (mode, offset));
3208
3209           if (outer_code == PARALLEL)
3210             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3211                     && offset_7bit_signed_scaled_p (mode, offset));
3212           else
3213             return (offset_9bit_signed_unscaled_p (mode, offset)
3214                     || offset_12bit_unsigned_scaled_p (mode, offset));
3215         }
3216
3217       if (allow_reg_index_p)
3218         {
3219           /* Look for base + (scaled/extended) index register.  */
3220           if (aarch64_base_register_rtx_p (op0, strict_p)
3221               && aarch64_classify_index (info, op1, mode, strict_p))
3222             {
3223               info->base = op0;
3224               return true;
3225             }
3226           if (aarch64_base_register_rtx_p (op1, strict_p)
3227               && aarch64_classify_index (info, op0, mode, strict_p))
3228             {
3229               info->base = op1;
3230               return true;
3231             }
3232         }
3233
3234       return false;
3235
3236     case POST_INC:
3237     case POST_DEC:
3238     case PRE_INC:
3239     case PRE_DEC:
3240       info->type = ADDRESS_REG_WB;
3241       info->base = XEXP (x, 0);
3242       info->offset = NULL_RTX;
3243       return aarch64_base_register_rtx_p (info->base, strict_p);
3244
3245     case POST_MODIFY:
3246     case PRE_MODIFY:
3247       info->type = ADDRESS_REG_WB;
3248       info->base = XEXP (x, 0);
3249       if (GET_CODE (XEXP (x, 1)) == PLUS
3250           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3251           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3252           && aarch64_base_register_rtx_p (info->base, strict_p))
3253         {
3254           HOST_WIDE_INT offset;
3255           info->offset = XEXP (XEXP (x, 1), 1);
3256           offset = INTVAL (info->offset);
3257
3258           /* TImode and TFmode values are allowed in both pairs of X
3259              registers and individual Q registers.  The available
3260              address modes are:
3261              X,X: 7-bit signed scaled offset
3262              Q:   9-bit signed offset
3263              We conservatively require an offset representable in either mode.
3264            */
3265           if (mode == TImode || mode == TFmode)
3266             return (offset_7bit_signed_scaled_p (mode, offset)
3267                     && offset_9bit_signed_unscaled_p (mode, offset));
3268
3269           if (outer_code == PARALLEL)
3270             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3271                     && offset_7bit_signed_scaled_p (mode, offset));
3272           else
3273             return offset_9bit_signed_unscaled_p (mode, offset);
3274         }
3275       return false;
3276
3277     case CONST:
3278     case SYMBOL_REF:
3279     case LABEL_REF:
3280       /* load literal: pc-relative constant pool entry.  Only supported
3281          for SI mode or larger.  */
3282       info->type = ADDRESS_SYMBOLIC;
3283       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3284         {
3285           rtx sym, addend;
3286
3287           split_const (x, &sym, &addend);
3288           return (GET_CODE (sym) == LABEL_REF
3289                   || (GET_CODE (sym) == SYMBOL_REF
3290                       && CONSTANT_POOL_ADDRESS_P (sym)));
3291         }
3292       return false;
3293
3294     case LO_SUM:
3295       info->type = ADDRESS_LO_SUM;
3296       info->base = XEXP (x, 0);
3297       info->offset = XEXP (x, 1);
3298       if (allow_reg_index_p
3299           && aarch64_base_register_rtx_p (info->base, strict_p))
3300         {
3301           rtx sym, offs;
3302           split_const (info->offset, &sym, &offs);
3303           if (GET_CODE (sym) == SYMBOL_REF
3304               && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3305                   == SYMBOL_SMALL_ABSOLUTE))
3306             {
3307               /* The symbol and offset must be aligned to the access size.  */
3308               unsigned int align;
3309               unsigned int ref_size;
3310
3311               if (CONSTANT_POOL_ADDRESS_P (sym))
3312                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3313               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3314                 {
3315                   tree exp = SYMBOL_REF_DECL (sym);
3316                   align = TYPE_ALIGN (TREE_TYPE (exp));
3317                   align = CONSTANT_ALIGNMENT (exp, align);
3318                 }
3319               else if (SYMBOL_REF_DECL (sym))
3320                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3321               else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3322                        && SYMBOL_REF_BLOCK (sym) != NULL)
3323                 align = SYMBOL_REF_BLOCK (sym)->alignment;
3324               else
3325                 align = BITS_PER_UNIT;
3326
3327               ref_size = GET_MODE_SIZE (mode);
3328               if (ref_size == 0)
3329                 ref_size = GET_MODE_SIZE (DImode);
3330
3331               return ((INTVAL (offs) & (ref_size - 1)) == 0
3332                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3333             }
3334         }
3335       return false;
3336
3337     default:
3338       return false;
3339     }
3340 }
3341
3342 bool
3343 aarch64_symbolic_address_p (rtx x)
3344 {
3345   rtx offset;
3346
3347   split_const (x, &x, &offset);
3348   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3349 }
3350
3351 /* Classify the base of symbolic expression X, given that X appears in
3352    context CONTEXT.  */
3353
3354 enum aarch64_symbol_type
3355 aarch64_classify_symbolic_expression (rtx x,
3356                                       enum aarch64_symbol_context context)
3357 {
3358   rtx offset;
3359
3360   split_const (x, &x, &offset);
3361   return aarch64_classify_symbol (x, context);
3362 }
3363
3364
3365 /* Return TRUE if X is a legitimate address for accessing memory in
3366    mode MODE.  */
3367 static bool
3368 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3369 {
3370   struct aarch64_address_info addr;
3371
3372   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3373 }
3374
3375 /* Return TRUE if X is a legitimate address for accessing memory in
3376    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3377    pair operation.  */
3378 bool
3379 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3380                               RTX_CODE outer_code, bool strict_p)
3381 {
3382   struct aarch64_address_info addr;
3383
3384   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3385 }
3386
3387 /* Return TRUE if rtx X is immediate constant 0.0 */
3388 bool
3389 aarch64_float_const_zero_rtx_p (rtx x)
3390 {
3391   REAL_VALUE_TYPE r;
3392
3393   if (GET_MODE (x) == VOIDmode)
3394     return false;
3395
3396   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3397   if (REAL_VALUE_MINUS_ZERO (r))
3398     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3399   return REAL_VALUES_EQUAL (r, dconst0);
3400 }
3401
3402 /* Return the fixed registers used for condition codes.  */
3403
3404 static bool
3405 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3406 {
3407   *p1 = CC_REGNUM;
3408   *p2 = INVALID_REGNUM;
3409   return true;
3410 }
3411
3412 enum machine_mode
3413 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3414 {
3415   /* All floating point compares return CCFP if it is an equality
3416      comparison, and CCFPE otherwise.  */
3417   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3418     {
3419       switch (code)
3420         {
3421         case EQ:
3422         case NE:
3423         case UNORDERED:
3424         case ORDERED:
3425         case UNLT:
3426         case UNLE:
3427         case UNGT:
3428         case UNGE:
3429         case UNEQ:
3430         case LTGT:
3431           return CCFPmode;
3432
3433         case LT:
3434         case LE:
3435         case GT:
3436         case GE:
3437           return CCFPEmode;
3438
3439         default:
3440           gcc_unreachable ();
3441         }
3442     }
3443
3444   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3445       && y == const0_rtx
3446       && (code == EQ || code == NE || code == LT || code == GE)
3447       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3448           || GET_CODE (x) == NEG))
3449     return CC_NZmode;
3450
3451   /* A compare with a shifted operand.  Because of canonicalization,
3452      the comparison will have to be swapped when we emit the assembly
3453      code.  */
3454   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3455       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3456       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3457           || GET_CODE (x) == LSHIFTRT
3458           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3459     return CC_SWPmode;
3460
3461   /* Similarly for a negated operand, but we can only do this for
3462      equalities.  */
3463   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3464       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3465       && (code == EQ || code == NE)
3466       && GET_CODE (x) == NEG)
3467     return CC_Zmode;
3468
3469   /* A compare of a mode narrower than SI mode against zero can be done
3470      by extending the value in the comparison.  */
3471   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3472       && y == const0_rtx)
3473     /* Only use sign-extension if we really need it.  */
3474     return ((code == GT || code == GE || code == LE || code == LT)
3475             ? CC_SESWPmode : CC_ZESWPmode);
3476
3477   /* For everything else, return CCmode.  */
3478   return CCmode;
3479 }
3480
3481 static unsigned
3482 aarch64_get_condition_code (rtx x)
3483 {
3484   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3485   enum rtx_code comp_code = GET_CODE (x);
3486
3487   if (GET_MODE_CLASS (mode) != MODE_CC)
3488     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3489
3490   switch (mode)
3491     {
3492     case CCFPmode:
3493     case CCFPEmode:
3494       switch (comp_code)
3495         {
3496         case GE: return AARCH64_GE;
3497         case GT: return AARCH64_GT;
3498         case LE: return AARCH64_LS;
3499         case LT: return AARCH64_MI;
3500         case NE: return AARCH64_NE;
3501         case EQ: return AARCH64_EQ;
3502         case ORDERED: return AARCH64_VC;
3503         case UNORDERED: return AARCH64_VS;
3504         case UNLT: return AARCH64_LT;
3505         case UNLE: return AARCH64_LE;
3506         case UNGT: return AARCH64_HI;
3507         case UNGE: return AARCH64_PL;
3508         default: gcc_unreachable ();
3509         }
3510       break;
3511
3512     case CCmode:
3513       switch (comp_code)
3514         {
3515         case NE: return AARCH64_NE;
3516         case EQ: return AARCH64_EQ;
3517         case GE: return AARCH64_GE;
3518         case GT: return AARCH64_GT;
3519         case LE: return AARCH64_LE;
3520         case LT: return AARCH64_LT;
3521         case GEU: return AARCH64_CS;
3522         case GTU: return AARCH64_HI;
3523         case LEU: return AARCH64_LS;
3524         case LTU: return AARCH64_CC;
3525         default: gcc_unreachable ();
3526         }
3527       break;
3528
3529     case CC_SWPmode:
3530     case CC_ZESWPmode:
3531     case CC_SESWPmode:
3532       switch (comp_code)
3533         {
3534         case NE: return AARCH64_NE;
3535         case EQ: return AARCH64_EQ;
3536         case GE: return AARCH64_LE;
3537         case GT: return AARCH64_LT;
3538         case LE: return AARCH64_GE;
3539         case LT: return AARCH64_GT;
3540         case GEU: return AARCH64_LS;
3541         case GTU: return AARCH64_CC;
3542         case LEU: return AARCH64_CS;
3543         case LTU: return AARCH64_HI;
3544         default: gcc_unreachable ();
3545         }
3546       break;
3547
3548     case CC_NZmode:
3549       switch (comp_code)
3550         {
3551         case NE: return AARCH64_NE;
3552         case EQ: return AARCH64_EQ;
3553         case GE: return AARCH64_PL;
3554         case LT: return AARCH64_MI;
3555         default: gcc_unreachable ();
3556         }
3557       break;
3558
3559     case CC_Zmode:
3560       switch (comp_code)
3561         {
3562         case NE: return AARCH64_NE;
3563         case EQ: return AARCH64_EQ;
3564         default: gcc_unreachable ();
3565         }
3566       break;
3567
3568     default:
3569       gcc_unreachable ();
3570       break;
3571     }
3572 }
3573
3574 static unsigned
3575 bit_count (unsigned HOST_WIDE_INT value)
3576 {
3577   unsigned count = 0;
3578
3579   while (value)
3580     {
3581       count++;
3582       value &= value - 1;
3583     }
3584
3585   return count;
3586 }
3587
3588 void
3589 aarch64_print_operand (FILE *f, rtx x, char code)
3590 {
3591   switch (code)
3592     {
3593     /* An integer or symbol address without a preceding # sign.  */
3594     case 'c':
3595       switch (GET_CODE (x))
3596         {
3597         case CONST_INT:
3598           fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3599           break;
3600
3601         case SYMBOL_REF:
3602           output_addr_const (f, x);
3603           break;
3604
3605         case CONST:
3606           if (GET_CODE (XEXP (x, 0)) == PLUS
3607               && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3608             {
3609               output_addr_const (f, x);
3610               break;
3611             }
3612           /* Fall through.  */
3613
3614         default:
3615           output_operand_lossage ("Unsupported operand for code '%c'", code);
3616         }
3617       break;
3618
3619     case 'e':
3620       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3621       {
3622         int n;
3623
3624         if (GET_CODE (x) != CONST_INT
3625             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3626           {
3627             output_operand_lossage ("invalid operand for '%%%c'", code);
3628             return;
3629           }
3630
3631         switch (n)
3632           {
3633           case 3:
3634             fputc ('b', f);
3635             break;
3636           case 4:
3637             fputc ('h', f);
3638             break;
3639           case 5:
3640             fputc ('w', f);
3641             break;
3642           default:
3643             output_operand_lossage ("invalid operand for '%%%c'", code);
3644             return;
3645           }
3646       }
3647       break;
3648
3649     case 'p':
3650       {
3651         int n;
3652
3653         /* Print N such that 2^N == X.  */
3654         if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3655           {
3656             output_operand_lossage ("invalid operand for '%%%c'", code);
3657             return;
3658           }
3659
3660         asm_fprintf (f, "%d", n);
3661       }
3662       break;
3663
3664     case 'P':
3665       /* Print the number of non-zero bits in X (a const_int).  */
3666       if (GET_CODE (x) != CONST_INT)
3667         {
3668           output_operand_lossage ("invalid operand for '%%%c'", code);
3669           return;
3670         }
3671
3672       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3673       break;
3674
3675     case 'H':
3676       /* Print the higher numbered register of a pair (TImode) of regs.  */
3677       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3678         {
3679           output_operand_lossage ("invalid operand for '%%%c'", code);
3680           return;
3681         }
3682
3683       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3684       break;
3685
3686     case 'm':
3687       /* Print a condition (eq, ne, etc).  */
3688
3689       /* CONST_TRUE_RTX means always -- that's the default.  */
3690       if (x == const_true_rtx)
3691         return;
3692
3693       if (!COMPARISON_P (x))
3694         {
3695           output_operand_lossage ("invalid operand for '%%%c'", code);
3696           return;
3697         }
3698
3699       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3700       break;
3701
3702     case 'M':
3703       /* Print the inverse of a condition (eq <-> ne, etc).  */
3704
3705       /* CONST_TRUE_RTX means never -- that's the default.  */
3706       if (x == const_true_rtx)
3707         {
3708           fputs ("nv", f);
3709           return;
3710         }
3711
3712       if (!COMPARISON_P (x))
3713         {
3714           output_operand_lossage ("invalid operand for '%%%c'", code);
3715           return;
3716         }
3717
3718       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3719                                   (aarch64_get_condition_code (x))], f);
3720       break;
3721
3722     case 'b':
3723     case 'h':
3724     case 's':
3725     case 'd':
3726     case 'q':
3727       /* Print a scalar FP/SIMD register name.  */
3728       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3729         {
3730           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3731           return;
3732         }
3733       asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3734       break;
3735
3736     case 'S':
3737     case 'T':
3738     case 'U':
3739     case 'V':
3740       /* Print the first FP/SIMD register name in a list.  */
3741       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3742         {
3743           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3744           return;
3745         }
3746       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3747       break;
3748
3749     case 'X':
3750       /* Print bottom 16 bits of integer constant in hex.  */
3751       if (GET_CODE (x) != CONST_INT)
3752         {
3753           output_operand_lossage ("invalid operand for '%%%c'", code);
3754           return;
3755         }
3756       asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3757       break;
3758
3759     case 'w':
3760     case 'x':
3761       /* Print a general register name or the zero register (32-bit or
3762          64-bit).  */
3763       if (x == const0_rtx
3764           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3765         {
3766           asm_fprintf (f, "%czr", code);
3767           break;
3768         }
3769
3770       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3771         {
3772           asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3773           break;
3774         }
3775
3776       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3777         {
3778           asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3779           break;
3780         }
3781
3782       /* Fall through */
3783
3784     case 0:
3785       /* Print a normal operand, if it's a general register, then we
3786          assume DImode.  */
3787       if (x == NULL)
3788         {
3789           output_operand_lossage ("missing operand");
3790           return;
3791         }
3792
3793       switch (GET_CODE (x))
3794         {
3795         case REG:
3796           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3797           break;
3798
3799         case MEM:
3800           aarch64_memory_reference_mode = GET_MODE (x);
3801           output_address (XEXP (x, 0));
3802           break;
3803
3804         case LABEL_REF:
3805         case SYMBOL_REF:
3806           output_addr_const (asm_out_file, x);
3807           break;
3808
3809         case CONST_INT:
3810           asm_fprintf (f, "%wd", INTVAL (x));
3811           break;
3812
3813         case CONST_VECTOR:
3814           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3815             {
3816               gcc_assert (aarch64_const_vec_all_same_int_p (x,
3817                                                             HOST_WIDE_INT_MIN,
3818                                                             HOST_WIDE_INT_MAX));
3819               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3820             }
3821           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3822             {
3823               fputc ('0', f);
3824             }
3825           else
3826             gcc_unreachable ();
3827           break;
3828
3829         case CONST_DOUBLE:
3830           /* CONST_DOUBLE can represent a double-width integer.
3831              In this case, the mode of x is VOIDmode.  */
3832           if (GET_MODE (x) == VOIDmode)
3833             ; /* Do Nothing.  */
3834           else if (aarch64_float_const_zero_rtx_p (x))
3835             {
3836               fputc ('0', f);
3837               break;
3838             }
3839           else if (aarch64_float_const_representable_p (x))
3840             {
3841 #define buf_size 20
3842               char float_buf[buf_size] = {'\0'};
3843               REAL_VALUE_TYPE r;
3844               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3845               real_to_decimal_for_mode (float_buf, &r,
3846                                         buf_size, buf_size,
3847                                         1, GET_MODE (x));
3848               asm_fprintf (asm_out_file, "%s", float_buf);
3849               break;
3850 #undef buf_size
3851             }
3852           output_operand_lossage ("invalid constant");
3853           return;
3854         default:
3855           output_operand_lossage ("invalid operand");
3856           return;
3857         }
3858       break;
3859
3860     case 'A':
3861       if (GET_CODE (x) == HIGH)
3862         x = XEXP (x, 0);
3863
3864       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3865         {
3866         case SYMBOL_SMALL_GOT:
3867           asm_fprintf (asm_out_file, ":got:");
3868           break;
3869
3870         case SYMBOL_SMALL_TLSGD:
3871           asm_fprintf (asm_out_file, ":tlsgd:");
3872           break;
3873
3874         case SYMBOL_SMALL_TLSDESC:
3875           asm_fprintf (asm_out_file, ":tlsdesc:");
3876           break;
3877
3878         case SYMBOL_SMALL_GOTTPREL:
3879           asm_fprintf (asm_out_file, ":gottprel:");
3880           break;
3881
3882         case SYMBOL_SMALL_TPREL:
3883           asm_fprintf (asm_out_file, ":tprel:");
3884           break;
3885
3886         case SYMBOL_TINY_GOT:
3887           gcc_unreachable ();
3888           break;
3889
3890         default:
3891           break;
3892         }
3893       output_addr_const (asm_out_file, x);
3894       break;
3895
3896     case 'L':
3897       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3898         {
3899         case SYMBOL_SMALL_GOT:
3900           asm_fprintf (asm_out_file, ":lo12:");
3901           break;
3902
3903         case SYMBOL_SMALL_TLSGD:
3904           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3905           break;
3906
3907         case SYMBOL_SMALL_TLSDESC:
3908           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3909           break;
3910
3911         case SYMBOL_SMALL_GOTTPREL:
3912           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3913           break;
3914
3915         case SYMBOL_SMALL_TPREL:
3916           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3917           break;
3918
3919         case SYMBOL_TINY_GOT:
3920           asm_fprintf (asm_out_file, ":got:");
3921           break;
3922
3923         default:
3924           break;
3925         }
3926       output_addr_const (asm_out_file, x);
3927       break;
3928
3929     case 'G':
3930
3931       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3932         {
3933         case SYMBOL_SMALL_TPREL:
3934           asm_fprintf (asm_out_file, ":tprel_hi12:");
3935           break;
3936         default:
3937           break;
3938         }
3939       output_addr_const (asm_out_file, x);
3940       break;
3941
3942     default:
3943       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3944       return;
3945     }
3946 }
3947
3948 void
3949 aarch64_print_operand_address (FILE *f, rtx x)
3950 {
3951   struct aarch64_address_info addr;
3952
3953   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3954                              MEM, true))
3955     switch (addr.type)
3956       {
3957       case ADDRESS_REG_IMM:
3958         if (addr.offset == const0_rtx)
3959           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3960         else
3961           asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3962                        INTVAL (addr.offset));
3963         return;
3964
3965       case ADDRESS_REG_REG:
3966         if (addr.shift == 0)
3967           asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3968                        reg_names [REGNO (addr.offset)]);
3969         else
3970           asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3971                        reg_names [REGNO (addr.offset)], addr.shift);
3972         return;
3973
3974       case ADDRESS_REG_UXTW:
3975         if (addr.shift == 0)
3976           asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3977                        REGNO (addr.offset) - R0_REGNUM);
3978         else
3979           asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3980                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3981         return;
3982
3983       case ADDRESS_REG_SXTW:
3984         if (addr.shift == 0)
3985           asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3986                        REGNO (addr.offset) - R0_REGNUM);
3987         else
3988           asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3989                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3990         return;
3991
3992       case ADDRESS_REG_WB:
3993         switch (GET_CODE (x))
3994           {
3995           case PRE_INC:
3996             asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3997                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3998             return;
3999           case POST_INC:
4000             asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4001                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4002             return;
4003           case PRE_DEC:
4004             asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4005                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4006             return;
4007           case POST_DEC:
4008             asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4009                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4010             return;
4011           case PRE_MODIFY:
4012             asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4013                          INTVAL (addr.offset));
4014             return;
4015           case POST_MODIFY:
4016             asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4017                          INTVAL (addr.offset));
4018             return;
4019           default:
4020             break;
4021           }
4022         break;
4023
4024       case ADDRESS_LO_SUM:
4025         asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4026         output_addr_const (f, addr.offset);
4027         asm_fprintf (f, "]");
4028         return;
4029
4030       case ADDRESS_SYMBOLIC:
4031         break;
4032       }
4033
4034   output_addr_const (f, x);
4035 }
4036
4037 bool
4038 aarch64_label_mentioned_p (rtx x)
4039 {
4040   const char *fmt;
4041   int i;
4042
4043   if (GET_CODE (x) == LABEL_REF)
4044     return true;
4045
4046   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4047      referencing instruction, but they are constant offsets, not
4048      symbols.  */
4049   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4050     return false;
4051
4052   fmt = GET_RTX_FORMAT (GET_CODE (x));
4053   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4054     {
4055       if (fmt[i] == 'E')
4056         {
4057           int j;
4058
4059           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4060             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4061               return 1;
4062         }
4063       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4064         return 1;
4065     }
4066
4067   return 0;
4068 }
4069
4070 /* Implement REGNO_REG_CLASS.  */
4071
4072 enum reg_class
4073 aarch64_regno_regclass (unsigned regno)
4074 {
4075   if (GP_REGNUM_P (regno))
4076     return CORE_REGS;
4077
4078   if (regno == SP_REGNUM)
4079     return STACK_REG;
4080
4081   if (regno == FRAME_POINTER_REGNUM
4082       || regno == ARG_POINTER_REGNUM)
4083     return POINTER_REGS;
4084
4085   if (FP_REGNUM_P (regno))
4086     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
4087
4088   return NO_REGS;
4089 }
4090
4091 /* Try a machine-dependent way of reloading an illegitimate address
4092    operand.  If we find one, push the reload and return the new rtx.  */
4093
4094 rtx
4095 aarch64_legitimize_reload_address (rtx *x_p,
4096                                    enum machine_mode mode,
4097                                    int opnum, int type,
4098                                    int ind_levels ATTRIBUTE_UNUSED)
4099 {
4100   rtx x = *x_p;
4101
4102   /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
4103   if (aarch64_vect_struct_mode_p (mode)
4104       && GET_CODE (x) == PLUS
4105       && REG_P (XEXP (x, 0))
4106       && CONST_INT_P (XEXP (x, 1)))
4107     {
4108       rtx orig_rtx = x;
4109       x = copy_rtx (x);
4110       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4111                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4112                    opnum, (enum reload_type) type);
4113       return x;
4114     }
4115
4116   /* We must recognize output that we have already generated ourselves.  */
4117   if (GET_CODE (x) == PLUS
4118       && GET_CODE (XEXP (x, 0)) == PLUS
4119       && REG_P (XEXP (XEXP (x, 0), 0))
4120       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4121       && CONST_INT_P (XEXP (x, 1)))
4122     {
4123       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4124                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4125                    opnum, (enum reload_type) type);
4126       return x;
4127     }
4128
4129   /* We wish to handle large displacements off a base register by splitting
4130      the addend across an add and the mem insn.  This can cut the number of
4131      extra insns needed from 3 to 1.  It is only useful for load/store of a
4132      single register with 12 bit offset field.  */
4133   if (GET_CODE (x) == PLUS
4134       && REG_P (XEXP (x, 0))
4135       && CONST_INT_P (XEXP (x, 1))
4136       && HARD_REGISTER_P (XEXP (x, 0))
4137       && mode != TImode
4138       && mode != TFmode
4139       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4140     {
4141       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4142       HOST_WIDE_INT low = val & 0xfff;
4143       HOST_WIDE_INT high = val - low;
4144       HOST_WIDE_INT offs;
4145       rtx cst;
4146       enum machine_mode xmode = GET_MODE (x);
4147
4148       /* In ILP32, xmode can be either DImode or SImode.  */
4149       gcc_assert (xmode == DImode || xmode == SImode);
4150
4151       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
4152          BLKmode alignment.  */
4153       if (GET_MODE_SIZE (mode) == 0)
4154         return NULL_RTX;
4155
4156       offs = low % GET_MODE_SIZE (mode);
4157
4158       /* Align misaligned offset by adjusting high part to compensate.  */
4159       if (offs != 0)
4160         {
4161           if (aarch64_uimm12_shift (high + offs))
4162             {
4163               /* Align down.  */
4164               low = low - offs;
4165               high = high + offs;
4166             }
4167           else
4168             {
4169               /* Align up.  */
4170               offs = GET_MODE_SIZE (mode) - offs;
4171               low = low + offs;
4172               high = high + (low & 0x1000) - offs;
4173               low &= 0xfff;
4174             }
4175         }
4176
4177       /* Check for overflow.  */
4178       if (high + low != val)
4179         return NULL_RTX;
4180
4181       cst = GEN_INT (high);
4182       if (!aarch64_uimm12_shift (high))
4183         cst = force_const_mem (xmode, cst);
4184
4185       /* Reload high part into base reg, leaving the low part
4186          in the mem instruction.
4187          Note that replacing this gen_rtx_PLUS with plus_constant is
4188          wrong in this case because we rely on the
4189          (plus (plus reg c1) c2) structure being preserved so that
4190          XEXP (*p, 0) in push_reload below uses the correct term.  */
4191       x = gen_rtx_PLUS (xmode,
4192                         gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4193                         GEN_INT (low));
4194
4195       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4196                    BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4197                    opnum, (enum reload_type) type);
4198       return x;
4199     }
4200
4201   return NULL_RTX;
4202 }
4203
4204
4205 static reg_class_t
4206 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4207                           reg_class_t rclass,
4208                           enum machine_mode mode,
4209                           secondary_reload_info *sri)
4210 {
4211   /* Without the TARGET_SIMD instructions we cannot move a Q register
4212      to a Q register directly.  We need a scratch.  */
4213   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4214       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4215       && reg_class_subset_p (rclass, FP_REGS))
4216     {
4217       if (mode == TFmode)
4218         sri->icode = CODE_FOR_aarch64_reload_movtf;
4219       else if (mode == TImode)
4220         sri->icode = CODE_FOR_aarch64_reload_movti;
4221       return NO_REGS;
4222     }
4223
4224   /* A TFmode or TImode memory access should be handled via an FP_REGS
4225      because AArch64 has richer addressing modes for LDR/STR instructions
4226      than LDP/STP instructions.  */
4227   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4228       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4229     return FP_REGS;
4230
4231   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4232       return CORE_REGS;
4233
4234   return NO_REGS;
4235 }
4236
4237 static bool
4238 aarch64_can_eliminate (const int from, const int to)
4239 {
4240   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4241      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4242
4243   if (frame_pointer_needed)
4244     {
4245       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4246         return true;
4247       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4248         return false;
4249       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4250           && !cfun->calls_alloca)
4251         return true;
4252       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4253         return true;
4254
4255       return false;
4256     }
4257
4258   return true;
4259 }
4260
4261 HOST_WIDE_INT
4262 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4263 {
4264   HOST_WIDE_INT frame_size;
4265   HOST_WIDE_INT offset;
4266
4267   aarch64_layout_frame ();
4268   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4269                 + crtl->outgoing_args_size
4270                 + cfun->machine->saved_varargs_size);
4271
4272   frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4273   offset = frame_size;
4274
4275   if (to == HARD_FRAME_POINTER_REGNUM)
4276     {
4277       if (from == ARG_POINTER_REGNUM)
4278         return offset - crtl->outgoing_args_size;
4279
4280       if (from == FRAME_POINTER_REGNUM)
4281         return cfun->machine->frame.saved_regs_size + get_frame_size ();
4282     }
4283
4284   if (to == STACK_POINTER_REGNUM)
4285     {
4286       if (from == FRAME_POINTER_REGNUM)
4287         {
4288           HOST_WIDE_INT elim = crtl->outgoing_args_size
4289             + cfun->machine->frame.saved_regs_size
4290             + get_frame_size ();
4291           elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4292           return elim;
4293         }
4294     }
4295
4296   return offset;
4297 }
4298
4299
4300 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4301    previous frame.  */
4302
4303 rtx
4304 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4305 {
4306   if (count != 0)
4307     return const0_rtx;
4308   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4309 }
4310
4311
4312 static void
4313 aarch64_asm_trampoline_template (FILE *f)
4314 {
4315   if (TARGET_ILP32)
4316     {
4317       asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4318       asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4319     }
4320   else
4321     {
4322       asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4323       asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4324     }
4325   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4326   assemble_aligned_integer (4, const0_rtx);
4327   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4328   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4329 }
4330
4331 static void
4332 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4333 {
4334   rtx fnaddr, mem, a_tramp;
4335   const int tramp_code_sz = 16;
4336
4337   /* Don't need to copy the trailing D-words, we fill those in below.  */
4338   emit_block_move (m_tramp, assemble_trampoline_template (),
4339                    GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4340   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4341   fnaddr = XEXP (DECL_RTL (fndecl), 0);
4342   if (GET_MODE (fnaddr) != ptr_mode)
4343     fnaddr = convert_memory_address (ptr_mode, fnaddr);
4344   emit_move_insn (mem, fnaddr);
4345
4346   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4347   emit_move_insn (mem, chain_value);
4348
4349   /* XXX We should really define a "clear_cache" pattern and use
4350      gen_clear_cache().  */
4351   a_tramp = XEXP (m_tramp, 0);
4352   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4353                      LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4354                      plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4355                      ptr_mode);
4356 }
4357
4358 static unsigned char
4359 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4360 {
4361   switch (regclass)
4362     {
4363     case CORE_REGS:
4364     case POINTER_REGS:
4365     case GENERAL_REGS:
4366     case ALL_REGS:
4367     case FP_REGS:
4368     case FP_LO_REGS:
4369       return
4370         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4371                                        (GET_MODE_SIZE (mode) + 7) / 8;
4372     case STACK_REG:
4373       return 1;
4374
4375     case NO_REGS:
4376       return 0;
4377
4378     default:
4379       break;
4380     }
4381   gcc_unreachable ();
4382 }
4383
4384 static reg_class_t
4385 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4386 {
4387   if (regclass == POINTER_REGS)
4388     return GENERAL_REGS;
4389
4390   if (regclass == STACK_REG)
4391     {
4392       if (REG_P(x)
4393           && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4394           return regclass;
4395
4396       return NO_REGS;
4397     }
4398
4399   /* If it's an integer immediate that MOVI can't handle, then
4400      FP_REGS is not an option, so we return NO_REGS instead.  */
4401   if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4402       && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4403     return NO_REGS;
4404
4405   /* Register eliminiation can result in a request for
4406      SP+constant->FP_REGS.  We cannot support such operations which
4407      use SP as source and an FP_REG as destination, so reject out
4408      right now.  */
4409   if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4410     {
4411       rtx lhs = XEXP (x, 0);
4412
4413       /* Look through a possible SUBREG introduced by ILP32.  */
4414       if (GET_CODE (lhs) == SUBREG)
4415         lhs = SUBREG_REG (lhs);
4416
4417       gcc_assert (REG_P (lhs));
4418       gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4419                                       POINTER_REGS));
4420       return NO_REGS;
4421     }
4422
4423   return regclass;
4424 }
4425
4426 void
4427 aarch64_asm_output_labelref (FILE* f, const char *name)
4428 {
4429   asm_fprintf (f, "%U%s", name);
4430 }
4431
4432 static void
4433 aarch64_elf_asm_constructor (rtx symbol, int priority)
4434 {
4435   if (priority == DEFAULT_INIT_PRIORITY)
4436     default_ctor_section_asm_out_constructor (symbol, priority);
4437   else
4438     {
4439       section *s;
4440       char buf[18];
4441       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4442       s = get_section (buf, SECTION_WRITE, NULL);
4443       switch_to_section (s);
4444       assemble_align (POINTER_SIZE);
4445       assemble_aligned_integer (POINTER_BYTES, symbol);
4446     }
4447 }
4448
4449 static void
4450 aarch64_elf_asm_destructor (rtx symbol, int priority)
4451 {
4452   if (priority == DEFAULT_INIT_PRIORITY)
4453     default_dtor_section_asm_out_destructor (symbol, priority);
4454   else
4455     {
4456       section *s;
4457       char buf[18];
4458       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4459       s = get_section (buf, SECTION_WRITE, NULL);
4460       switch_to_section (s);
4461       assemble_align (POINTER_SIZE);
4462       assemble_aligned_integer (POINTER_BYTES, symbol);
4463     }
4464 }
4465
4466 const char*
4467 aarch64_output_casesi (rtx *operands)
4468 {
4469   char buf[100];
4470   char label[100];
4471   rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4472   int index;
4473   static const char *const patterns[4][2] =
4474   {
4475     {
4476       "ldrb\t%w3, [%0,%w1,uxtw]",
4477       "add\t%3, %4, %w3, sxtb #2"
4478     },
4479     {
4480       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4481       "add\t%3, %4, %w3, sxth #2"
4482     },
4483     {
4484       "ldr\t%w3, [%0,%w1,uxtw #2]",
4485       "add\t%3, %4, %w3, sxtw #2"
4486     },
4487     /* We assume that DImode is only generated when not optimizing and
4488        that we don't really need 64-bit address offsets.  That would
4489        imply an object file with 8GB of code in a single function!  */
4490     {
4491       "ldr\t%w3, [%0,%w1,uxtw #2]",
4492       "add\t%3, %4, %w3, sxtw #2"
4493     }
4494   };
4495
4496   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4497
4498   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4499
4500   gcc_assert (index >= 0 && index <= 3);
4501
4502   /* Need to implement table size reduction, by chaning the code below.  */
4503   output_asm_insn (patterns[index][0], operands);
4504   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4505   snprintf (buf, sizeof (buf),
4506             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4507   output_asm_insn (buf, operands);
4508   output_asm_insn (patterns[index][1], operands);
4509   output_asm_insn ("br\t%3", operands);
4510   assemble_label (asm_out_file, label);
4511   return "";
4512 }
4513
4514
4515 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4516    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4517    operator.  */
4518
4519 int
4520 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4521 {
4522   if (shift >= 0 && shift <= 3)
4523     {
4524       int size;
4525       for (size = 8; size <= 32; size *= 2)
4526         {
4527           HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4528           if (mask == bits << shift)
4529             return size;
4530         }
4531     }
4532   return 0;
4533 }
4534
4535 static bool
4536 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4537                                    const_rtx x ATTRIBUTE_UNUSED)
4538 {
4539   /* We can't use blocks for constants when we're using a per-function
4540      constant pool.  */
4541   return false;
4542 }
4543
4544 static section *
4545 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4546                             rtx x ATTRIBUTE_UNUSED,
4547                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4548 {
4549   /* Force all constant pool entries into the current function section.  */
4550   return function_section (current_function_decl);
4551 }
4552
4553
4554 /* Costs.  */
4555
4556 /* Helper function for rtx cost calculation.  Strip a shift expression
4557    from X.  Returns the inner operand if successful, or the original
4558    expression on failure.  */
4559 static rtx
4560 aarch64_strip_shift (rtx x)
4561 {
4562   rtx op = x;
4563
4564   /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4565      we can convert both to ROR during final output.  */
4566   if ((GET_CODE (op) == ASHIFT
4567        || GET_CODE (op) == ASHIFTRT
4568        || GET_CODE (op) == LSHIFTRT
4569        || GET_CODE (op) == ROTATERT
4570        || GET_CODE (op) == ROTATE)
4571       && CONST_INT_P (XEXP (op, 1)))
4572     return XEXP (op, 0);
4573
4574   if (GET_CODE (op) == MULT
4575       && CONST_INT_P (XEXP (op, 1))
4576       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4577     return XEXP (op, 0);
4578
4579   return x;
4580 }
4581
4582 /* Helper function for rtx cost calculation.  Strip an extend
4583    expression from X.  Returns the inner operand if successful, or the
4584    original expression on failure.  We deal with a number of possible
4585    canonicalization variations here.  */
4586 static rtx
4587 aarch64_strip_extend (rtx x)
4588 {
4589   rtx op = x;
4590
4591   /* Zero and sign extraction of a widened value.  */
4592   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4593       && XEXP (op, 2) == const0_rtx
4594       && GET_CODE (XEXP (op, 0)) == MULT
4595       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4596                                          XEXP (op, 1)))
4597     return XEXP (XEXP (op, 0), 0);
4598
4599   /* It can also be represented (for zero-extend) as an AND with an
4600      immediate.  */
4601   if (GET_CODE (op) == AND
4602       && GET_CODE (XEXP (op, 0)) == MULT
4603       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4604       && CONST_INT_P (XEXP (op, 1))
4605       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4606                            INTVAL (XEXP (op, 1))) != 0)
4607     return XEXP (XEXP (op, 0), 0);
4608
4609   /* Now handle extended register, as this may also have an optional
4610      left shift by 1..4.  */
4611   if (GET_CODE (op) == ASHIFT
4612       && CONST_INT_P (XEXP (op, 1))
4613       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4614     op = XEXP (op, 0);
4615
4616   if (GET_CODE (op) == ZERO_EXTEND
4617       || GET_CODE (op) == SIGN_EXTEND)
4618     op = XEXP (op, 0);
4619
4620   if (op != x)
4621     return op;
4622
4623   return x;
4624 }
4625
4626 /* Helper function for rtx cost calculation.  Calculate the cost of
4627    a MULT, which may be part of a multiply-accumulate rtx.  Return
4628    the calculated cost of the expression, recursing manually in to
4629    operands where needed.  */
4630
4631 static int
4632 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4633 {
4634   rtx op0, op1;
4635   const struct cpu_cost_table *extra_cost
4636     = aarch64_tune_params->insn_extra_cost;
4637   int cost = 0;
4638   bool maybe_fma = (outer == PLUS || outer == MINUS);
4639   enum machine_mode mode = GET_MODE (x);
4640
4641   gcc_checking_assert (code == MULT);
4642
4643   op0 = XEXP (x, 0);
4644   op1 = XEXP (x, 1);
4645
4646   if (VECTOR_MODE_P (mode))
4647     mode = GET_MODE_INNER (mode);
4648
4649   /* Integer multiply/fma.  */
4650   if (GET_MODE_CLASS (mode) == MODE_INT)
4651     {
4652       /* The multiply will be canonicalized as a shift, cost it as such.  */
4653       if (CONST_INT_P (op1)
4654           && exact_log2 (INTVAL (op1)) > 0)
4655         {
4656           if (speed)
4657             {
4658               if (maybe_fma)
4659                 /* ADD (shifted register).  */
4660                 cost += extra_cost->alu.arith_shift;
4661               else
4662                 /* LSL (immediate).  */
4663                 cost += extra_cost->alu.shift;
4664             }
4665
4666           cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4667
4668           return cost;
4669         }
4670
4671       /* Integer multiplies or FMAs have zero/sign extending variants.  */
4672       if ((GET_CODE (op0) == ZERO_EXTEND
4673            && GET_CODE (op1) == ZERO_EXTEND)
4674           || (GET_CODE (op0) == SIGN_EXTEND
4675               && GET_CODE (op1) == SIGN_EXTEND))
4676         {
4677           cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4678                   + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4679
4680           if (speed)
4681             {
4682               if (maybe_fma)
4683                 /* MADD/SMADDL/UMADDL.  */
4684                 cost += extra_cost->mult[0].extend_add;
4685               else
4686                 /* MUL/SMULL/UMULL.  */
4687                 cost += extra_cost->mult[0].extend;
4688             }
4689
4690           return cost;
4691         }
4692
4693       /* This is either an integer multiply or an FMA.  In both cases
4694          we want to recurse and cost the operands.  */
4695       cost += rtx_cost (op0, MULT, 0, speed)
4696               + rtx_cost (op1, MULT, 1, speed);
4697
4698       if (speed)
4699         {
4700           if (maybe_fma)
4701             /* MADD.  */
4702             cost += extra_cost->mult[mode == DImode].add;
4703           else
4704             /* MUL.  */
4705             cost += extra_cost->mult[mode == DImode].simple;
4706         }
4707
4708       return cost;
4709     }
4710   else
4711     {
4712       if (speed)
4713         {
4714           /* Floating-point FMA/FMUL can also support negations of the
4715              operands.  */
4716           if (GET_CODE (op0) == NEG)
4717             op0 = XEXP (op0, 0);
4718           if (GET_CODE (op1) == NEG)
4719             op1 = XEXP (op1, 0);
4720
4721           if (maybe_fma)
4722             /* FMADD/FNMADD/FNMSUB/FMSUB.  */
4723             cost += extra_cost->fp[mode == DFmode].fma;
4724           else
4725             /* FMUL/FNMUL.  */
4726             cost += extra_cost->fp[mode == DFmode].mult;
4727         }
4728
4729       cost += rtx_cost (op0, MULT, 0, speed)
4730               + rtx_cost (op1, MULT, 1, speed);
4731       return cost;
4732     }
4733 }
4734
4735 static int
4736 aarch64_address_cost (rtx x,
4737                       enum machine_mode mode,
4738                       addr_space_t as ATTRIBUTE_UNUSED,
4739                       bool speed)
4740 {
4741   enum rtx_code c = GET_CODE (x);
4742   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4743   struct aarch64_address_info info;
4744   int cost = 0;
4745   info.shift = 0;
4746
4747   if (!aarch64_classify_address (&info, x, mode, c, false))
4748     {
4749       if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4750         {
4751           /* This is a CONST or SYMBOL ref which will be split
4752              in a different way depending on the code model in use.
4753              Cost it through the generic infrastructure.  */
4754           int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4755           /* Divide through by the cost of one instruction to
4756              bring it to the same units as the address costs.  */
4757           cost_symbol_ref /= COSTS_N_INSNS (1);
4758           /* The cost is then the cost of preparing the address,
4759              followed by an immediate (possibly 0) offset.  */
4760           return cost_symbol_ref + addr_cost->imm_offset;
4761         }
4762       else
4763         {
4764           /* This is most likely a jump table from a case
4765              statement.  */
4766           return addr_cost->register_offset;
4767         }
4768     }
4769
4770   switch (info.type)
4771     {
4772       case ADDRESS_LO_SUM:
4773       case ADDRESS_SYMBOLIC:
4774       case ADDRESS_REG_IMM:
4775         cost += addr_cost->imm_offset;
4776         break;
4777
4778       case ADDRESS_REG_WB:
4779         if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4780           cost += addr_cost->pre_modify;
4781         else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4782           cost += addr_cost->post_modify;
4783         else
4784           gcc_unreachable ();
4785
4786         break;
4787
4788       case ADDRESS_REG_REG:
4789         cost += addr_cost->register_offset;
4790         break;
4791
4792       case ADDRESS_REG_UXTW:
4793       case ADDRESS_REG_SXTW:
4794         cost += addr_cost->register_extend;
4795         break;
4796
4797       default:
4798         gcc_unreachable ();
4799     }
4800
4801
4802   if (info.shift > 0)
4803     {
4804       /* For the sake of calculating the cost of the shifted register
4805          component, we can treat same sized modes in the same way.  */
4806       switch (GET_MODE_BITSIZE (mode))
4807         {
4808           case 16:
4809             cost += addr_cost->addr_scale_costs.hi;
4810             break;
4811
4812           case 32:
4813             cost += addr_cost->addr_scale_costs.si;
4814             break;
4815
4816           case 64:
4817             cost += addr_cost->addr_scale_costs.di;
4818             break;
4819
4820           /* We can't tell, or this is a 128-bit vector.  */
4821           default:
4822             cost += addr_cost->addr_scale_costs.ti;
4823             break;
4824         }
4825     }
4826
4827   return cost;
4828 }
4829
4830 /* Return true if the RTX X in mode MODE is a zero or sign extract
4831    usable in an ADD or SUB (extended register) instruction.  */
4832 static bool
4833 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4834 {
4835   /* Catch add with a sign extract.
4836      This is add_<optab><mode>_multp2.  */
4837   if (GET_CODE (x) == SIGN_EXTRACT
4838       || GET_CODE (x) == ZERO_EXTRACT)
4839     {
4840       rtx op0 = XEXP (x, 0);
4841       rtx op1 = XEXP (x, 1);
4842       rtx op2 = XEXP (x, 2);
4843
4844       if (GET_CODE (op0) == MULT
4845           && CONST_INT_P (op1)
4846           && op2 == const0_rtx
4847           && CONST_INT_P (XEXP (op0, 1))
4848           && aarch64_is_extend_from_extract (mode,
4849                                              XEXP (op0, 1),
4850                                              op1))
4851         {
4852           return true;
4853         }
4854     }
4855
4856   return false;
4857 }
4858
4859 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4860    storing it in *COST.  Result is true if the total cost of the operation
4861    has now been calculated.  */
4862 static bool
4863 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4864 {
4865   rtx inner;
4866   rtx comparator;
4867   enum rtx_code cmpcode;
4868
4869   if (COMPARISON_P (op0))
4870     {
4871       inner = XEXP (op0, 0);
4872       comparator = XEXP (op0, 1);
4873       cmpcode = GET_CODE (op0);
4874     }
4875   else
4876     {
4877       inner = op0;
4878       comparator = const0_rtx;
4879       cmpcode = NE;
4880     }
4881
4882   if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4883     {
4884       /* Conditional branch.  */
4885       if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4886         return true;
4887       else
4888         {
4889           if (cmpcode == NE || cmpcode == EQ)
4890             {
4891               if (comparator == const0_rtx)
4892                 {
4893                   /* TBZ/TBNZ/CBZ/CBNZ.  */
4894                   if (GET_CODE (inner) == ZERO_EXTRACT)
4895                     /* TBZ/TBNZ.  */
4896                     *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4897                                        0, speed);
4898                 else
4899                   /* CBZ/CBNZ.  */
4900                   *cost += rtx_cost (inner, cmpcode, 0, speed);
4901
4902                 return true;
4903               }
4904             }
4905           else if (cmpcode == LT || cmpcode == GE)
4906             {
4907               /* TBZ/TBNZ.  */
4908               if (comparator == const0_rtx)
4909                 return true;
4910             }
4911         }
4912     }
4913   else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4914     {
4915       /* It's a conditional operation based on the status flags,
4916          so it must be some flavor of CSEL.  */
4917
4918       /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
4919       if (GET_CODE (op1) == NEG
4920           || GET_CODE (op1) == NOT
4921           || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
4922         op1 = XEXP (op1, 0);
4923
4924       *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
4925       *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
4926       return true;
4927     }
4928
4929   /* We don't know what this is, cost all operands.  */
4930   return false;
4931 }
4932
4933 /* Calculate the cost of calculating X, storing it in *COST.  Result
4934    is true if the total cost of the operation has now been calculated.  */
4935 static bool
4936 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4937                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4938 {
4939   rtx op0, op1, op2;
4940   const struct cpu_cost_table *extra_cost
4941     = aarch64_tune_params->insn_extra_cost;
4942   enum machine_mode mode = GET_MODE (x);
4943
4944   /* By default, assume that everything has equivalent cost to the
4945      cheapest instruction.  Any additional costs are applied as a delta
4946      above this default.  */
4947   *cost = COSTS_N_INSNS (1);
4948
4949   /* TODO: The cost infrastructure currently does not handle
4950      vector operations.  Assume that all vector operations
4951      are equally expensive.  */
4952   if (VECTOR_MODE_P (mode))
4953     {
4954       if (speed)
4955         *cost += extra_cost->vect.alu;
4956       return true;
4957     }
4958
4959   switch (code)
4960     {
4961     case SET:
4962       /* The cost depends entirely on the operands to SET.  */
4963       *cost = 0;
4964       op0 = SET_DEST (x);
4965       op1 = SET_SRC (x);
4966
4967       switch (GET_CODE (op0))
4968         {
4969         case MEM:
4970           if (speed)
4971             {
4972               rtx address = XEXP (op0, 0);
4973               if (GET_MODE_CLASS (mode) == MODE_INT)
4974                 *cost += extra_cost->ldst.store;
4975               else if (mode == SFmode)
4976                 *cost += extra_cost->ldst.storef;
4977               else if (mode == DFmode)
4978                 *cost += extra_cost->ldst.stored;
4979
4980               *cost +=
4981                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4982                                                      0, speed));
4983             }
4984
4985           *cost += rtx_cost (op1, SET, 1, speed);
4986           return true;
4987
4988         case SUBREG:
4989           if (! REG_P (SUBREG_REG (op0)))
4990             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4991
4992           /* Fall through.  */
4993         case REG:
4994           /* const0_rtx is in general free, but we will use an
4995              instruction to set a register to 0.  */
4996           if (REG_P (op1) || op1 == const0_rtx)
4997             {
4998               /* The cost is 1 per register copied.  */
4999               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5000                               / UNITS_PER_WORD;
5001               *cost = COSTS_N_INSNS (n_minus_1 + 1);
5002             }
5003           else
5004             /* Cost is just the cost of the RHS of the set.  */
5005             *cost += rtx_cost (op1, SET, 1, speed);
5006           return true;
5007
5008         case ZERO_EXTRACT:
5009         case SIGN_EXTRACT:
5010           /* Bit-field insertion.  Strip any redundant widening of
5011              the RHS to meet the width of the target.  */
5012           if (GET_CODE (op1) == SUBREG)
5013             op1 = SUBREG_REG (op1);
5014           if ((GET_CODE (op1) == ZERO_EXTEND
5015                || GET_CODE (op1) == SIGN_EXTEND)
5016               && GET_CODE (XEXP (op0, 1)) == CONST_INT
5017               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5018                   >= INTVAL (XEXP (op0, 1))))
5019             op1 = XEXP (op1, 0);
5020
5021           if (CONST_INT_P (op1))
5022             {
5023               /* MOV immediate is assumed to always be cheap.  */
5024               *cost = COSTS_N_INSNS (1);
5025             }
5026           else
5027             {
5028               /* BFM.  */
5029               if (speed)
5030                 *cost += extra_cost->alu.bfi;
5031               *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5032             }
5033
5034           return true;
5035
5036         default:
5037           /* We can't make sense of this, assume default cost.  */
5038           *cost = COSTS_N_INSNS (1);
5039           break;
5040         }
5041       return false;
5042
5043     case CONST_INT:
5044       /* If an instruction can incorporate a constant within the
5045          instruction, the instruction's expression avoids calling
5046          rtx_cost() on the constant.  If rtx_cost() is called on a
5047          constant, then it is usually because the constant must be
5048          moved into a register by one or more instructions.
5049
5050          The exception is constant 0, which can be expressed
5051          as XZR/WZR and is therefore free.  The exception to this is
5052          if we have (set (reg) (const0_rtx)) in which case we must cost
5053          the move.  However, we can catch that when we cost the SET, so
5054          we don't need to consider that here.  */
5055       if (x == const0_rtx)
5056         *cost = 0;
5057       else
5058         {
5059           /* To an approximation, building any other constant is
5060              proportionally expensive to the number of instructions
5061              required to build that constant.  This is true whether we
5062              are compiling for SPEED or otherwise.  */
5063           *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5064                                                          INTVAL (x),
5065                                                          false));
5066         }
5067       return true;
5068
5069     case CONST_DOUBLE:
5070       if (speed)
5071         {
5072           /* mov[df,sf]_aarch64.  */
5073           if (aarch64_float_const_representable_p (x))
5074             /* FMOV (scalar immediate).  */
5075             *cost += extra_cost->fp[mode == DFmode].fpconst;
5076           else if (!aarch64_float_const_zero_rtx_p (x))
5077             {
5078               /* This will be a load from memory.  */
5079               if (mode == DFmode)
5080                 *cost += extra_cost->ldst.loadd;
5081               else
5082                 *cost += extra_cost->ldst.loadf;
5083             }
5084           else
5085             /* Otherwise this is +0.0.  We get this using MOVI d0, #0
5086                or MOV v0.s[0], wzr - neither of which are modeled by the
5087                cost tables.  Just use the default cost.  */
5088             {
5089             }
5090         }
5091
5092       return true;
5093
5094     case MEM:
5095       if (speed)
5096         {
5097           /* For loads we want the base cost of a load, plus an
5098              approximation for the additional cost of the addressing
5099              mode.  */
5100           rtx address = XEXP (x, 0);
5101           if (GET_MODE_CLASS (mode) == MODE_INT)
5102             *cost += extra_cost->ldst.load;
5103           else if (mode == SFmode)
5104             *cost += extra_cost->ldst.loadf;
5105           else if (mode == DFmode)
5106             *cost += extra_cost->ldst.loadd;
5107
5108           *cost +=
5109                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5110                                                      0, speed));
5111         }
5112
5113       return true;
5114
5115     case NEG:
5116       op0 = XEXP (x, 0);
5117
5118       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5119        {
5120           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5121               || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5122             {
5123               /* CSETM.  */
5124               *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5125               return true;
5126             }
5127
5128           /* Cost this as SUB wzr, X.  */
5129           op0 = CONST0_RTX (GET_MODE (x));
5130           op1 = XEXP (x, 0);
5131           goto cost_minus;
5132         }
5133
5134       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5135         {
5136           /* Support (neg(fma...)) as a single instruction only if
5137              sign of zeros is unimportant.  This matches the decision
5138              making in aarch64.md.  */
5139           if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5140             {
5141               /* FNMADD.  */
5142               *cost = rtx_cost (op0, NEG, 0, speed);
5143               return true;
5144             }
5145           if (speed)
5146             /* FNEG.  */
5147             *cost += extra_cost->fp[mode == DFmode].neg;
5148           return false;
5149         }
5150
5151       return false;
5152
5153     case COMPARE:
5154       op0 = XEXP (x, 0);
5155       op1 = XEXP (x, 1);
5156
5157       if (op1 == const0_rtx
5158           && GET_CODE (op0) == AND)
5159         {
5160           x = op0;
5161           goto cost_logic;
5162         }
5163
5164       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5165         {
5166           /* TODO: A write to the CC flags possibly costs extra, this
5167              needs encoding in the cost tables.  */
5168
5169           /* CC_ZESWPmode supports zero extend for free.  */
5170           if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5171             op0 = XEXP (op0, 0);
5172
5173           /* ANDS.  */
5174           if (GET_CODE (op0) == AND)
5175             {
5176               x = op0;
5177               goto cost_logic;
5178             }
5179
5180           if (GET_CODE (op0) == PLUS)
5181             {
5182               /* ADDS (and CMN alias).  */
5183               x = op0;
5184               goto cost_plus;
5185             }
5186
5187           if (GET_CODE (op0) == MINUS)
5188             {
5189               /* SUBS.  */
5190               x = op0;
5191               goto cost_minus;
5192             }
5193
5194           if (GET_CODE (op1) == NEG)
5195             {
5196               /* CMN.  */
5197               if (speed)
5198                 *cost += extra_cost->alu.arith;
5199
5200               *cost += rtx_cost (op0, COMPARE, 0, speed);
5201               *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5202               return true;
5203             }
5204
5205           /* CMP.
5206
5207              Compare can freely swap the order of operands, and
5208              canonicalization puts the more complex operation first.
5209              But the integer MINUS logic expects the shift/extend
5210              operation in op1.  */
5211           if (! (REG_P (op0)
5212                  || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5213           {
5214             op0 = XEXP (x, 1);
5215             op1 = XEXP (x, 0);
5216           }
5217           goto cost_minus;
5218         }
5219
5220       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5221         {
5222           /* FCMP.  */
5223           if (speed)
5224             *cost += extra_cost->fp[mode == DFmode].compare;
5225
5226           if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5227             {
5228               /* FCMP supports constant 0.0 for no extra cost. */
5229               return true;
5230             }
5231           return false;
5232         }
5233
5234       return false;
5235
5236     case MINUS:
5237       {
5238         op0 = XEXP (x, 0);
5239         op1 = XEXP (x, 1);
5240
5241 cost_minus:
5242         /* Detect valid immediates.  */
5243         if ((GET_MODE_CLASS (mode) == MODE_INT
5244              || (GET_MODE_CLASS (mode) == MODE_CC
5245                  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5246             && CONST_INT_P (op1)
5247             && aarch64_uimm12_shift (INTVAL (op1)))
5248           {
5249             *cost += rtx_cost (op0, MINUS, 0, speed);
5250
5251             if (speed)
5252               /* SUB(S) (immediate).  */
5253               *cost += extra_cost->alu.arith;
5254             return true;
5255
5256           }
5257
5258         /* Look for SUB (extended register).  */
5259         if (aarch64_rtx_arith_op_extract_p (op1, mode))
5260           {
5261             if (speed)
5262               *cost += extra_cost->alu.arith_shift;
5263
5264             *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5265                                (enum rtx_code) GET_CODE (op1),
5266                                0, speed);
5267             return true;
5268           }
5269
5270         rtx new_op1 = aarch64_strip_extend (op1);
5271
5272         /* Cost this as an FMA-alike operation.  */
5273         if ((GET_CODE (new_op1) == MULT
5274              || GET_CODE (new_op1) == ASHIFT)
5275             && code != COMPARE)
5276           {
5277             *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5278                                             (enum rtx_code) code,
5279                                             speed);
5280             *cost += rtx_cost (op0, MINUS, 0, speed);
5281             return true;
5282           }
5283
5284         *cost += rtx_cost (new_op1, MINUS, 1, speed);
5285
5286         if (speed)
5287           {
5288             if (GET_MODE_CLASS (mode) == MODE_INT)
5289               /* SUB(S).  */
5290               *cost += extra_cost->alu.arith;
5291             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5292               /* FSUB.  */
5293               *cost += extra_cost->fp[mode == DFmode].addsub;
5294           }
5295         return true;
5296       }
5297
5298     case PLUS:
5299       {
5300         rtx new_op0;
5301
5302         op0 = XEXP (x, 0);
5303         op1 = XEXP (x, 1);
5304
5305 cost_plus:
5306         if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5307             || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5308           {
5309             /* CSINC.  */
5310             *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5311             *cost += rtx_cost (op1, PLUS, 1, speed);
5312             return true;
5313           }
5314
5315         if (GET_MODE_CLASS (mode) == MODE_INT
5316             && CONST_INT_P (op1)
5317             && aarch64_uimm12_shift (INTVAL (op1)))
5318           {
5319             *cost += rtx_cost (op0, PLUS, 0, speed);
5320
5321             if (speed)
5322               /* ADD (immediate).  */
5323               *cost += extra_cost->alu.arith;
5324             return true;
5325           }
5326
5327         /* Look for ADD (extended register).  */
5328         if (aarch64_rtx_arith_op_extract_p (op0, mode))
5329           {
5330             if (speed)
5331               *cost += extra_cost->alu.arith_shift;
5332
5333             *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5334                                (enum rtx_code) GET_CODE (op0),
5335                                0, speed);
5336             return true;
5337           }
5338
5339         /* Strip any extend, leave shifts behind as we will
5340            cost them through mult_cost.  */
5341         new_op0 = aarch64_strip_extend (op0);
5342
5343         if (GET_CODE (new_op0) == MULT
5344             || GET_CODE (new_op0) == ASHIFT)
5345           {
5346             *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5347                                             speed);
5348             *cost += rtx_cost (op1, PLUS, 1, speed);
5349             return true;
5350           }
5351
5352         *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5353                   + rtx_cost (op1, PLUS, 1, speed));
5354
5355         if (speed)
5356           {
5357             if (GET_MODE_CLASS (mode) == MODE_INT)
5358               /* ADD.  */
5359               *cost += extra_cost->alu.arith;
5360             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5361               /* FADD.  */
5362               *cost += extra_cost->fp[mode == DFmode].addsub;
5363           }
5364         return true;
5365       }
5366
5367     case BSWAP:
5368       *cost = COSTS_N_INSNS (1);
5369
5370       if (speed)
5371         *cost += extra_cost->alu.rev;
5372
5373       return false;
5374
5375     case IOR:
5376       if (aarch_rev16_p (x))
5377         {
5378           *cost = COSTS_N_INSNS (1);
5379
5380           if (speed)
5381             *cost += extra_cost->alu.rev;
5382
5383           return true;
5384         }
5385     /* Fall through.  */
5386     case XOR:
5387     case AND:
5388     cost_logic:
5389       op0 = XEXP (x, 0);
5390       op1 = XEXP (x, 1);
5391
5392       if (code == AND
5393           && GET_CODE (op0) == MULT
5394           && CONST_INT_P (XEXP (op0, 1))
5395           && CONST_INT_P (op1)
5396           && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5397                                INTVAL (op1)) != 0)
5398         {
5399           /* This is a UBFM/SBFM.  */
5400           *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5401           if (speed)
5402             *cost += extra_cost->alu.bfx;
5403           return true;
5404         }
5405
5406       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5407         {
5408           /* We possibly get the immediate for free, this is not
5409              modelled.  */
5410           if (CONST_INT_P (op1)
5411               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5412             {
5413               *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5414
5415               if (speed)
5416                 *cost += extra_cost->alu.logical;
5417
5418               return true;
5419             }
5420           else
5421             {
5422               rtx new_op0 = op0;
5423
5424               /* Handle ORN, EON, or BIC.  */
5425               if (GET_CODE (op0) == NOT)
5426                 op0 = XEXP (op0, 0);
5427
5428               new_op0 = aarch64_strip_shift (op0);
5429
5430               /* If we had a shift on op0 then this is a logical-shift-
5431                  by-register/immediate operation.  Otherwise, this is just
5432                  a logical operation.  */
5433               if (speed)
5434                 {
5435                   if (new_op0 != op0)
5436                     {
5437                       /* Shift by immediate.  */
5438                       if (CONST_INT_P (XEXP (op0, 1)))
5439                         *cost += extra_cost->alu.log_shift;
5440                       else
5441                         *cost += extra_cost->alu.log_shift_reg;
5442                     }
5443                   else
5444                     *cost += extra_cost->alu.logical;
5445                 }
5446
5447               /* In both cases we want to cost both operands.  */
5448               *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5449                        + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5450
5451               return true;
5452             }
5453         }
5454       return false;
5455
5456     case NOT:
5457       /* MVN.  */
5458       if (speed)
5459         *cost += extra_cost->alu.logical;
5460
5461       /* The logical instruction could have the shifted register form,
5462          but the cost is the same if the shift is processed as a separate
5463          instruction, so we don't bother with it here.  */
5464       return false;
5465
5466     case ZERO_EXTEND:
5467
5468       op0 = XEXP (x, 0);
5469       /* If a value is written in SI mode, then zero extended to DI
5470          mode, the operation will in general be free as a write to
5471          a 'w' register implicitly zeroes the upper bits of an 'x'
5472          register.  However, if this is
5473
5474            (set (reg) (zero_extend (reg)))
5475
5476          we must cost the explicit register move.  */
5477       if (mode == DImode
5478           && GET_MODE (op0) == SImode
5479           && outer == SET)
5480         {
5481           int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5482
5483           if (!op_cost && speed)
5484             /* MOV.  */
5485             *cost += extra_cost->alu.extend;
5486           else
5487             /* Free, the cost is that of the SI mode operation.  */
5488             *cost = op_cost;
5489
5490           return true;
5491         }
5492       else if (MEM_P (XEXP (x, 0)))
5493         {
5494           /* All loads can zero extend to any size for free.  */
5495           *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5496           return true;
5497         }
5498
5499       /* UXTB/UXTH.  */
5500       if (speed)
5501         *cost += extra_cost->alu.extend;
5502
5503       return false;
5504
5505     case SIGN_EXTEND:
5506       if (MEM_P (XEXP (x, 0)))
5507         {
5508           /* LDRSH.  */
5509           if (speed)
5510             {
5511               rtx address = XEXP (XEXP (x, 0), 0);
5512               *cost += extra_cost->ldst.load_sign_extend;
5513
5514               *cost +=
5515                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5516                                                      0, speed));
5517             }
5518           return true;
5519         }
5520
5521       if (speed)
5522         *cost += extra_cost->alu.extend;
5523       return false;
5524
5525     case ASHIFT:
5526       op0 = XEXP (x, 0);
5527       op1 = XEXP (x, 1);
5528
5529       if (CONST_INT_P (op1))
5530         {
5531           /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
5532              aliases.  */
5533           if (speed)
5534             *cost += extra_cost->alu.shift;
5535
5536           /* We can incorporate zero/sign extend for free.  */
5537           if (GET_CODE (op0) == ZERO_EXTEND
5538               || GET_CODE (op0) == SIGN_EXTEND)
5539             op0 = XEXP (op0, 0);
5540
5541           *cost += rtx_cost (op0, ASHIFT, 0, speed);
5542           return true;
5543         }
5544       else
5545         {
5546           /* LSLV.  */
5547           if (speed)
5548             *cost += extra_cost->alu.shift_reg;
5549
5550           return false;  /* All arguments need to be in registers.  */
5551         }
5552
5553     case ROTATE:
5554     case ROTATERT:
5555     case LSHIFTRT:
5556     case ASHIFTRT:
5557       op0 = XEXP (x, 0);
5558       op1 = XEXP (x, 1);
5559
5560       if (CONST_INT_P (op1))
5561         {
5562           /* ASR (immediate) and friends.  */
5563           if (speed)
5564             *cost += extra_cost->alu.shift;
5565
5566           *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5567           return true;
5568         }
5569       else
5570         {
5571
5572           /* ASR (register) and friends.  */
5573           if (speed)
5574             *cost += extra_cost->alu.shift_reg;
5575
5576           return false;  /* All arguments need to be in registers.  */
5577         }
5578
5579     case SYMBOL_REF:
5580
5581       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5582         {
5583           /* LDR.  */
5584           if (speed)
5585             *cost += extra_cost->ldst.load;
5586         }
5587       else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5588                || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5589         {
5590           /* ADRP, followed by ADD.  */
5591           *cost += COSTS_N_INSNS (1);
5592           if (speed)
5593             *cost += 2 * extra_cost->alu.arith;
5594         }
5595       else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5596                || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5597         {
5598           /* ADR.  */
5599           if (speed)
5600             *cost += extra_cost->alu.arith;
5601         }
5602
5603       if (flag_pic)
5604         {
5605           /* One extra load instruction, after accessing the GOT.  */
5606           *cost += COSTS_N_INSNS (1);
5607           if (speed)
5608             *cost += extra_cost->ldst.load;
5609         }
5610       return true;
5611
5612     case HIGH:
5613     case LO_SUM:
5614       /* ADRP/ADD (immediate).  */
5615       if (speed)
5616         *cost += extra_cost->alu.arith;
5617       return true;
5618
5619     case ZERO_EXTRACT:
5620     case SIGN_EXTRACT:
5621       /* UBFX/SBFX.  */
5622       if (speed)
5623         *cost += extra_cost->alu.bfx;
5624
5625       /* We can trust that the immediates used will be correct (there
5626          are no by-register forms), so we need only cost op0.  */
5627       *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5628       return true;
5629
5630     case MULT:
5631       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5632       /* aarch64_rtx_mult_cost always handles recursion to its
5633          operands.  */
5634       return true;
5635
5636     case MOD:
5637     case UMOD:
5638       if (speed)
5639         {
5640           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5641             *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5642                       + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5643           else if (GET_MODE (x) == DFmode)
5644             *cost += (extra_cost->fp[1].mult
5645                       + extra_cost->fp[1].div);
5646           else if (GET_MODE (x) == SFmode)
5647             *cost += (extra_cost->fp[0].mult
5648                       + extra_cost->fp[0].div);
5649         }
5650       return false;  /* All arguments need to be in registers.  */
5651
5652     case DIV:
5653     case UDIV:
5654     case SQRT:
5655       if (speed)
5656         {
5657           if (GET_MODE_CLASS (mode) == MODE_INT)
5658             /* There is no integer SQRT, so only DIV and UDIV can get
5659                here.  */
5660             *cost += extra_cost->mult[mode == DImode].idiv;
5661           else
5662             *cost += extra_cost->fp[mode == DFmode].div;
5663         }
5664       return false;  /* All arguments need to be in registers.  */
5665
5666     case IF_THEN_ELSE:
5667       return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5668                                          XEXP (x, 2), cost, speed);
5669
5670     case EQ:
5671     case NE:
5672     case GT:
5673     case GTU:
5674     case LT:
5675     case LTU:
5676     case GE:
5677     case GEU:
5678     case LE:
5679     case LEU:
5680
5681       return false; /* All arguments must be in registers.  */
5682
5683     case FMA:
5684       op0 = XEXP (x, 0);
5685       op1 = XEXP (x, 1);
5686       op2 = XEXP (x, 2);
5687
5688       if (speed)
5689         *cost += extra_cost->fp[mode == DFmode].fma;
5690
5691       /* FMSUB, FNMADD, and FNMSUB are free.  */
5692       if (GET_CODE (op0) == NEG)
5693         op0 = XEXP (op0, 0);
5694
5695       if (GET_CODE (op2) == NEG)
5696         op2 = XEXP (op2, 0);
5697
5698       /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5699          and the by-element operand as operand 0.  */
5700       if (GET_CODE (op1) == NEG)
5701         op1 = XEXP (op1, 0);
5702
5703       /* Catch vector-by-element operations.  The by-element operand can
5704          either be (vec_duplicate (vec_select (x))) or just
5705          (vec_select (x)), depending on whether we are multiplying by
5706          a vector or a scalar.
5707
5708          Canonicalization is not very good in these cases, FMA4 will put the
5709          by-element operand as operand 0, FNMA4 will have it as operand 1.  */
5710       if (GET_CODE (op0) == VEC_DUPLICATE)
5711         op0 = XEXP (op0, 0);
5712       else if (GET_CODE (op1) == VEC_DUPLICATE)
5713         op1 = XEXP (op1, 0);
5714
5715       if (GET_CODE (op0) == VEC_SELECT)
5716         op0 = XEXP (op0, 0);
5717       else if (GET_CODE (op1) == VEC_SELECT)
5718         op1 = XEXP (op1, 0);
5719
5720       /* If the remaining parameters are not registers,
5721          get the cost to put them into registers.  */
5722       *cost += rtx_cost (op0, FMA, 0, speed);
5723       *cost += rtx_cost (op1, FMA, 1, speed);
5724       *cost += rtx_cost (op2, FMA, 2, speed);
5725       return true;
5726
5727     case FLOAT_EXTEND:
5728       if (speed)
5729         *cost += extra_cost->fp[mode == DFmode].widen;
5730       return false;
5731
5732     case FLOAT_TRUNCATE:
5733       if (speed)
5734         *cost += extra_cost->fp[mode == DFmode].narrow;
5735       return false;
5736
5737     case ABS:
5738       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5739         {
5740           /* FABS and FNEG are analogous.  */
5741           if (speed)
5742             *cost += extra_cost->fp[mode == DFmode].neg;
5743         }
5744       else
5745         {
5746           /* Integer ABS will either be split to
5747              two arithmetic instructions, or will be an ABS
5748              (scalar), which we don't model.  */
5749           *cost = COSTS_N_INSNS (2);
5750           if (speed)
5751             *cost += 2 * extra_cost->alu.arith;
5752         }
5753       return false;
5754
5755     case SMAX:
5756     case SMIN:
5757       if (speed)
5758         {
5759           /* FMAXNM/FMINNM/FMAX/FMIN.
5760              TODO: This may not be accurate for all implementations, but
5761              we do not model this in the cost tables.  */
5762           *cost += extra_cost->fp[mode == DFmode].addsub;
5763         }
5764       return false;
5765
5766     case TRUNCATE:
5767
5768       /* Decompose <su>muldi3_highpart.  */
5769       if (/* (truncate:DI  */
5770           mode == DImode
5771           /*   (lshiftrt:TI  */
5772           && GET_MODE (XEXP (x, 0)) == TImode
5773           && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5774           /*      (mult:TI  */
5775           && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5776           /*        (ANY_EXTEND:TI (reg:DI))
5777                     (ANY_EXTEND:TI (reg:DI)))  */
5778           && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5779                && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5780               || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5781                   && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5782           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5783           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5784           /*     (const_int 64)  */
5785           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5786           && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5787         {
5788           /* UMULH/SMULH.  */
5789           if (speed)
5790             *cost += extra_cost->mult[mode == DImode].extend;
5791           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5792                              MULT, 0, speed);
5793           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5794                              MULT, 1, speed);
5795           return true;
5796         }
5797
5798       /* Fall through.  */
5799     default:
5800       if (dump_file && (dump_flags & TDF_DETAILS))
5801         fprintf (dump_file,
5802                  "\nFailed to cost RTX.  Assuming default cost.\n");
5803
5804       return true;
5805     }
5806   return false;
5807 }
5808
5809 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5810    calculated for X.  This cost is stored in *COST.  Returns true
5811    if the total cost of X was calculated.  */
5812 static bool
5813 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5814                    int param, int *cost, bool speed)
5815 {
5816   bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5817
5818   if (dump_file && (dump_flags & TDF_DETAILS))
5819     {
5820       print_rtl_single (dump_file, x);
5821       fprintf (dump_file, "\n%s cost: %d (%s)\n",
5822                speed ? "Hot" : "Cold",
5823                *cost, result ? "final" : "partial");
5824     }
5825
5826   return result;
5827 }
5828
5829 static int
5830 aarch64_register_move_cost (enum machine_mode mode,
5831                             reg_class_t from_i, reg_class_t to_i)
5832 {
5833   enum reg_class from = (enum reg_class) from_i;
5834   enum reg_class to = (enum reg_class) to_i;
5835   const struct cpu_regmove_cost *regmove_cost
5836     = aarch64_tune_params->regmove_cost;
5837
5838   /* Moving between GPR and stack cost is the same as GP2GP.  */
5839   if ((from == GENERAL_REGS && to == STACK_REG)
5840       || (to == GENERAL_REGS && from == STACK_REG))
5841     return regmove_cost->GP2GP;
5842
5843   /* To/From the stack register, we move via the gprs.  */
5844   if (to == STACK_REG || from == STACK_REG)
5845     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5846             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5847
5848   if (from == GENERAL_REGS && to == GENERAL_REGS)
5849     return regmove_cost->GP2GP;
5850   else if (from == GENERAL_REGS)
5851     return regmove_cost->GP2FP;
5852   else if (to == GENERAL_REGS)
5853     return regmove_cost->FP2GP;
5854
5855   /* When AdvSIMD instructions are disabled it is not possible to move
5856      a 128-bit value directly between Q registers.  This is handled in
5857      secondary reload.  A general register is used as a scratch to move
5858      the upper DI value and the lower DI value is moved directly,
5859      hence the cost is the sum of three moves. */
5860   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5861     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5862
5863   return regmove_cost->FP2FP;
5864 }
5865
5866 static int
5867 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5868                           reg_class_t rclass ATTRIBUTE_UNUSED,
5869                           bool in ATTRIBUTE_UNUSED)
5870 {
5871   return aarch64_tune_params->memmov_cost;
5872 }
5873
5874 /* Return the number of instructions that can be issued per cycle.  */
5875 static int
5876 aarch64_sched_issue_rate (void)
5877 {
5878   return aarch64_tune_params->issue_rate;
5879 }
5880
5881 /* Vectorizer cost model target hooks.  */
5882
5883 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
5884 static int
5885 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5886                                     tree vectype,
5887                                     int misalign ATTRIBUTE_UNUSED)
5888 {
5889   unsigned elements;
5890
5891   switch (type_of_cost)
5892     {
5893       case scalar_stmt:
5894         return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5895
5896       case scalar_load:
5897         return aarch64_tune_params->vec_costs->scalar_load_cost;
5898
5899       case scalar_store:
5900         return aarch64_tune_params->vec_costs->scalar_store_cost;
5901
5902       case vector_stmt:
5903         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5904
5905       case vector_load:
5906         return aarch64_tune_params->vec_costs->vec_align_load_cost;
5907
5908       case vector_store:
5909         return aarch64_tune_params->vec_costs->vec_store_cost;
5910
5911       case vec_to_scalar:
5912         return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5913
5914       case scalar_to_vec:
5915         return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5916
5917       case unaligned_load:
5918         return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5919
5920       case unaligned_store:
5921         return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5922
5923       case cond_branch_taken:
5924         return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5925
5926       case cond_branch_not_taken:
5927         return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5928
5929       case vec_perm:
5930       case vec_promote_demote:
5931         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5932
5933       case vec_construct:
5934         elements = TYPE_VECTOR_SUBPARTS (vectype);
5935         return elements / 2 + 1;
5936
5937       default:
5938         gcc_unreachable ();
5939     }
5940 }
5941
5942 /* Implement targetm.vectorize.add_stmt_cost.  */
5943 static unsigned
5944 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5945                        struct _stmt_vec_info *stmt_info, int misalign,
5946                        enum vect_cost_model_location where)
5947 {
5948   unsigned *cost = (unsigned *) data;
5949   unsigned retval = 0;
5950
5951   if (flag_vect_cost_model)
5952     {
5953       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5954       int stmt_cost =
5955             aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5956
5957       /* Statements in an inner loop relative to the loop being
5958          vectorized are weighted more heavily.  The value here is
5959          a function (linear for now) of the loop nest level.  */
5960       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5961         {
5962           loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5963           struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
5964           unsigned nest_level = loop_depth (loop);
5965
5966           count *= nest_level;
5967         }
5968
5969       retval = (unsigned) (count * stmt_cost);
5970       cost[where] += retval;
5971     }
5972
5973   return retval;
5974 }
5975
5976 static void initialize_aarch64_code_model (void);
5977
5978 /* Parse the architecture extension string.  */
5979
5980 static void
5981 aarch64_parse_extension (char *str)
5982 {
5983   /* The extension string is parsed left to right.  */
5984   const struct aarch64_option_extension *opt = NULL;
5985
5986   /* Flag to say whether we are adding or removing an extension.  */
5987   int adding_ext = -1;
5988
5989   while (str != NULL && *str != 0)
5990     {
5991       char *ext;
5992       size_t len;
5993
5994       str++;
5995       ext = strchr (str, '+');
5996
5997       if (ext != NULL)
5998         len = ext - str;
5999       else
6000         len = strlen (str);
6001
6002       if (len >= 2 && strncmp (str, "no", 2) == 0)
6003         {
6004           adding_ext = 0;
6005           len -= 2;
6006           str += 2;
6007         }
6008       else if (len > 0)
6009         adding_ext = 1;
6010
6011       if (len == 0)
6012         {
6013           error ("missing feature modifier after %qs", "+no");
6014           return;
6015         }
6016
6017       /* Scan over the extensions table trying to find an exact match.  */
6018       for (opt = all_extensions; opt->name != NULL; opt++)
6019         {
6020           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6021             {
6022               /* Add or remove the extension.  */
6023               if (adding_ext)
6024                 aarch64_isa_flags |= opt->flags_on;
6025               else
6026                 aarch64_isa_flags &= ~(opt->flags_off);
6027               break;
6028             }
6029         }
6030
6031       if (opt->name == NULL)
6032         {
6033           /* Extension not found in list.  */
6034           error ("unknown feature modifier %qs", str);
6035           return;
6036         }
6037
6038       str = ext;
6039     };
6040
6041   return;
6042 }
6043
6044 /* Parse the ARCH string.  */
6045
6046 static void
6047 aarch64_parse_arch (void)
6048 {
6049   char *ext;
6050   const struct processor *arch;
6051   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6052   size_t len;
6053
6054   strcpy (str, aarch64_arch_string);
6055
6056   ext = strchr (str, '+');
6057
6058   if (ext != NULL)
6059     len = ext - str;
6060   else
6061     len = strlen (str);
6062
6063   if (len == 0)
6064     {
6065       error ("missing arch name in -march=%qs", str);
6066       return;
6067     }
6068
6069   /* Loop through the list of supported ARCHs to find a match.  */
6070   for (arch = all_architectures; arch->name != NULL; arch++)
6071     {
6072       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6073         {
6074           selected_arch = arch;
6075           aarch64_isa_flags = selected_arch->flags;
6076
6077           if (!selected_cpu)
6078             selected_cpu = &all_cores[selected_arch->core];
6079
6080           if (ext != NULL)
6081             {
6082               /* ARCH string contains at least one extension.  */
6083               aarch64_parse_extension (ext);
6084             }
6085
6086           if (strcmp (selected_arch->arch, selected_cpu->arch))
6087             {
6088               warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6089                        selected_cpu->name, selected_arch->name);
6090             }
6091
6092           return;
6093         }
6094     }
6095
6096   /* ARCH name not found in list.  */
6097   error ("unknown value %qs for -march", str);
6098   return;
6099 }
6100
6101 /* Parse the CPU string.  */
6102
6103 static void
6104 aarch64_parse_cpu (void)
6105 {
6106   char *ext;
6107   const struct processor *cpu;
6108   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6109   size_t len;
6110
6111   strcpy (str, aarch64_cpu_string);
6112
6113   ext = strchr (str, '+');
6114
6115   if (ext != NULL)
6116     len = ext - str;
6117   else
6118     len = strlen (str);
6119
6120   if (len == 0)
6121     {
6122       error ("missing cpu name in -mcpu=%qs", str);
6123       return;
6124     }
6125
6126   /* Loop through the list of supported CPUs to find a match.  */
6127   for (cpu = all_cores; cpu->name != NULL; cpu++)
6128     {
6129       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6130         {
6131           selected_cpu = cpu;
6132           selected_tune = cpu;
6133           aarch64_isa_flags = selected_cpu->flags;
6134
6135           if (ext != NULL)
6136             {
6137               /* CPU string contains at least one extension.  */
6138               aarch64_parse_extension (ext);
6139             }
6140
6141           return;
6142         }
6143     }
6144
6145   /* CPU name not found in list.  */
6146   error ("unknown value %qs for -mcpu", str);
6147   return;
6148 }
6149
6150 /* Parse the TUNE string.  */
6151
6152 static void
6153 aarch64_parse_tune (void)
6154 {
6155   const struct processor *cpu;
6156   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6157   strcpy (str, aarch64_tune_string);
6158
6159   /* Loop through the list of supported CPUs to find a match.  */
6160   for (cpu = all_cores; cpu->name != NULL; cpu++)
6161     {
6162       if (strcmp (cpu->name, str) == 0)
6163         {
6164           selected_tune = cpu;
6165           return;
6166         }
6167     }
6168
6169   /* CPU name not found in list.  */
6170   error ("unknown value %qs for -mtune", str);
6171   return;
6172 }
6173
6174
6175 /* Implement TARGET_OPTION_OVERRIDE.  */
6176
6177 static void
6178 aarch64_override_options (void)
6179 {
6180   /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6181      If either of -march or -mtune is given, they override their
6182      respective component of -mcpu.
6183
6184      So, first parse AARCH64_CPU_STRING, then the others, be careful
6185      with -march as, if -mcpu is not present on the command line, march
6186      must set a sensible default CPU.  */
6187   if (aarch64_cpu_string)
6188     {
6189       aarch64_parse_cpu ();
6190     }
6191
6192   if (aarch64_arch_string)
6193     {
6194       aarch64_parse_arch ();
6195     }
6196
6197   if (aarch64_tune_string)
6198     {
6199       aarch64_parse_tune ();
6200     }
6201
6202 #ifndef HAVE_AS_MABI_OPTION
6203   /* The compiler may have been configured with 2.23.* binutils, which does
6204      not have support for ILP32.  */
6205   if (TARGET_ILP32)
6206     error ("Assembler does not support -mabi=ilp32");
6207 #endif
6208
6209   initialize_aarch64_code_model ();
6210
6211   aarch64_build_bitmask_table ();
6212
6213   /* This target defaults to strict volatile bitfields.  */
6214   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6215     flag_strict_volatile_bitfields = 1;
6216
6217   /* If the user did not specify a processor, choose the default
6218      one for them.  This will be the CPU set during configuration using
6219      --with-cpu, otherwise it is "generic".  */
6220   if (!selected_cpu)
6221     {
6222       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6223       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6224     }
6225
6226   gcc_assert (selected_cpu);
6227
6228   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
6229   if (!selected_tune)
6230     selected_tune = &all_cores[selected_cpu->core];
6231
6232   aarch64_tune_flags = selected_tune->flags;
6233   aarch64_tune = selected_tune->core;
6234   aarch64_tune_params = selected_tune->tune;
6235
6236   aarch64_override_options_after_change ();
6237 }
6238
6239 /* Implement targetm.override_options_after_change.  */
6240
6241 static void
6242 aarch64_override_options_after_change (void)
6243 {
6244   if (flag_omit_frame_pointer)
6245     flag_omit_leaf_frame_pointer = false;
6246   else if (flag_omit_leaf_frame_pointer)
6247     flag_omit_frame_pointer = true;
6248 }
6249
6250 static struct machine_function *
6251 aarch64_init_machine_status (void)
6252 {
6253   struct machine_function *machine;
6254   machine = ggc_alloc_cleared_machine_function ();
6255   return machine;
6256 }
6257
6258 void
6259 aarch64_init_expanders (void)
6260 {
6261   init_machine_status = aarch64_init_machine_status;
6262 }
6263
6264 /* A checking mechanism for the implementation of the various code models.  */
6265 static void
6266 initialize_aarch64_code_model (void)
6267 {
6268    if (flag_pic)
6269      {
6270        switch (aarch64_cmodel_var)
6271          {
6272          case AARCH64_CMODEL_TINY:
6273            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6274            break;
6275          case AARCH64_CMODEL_SMALL:
6276            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6277            break;
6278          case AARCH64_CMODEL_LARGE:
6279            sorry ("code model %qs with -f%s", "large",
6280                   flag_pic > 1 ? "PIC" : "pic");
6281          default:
6282            gcc_unreachable ();
6283          }
6284      }
6285    else
6286      aarch64_cmodel = aarch64_cmodel_var;
6287 }
6288
6289 /* Return true if SYMBOL_REF X binds locally.  */
6290
6291 static bool
6292 aarch64_symbol_binds_local_p (const_rtx x)
6293 {
6294   return (SYMBOL_REF_DECL (x)
6295           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6296           : SYMBOL_REF_LOCAL_P (x));
6297 }
6298
6299 /* Return true if SYMBOL_REF X is thread local */
6300 static bool
6301 aarch64_tls_symbol_p (rtx x)
6302 {
6303   if (! TARGET_HAVE_TLS)
6304     return false;
6305
6306   if (GET_CODE (x) != SYMBOL_REF)
6307     return false;
6308
6309   return SYMBOL_REF_TLS_MODEL (x) != 0;
6310 }
6311
6312 /* Classify a TLS symbol into one of the TLS kinds.  */
6313 enum aarch64_symbol_type
6314 aarch64_classify_tls_symbol (rtx x)
6315 {
6316   enum tls_model tls_kind = tls_symbolic_operand_type (x);
6317
6318   switch (tls_kind)
6319     {
6320     case TLS_MODEL_GLOBAL_DYNAMIC:
6321     case TLS_MODEL_LOCAL_DYNAMIC:
6322       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6323
6324     case TLS_MODEL_INITIAL_EXEC:
6325       return SYMBOL_SMALL_GOTTPREL;
6326
6327     case TLS_MODEL_LOCAL_EXEC:
6328       return SYMBOL_SMALL_TPREL;
6329
6330     case TLS_MODEL_EMULATED:
6331     case TLS_MODEL_NONE:
6332       return SYMBOL_FORCE_TO_MEM;
6333
6334     default:
6335       gcc_unreachable ();
6336     }
6337 }
6338
6339 /* Return the method that should be used to access SYMBOL_REF or
6340    LABEL_REF X in context CONTEXT.  */
6341
6342 enum aarch64_symbol_type
6343 aarch64_classify_symbol (rtx x,
6344                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6345 {
6346   if (GET_CODE (x) == LABEL_REF)
6347     {
6348       switch (aarch64_cmodel)
6349         {
6350         case AARCH64_CMODEL_LARGE:
6351           return SYMBOL_FORCE_TO_MEM;
6352
6353         case AARCH64_CMODEL_TINY_PIC:
6354         case AARCH64_CMODEL_TINY:
6355           return SYMBOL_TINY_ABSOLUTE;
6356
6357         case AARCH64_CMODEL_SMALL_PIC:
6358         case AARCH64_CMODEL_SMALL:
6359           return SYMBOL_SMALL_ABSOLUTE;
6360
6361         default:
6362           gcc_unreachable ();
6363         }
6364     }
6365
6366   if (GET_CODE (x) == SYMBOL_REF)
6367     {
6368       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6369           return SYMBOL_FORCE_TO_MEM;
6370
6371       if (aarch64_tls_symbol_p (x))
6372         return aarch64_classify_tls_symbol (x);
6373
6374       switch (aarch64_cmodel)
6375         {
6376         case AARCH64_CMODEL_TINY:
6377           if (SYMBOL_REF_WEAK (x))
6378             return SYMBOL_FORCE_TO_MEM;
6379           return SYMBOL_TINY_ABSOLUTE;
6380
6381         case AARCH64_CMODEL_SMALL:
6382           if (SYMBOL_REF_WEAK (x))
6383             return SYMBOL_FORCE_TO_MEM;
6384           return SYMBOL_SMALL_ABSOLUTE;
6385
6386         case AARCH64_CMODEL_TINY_PIC:
6387           if (!aarch64_symbol_binds_local_p (x))
6388             return SYMBOL_TINY_GOT;
6389           return SYMBOL_TINY_ABSOLUTE;
6390
6391         case AARCH64_CMODEL_SMALL_PIC:
6392           if (!aarch64_symbol_binds_local_p (x))
6393             return SYMBOL_SMALL_GOT;
6394           return SYMBOL_SMALL_ABSOLUTE;
6395
6396         default:
6397           gcc_unreachable ();
6398         }
6399     }
6400
6401   /* By default push everything into the constant pool.  */
6402   return SYMBOL_FORCE_TO_MEM;
6403 }
6404
6405 bool
6406 aarch64_constant_address_p (rtx x)
6407 {
6408   return (CONSTANT_P (x) && memory_address_p (DImode, x));
6409 }
6410
6411 bool
6412 aarch64_legitimate_pic_operand_p (rtx x)
6413 {
6414   if (GET_CODE (x) == SYMBOL_REF
6415       || (GET_CODE (x) == CONST
6416           && GET_CODE (XEXP (x, 0)) == PLUS
6417           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6418      return false;
6419
6420   return true;
6421 }
6422
6423 /* Return true if X holds either a quarter-precision or
6424      floating-point +0.0 constant.  */
6425 static bool
6426 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6427 {
6428   if (!CONST_DOUBLE_P (x))
6429     return false;
6430
6431   /* TODO: We could handle moving 0.0 to a TFmode register,
6432      but first we would like to refactor the movtf_aarch64
6433      to be more amicable to split moves properly and
6434      correctly gate on TARGET_SIMD.  For now - reject all
6435      constants which are not to SFmode or DFmode registers.  */
6436   if (!(mode == SFmode || mode == DFmode))
6437     return false;
6438
6439   if (aarch64_float_const_zero_rtx_p (x))
6440     return true;
6441   return aarch64_float_const_representable_p (x);
6442 }
6443
6444 static bool
6445 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6446 {
6447   /* Do not allow vector struct mode constants.  We could support
6448      0 and -1 easily, but they need support in aarch64-simd.md.  */
6449   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6450     return false;
6451
6452   /* This could probably go away because
6453      we now decompose CONST_INTs according to expand_mov_immediate.  */
6454   if ((GET_CODE (x) == CONST_VECTOR
6455        && aarch64_simd_valid_immediate (x, mode, false, NULL))
6456       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6457         return !targetm.cannot_force_const_mem (mode, x);
6458
6459   if (GET_CODE (x) == HIGH
6460       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6461     return true;
6462
6463   return aarch64_constant_address_p (x);
6464 }
6465
6466 rtx
6467 aarch64_load_tp (rtx target)
6468 {
6469   if (!target
6470       || GET_MODE (target) != Pmode
6471       || !register_operand (target, Pmode))
6472     target = gen_reg_rtx (Pmode);
6473
6474   /* Can return in any reg.  */
6475   emit_insn (gen_aarch64_load_tp_hard (target));
6476   return target;
6477 }
6478
6479 /* On AAPCS systems, this is the "struct __va_list".  */
6480 static GTY(()) tree va_list_type;
6481
6482 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6483    Return the type to use as __builtin_va_list.
6484
6485    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6486
6487    struct __va_list
6488    {
6489      void *__stack;
6490      void *__gr_top;
6491      void *__vr_top;
6492      int   __gr_offs;
6493      int   __vr_offs;
6494    };  */
6495
6496 static tree
6497 aarch64_build_builtin_va_list (void)
6498 {
6499   tree va_list_name;
6500   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6501
6502   /* Create the type.  */
6503   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6504   /* Give it the required name.  */
6505   va_list_name = build_decl (BUILTINS_LOCATION,
6506                              TYPE_DECL,
6507                              get_identifier ("__va_list"),
6508                              va_list_type);
6509   DECL_ARTIFICIAL (va_list_name) = 1;
6510   TYPE_NAME (va_list_type) = va_list_name;
6511   TYPE_STUB_DECL (va_list_type) = va_list_name;
6512
6513   /* Create the fields.  */
6514   f_stack = build_decl (BUILTINS_LOCATION,
6515                         FIELD_DECL, get_identifier ("__stack"),
6516                         ptr_type_node);
6517   f_grtop = build_decl (BUILTINS_LOCATION,
6518                         FIELD_DECL, get_identifier ("__gr_top"),
6519                         ptr_type_node);
6520   f_vrtop = build_decl (BUILTINS_LOCATION,
6521                         FIELD_DECL, get_identifier ("__vr_top"),
6522                         ptr_type_node);
6523   f_groff = build_decl (BUILTINS_LOCATION,
6524                         FIELD_DECL, get_identifier ("__gr_offs"),
6525                         integer_type_node);
6526   f_vroff = build_decl (BUILTINS_LOCATION,
6527                         FIELD_DECL, get_identifier ("__vr_offs"),
6528                         integer_type_node);
6529
6530   DECL_ARTIFICIAL (f_stack) = 1;
6531   DECL_ARTIFICIAL (f_grtop) = 1;
6532   DECL_ARTIFICIAL (f_vrtop) = 1;
6533   DECL_ARTIFICIAL (f_groff) = 1;
6534   DECL_ARTIFICIAL (f_vroff) = 1;
6535
6536   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6537   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6538   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6539   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6540   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6541
6542   TYPE_FIELDS (va_list_type) = f_stack;
6543   DECL_CHAIN (f_stack) = f_grtop;
6544   DECL_CHAIN (f_grtop) = f_vrtop;
6545   DECL_CHAIN (f_vrtop) = f_groff;
6546   DECL_CHAIN (f_groff) = f_vroff;
6547
6548   /* Compute its layout.  */
6549   layout_type (va_list_type);
6550
6551   return va_list_type;
6552 }
6553
6554 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
6555 static void
6556 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6557 {
6558   const CUMULATIVE_ARGS *cum;
6559   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6560   tree stack, grtop, vrtop, groff, vroff;
6561   tree t;
6562   int gr_save_area_size;
6563   int vr_save_area_size;
6564   int vr_offset;
6565
6566   cum = &crtl->args.info;
6567   gr_save_area_size
6568     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6569   vr_save_area_size
6570     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6571
6572   if (TARGET_GENERAL_REGS_ONLY)
6573     {
6574       if (cum->aapcs_nvrn > 0)
6575         sorry ("%qs and floating point or vector arguments",
6576                "-mgeneral-regs-only");
6577       vr_save_area_size = 0;
6578     }
6579
6580   f_stack = TYPE_FIELDS (va_list_type_node);
6581   f_grtop = DECL_CHAIN (f_stack);
6582   f_vrtop = DECL_CHAIN (f_grtop);
6583   f_groff = DECL_CHAIN (f_vrtop);
6584   f_vroff = DECL_CHAIN (f_groff);
6585
6586   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6587                   NULL_TREE);
6588   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6589                   NULL_TREE);
6590   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6591                   NULL_TREE);
6592   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6593                   NULL_TREE);
6594   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6595                   NULL_TREE);
6596
6597   /* Emit code to initialize STACK, which points to the next varargs stack
6598      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
6599      by named arguments.  STACK is 8-byte aligned.  */
6600   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6601   if (cum->aapcs_stack_size > 0)
6602     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6603   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6604   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6605
6606   /* Emit code to initialize GRTOP, the top of the GR save area.
6607      virtual_incoming_args_rtx should have been 16 byte aligned.  */
6608   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6609   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6610   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6611
6612   /* Emit code to initialize VRTOP, the top of the VR save area.
6613      This address is gr_save_area_bytes below GRTOP, rounded
6614      down to the next 16-byte boundary.  */
6615   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6616   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6617                              STACK_BOUNDARY / BITS_PER_UNIT);
6618
6619   if (vr_offset)
6620     t = fold_build_pointer_plus_hwi (t, -vr_offset);
6621   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6622   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6623
6624   /* Emit code to initialize GROFF, the offset from GRTOP of the
6625      next GPR argument.  */
6626   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6627               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6628   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6629
6630   /* Likewise emit code to initialize VROFF, the offset from FTOP
6631      of the next VR argument.  */
6632   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6633               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6634   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6635 }
6636
6637 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
6638
6639 static tree
6640 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6641                               gimple_seq *post_p ATTRIBUTE_UNUSED)
6642 {
6643   tree addr;
6644   bool indirect_p;
6645   bool is_ha;           /* is HFA or HVA.  */
6646   bool dw_align;        /* double-word align.  */
6647   enum machine_mode ag_mode = VOIDmode;
6648   int nregs;
6649   enum machine_mode mode;
6650
6651   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6652   tree stack, f_top, f_off, off, arg, roundup, on_stack;
6653   HOST_WIDE_INT size, rsize, adjust, align;
6654   tree t, u, cond1, cond2;
6655
6656   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6657   if (indirect_p)
6658     type = build_pointer_type (type);
6659
6660   mode = TYPE_MODE (type);
6661
6662   f_stack = TYPE_FIELDS (va_list_type_node);
6663   f_grtop = DECL_CHAIN (f_stack);
6664   f_vrtop = DECL_CHAIN (f_grtop);
6665   f_groff = DECL_CHAIN (f_vrtop);
6666   f_vroff = DECL_CHAIN (f_groff);
6667
6668   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6669                   f_stack, NULL_TREE);
6670   size = int_size_in_bytes (type);
6671   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6672
6673   dw_align = false;
6674   adjust = 0;
6675   if (aarch64_vfp_is_call_or_return_candidate (mode,
6676                                                type,
6677                                                &ag_mode,
6678                                                &nregs,
6679                                                &is_ha))
6680     {
6681       /* TYPE passed in fp/simd registers.  */
6682       if (TARGET_GENERAL_REGS_ONLY)
6683         sorry ("%qs and floating point or vector arguments",
6684                "-mgeneral-regs-only");
6685
6686       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6687                       unshare_expr (valist), f_vrtop, NULL_TREE);
6688       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6689                       unshare_expr (valist), f_vroff, NULL_TREE);
6690
6691       rsize = nregs * UNITS_PER_VREG;
6692
6693       if (is_ha)
6694         {
6695           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6696             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6697         }
6698       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6699                && size < UNITS_PER_VREG)
6700         {
6701           adjust = UNITS_PER_VREG - size;
6702         }
6703     }
6704   else
6705     {
6706       /* TYPE passed in general registers.  */
6707       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6708                       unshare_expr (valist), f_grtop, NULL_TREE);
6709       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6710                       unshare_expr (valist), f_groff, NULL_TREE);
6711       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6712       nregs = rsize / UNITS_PER_WORD;
6713
6714       if (align > 8)
6715         dw_align = true;
6716
6717       if (BLOCK_REG_PADDING (mode, type, 1) == downward
6718           && size < UNITS_PER_WORD)
6719         {
6720           adjust = UNITS_PER_WORD  - size;
6721         }
6722     }
6723
6724   /* Get a local temporary for the field value.  */
6725   off = get_initialized_tmp_var (f_off, pre_p, NULL);
6726
6727   /* Emit code to branch if off >= 0.  */
6728   t = build2 (GE_EXPR, boolean_type_node, off,
6729               build_int_cst (TREE_TYPE (off), 0));
6730   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6731
6732   if (dw_align)
6733     {
6734       /* Emit: offs = (offs + 15) & -16.  */
6735       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6736                   build_int_cst (TREE_TYPE (off), 15));
6737       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6738                   build_int_cst (TREE_TYPE (off), -16));
6739       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6740     }
6741   else
6742     roundup = NULL;
6743
6744   /* Update ap.__[g|v]r_offs  */
6745   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6746               build_int_cst (TREE_TYPE (off), rsize));
6747   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6748
6749   /* String up.  */
6750   if (roundup)
6751     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6752
6753   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
6754   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6755               build_int_cst (TREE_TYPE (f_off), 0));
6756   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6757
6758   /* String up: make sure the assignment happens before the use.  */
6759   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6760   COND_EXPR_ELSE (cond1) = t;
6761
6762   /* Prepare the trees handling the argument that is passed on the stack;
6763      the top level node will store in ON_STACK.  */
6764   arg = get_initialized_tmp_var (stack, pre_p, NULL);
6765   if (align > 8)
6766     {
6767       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
6768       t = fold_convert (intDI_type_node, arg);
6769       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6770                   build_int_cst (TREE_TYPE (t), 15));
6771       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6772                   build_int_cst (TREE_TYPE (t), -16));
6773       t = fold_convert (TREE_TYPE (arg), t);
6774       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6775     }
6776   else
6777     roundup = NULL;
6778   /* Advance ap.__stack  */
6779   t = fold_convert (intDI_type_node, arg);
6780   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6781               build_int_cst (TREE_TYPE (t), size + 7));
6782   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6783               build_int_cst (TREE_TYPE (t), -8));
6784   t = fold_convert (TREE_TYPE (arg), t);
6785   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6786   /* String up roundup and advance.  */
6787   if (roundup)
6788     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6789   /* String up with arg */
6790   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6791   /* Big-endianness related address adjustment.  */
6792   if (BLOCK_REG_PADDING (mode, type, 1) == downward
6793       && size < UNITS_PER_WORD)
6794   {
6795     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6796                 size_int (UNITS_PER_WORD - size));
6797     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6798   }
6799
6800   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6801   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6802
6803   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
6804   t = off;
6805   if (adjust)
6806     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6807                 build_int_cst (TREE_TYPE (off), adjust));
6808
6809   t = fold_convert (sizetype, t);
6810   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6811
6812   if (is_ha)
6813     {
6814       /* type ha; // treat as "struct {ftype field[n];}"
6815          ... [computing offs]
6816          for (i = 0; i <nregs; ++i, offs += 16)
6817            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6818          return ha;  */
6819       int i;
6820       tree tmp_ha, field_t, field_ptr_t;
6821
6822       /* Declare a local variable.  */
6823       tmp_ha = create_tmp_var_raw (type, "ha");
6824       gimple_add_tmp_var (tmp_ha);
6825
6826       /* Establish the base type.  */
6827       switch (ag_mode)
6828         {
6829         case SFmode:
6830           field_t = float_type_node;
6831           field_ptr_t = float_ptr_type_node;
6832           break;
6833         case DFmode:
6834           field_t = double_type_node;
6835           field_ptr_t = double_ptr_type_node;
6836           break;
6837         case TFmode:
6838           field_t = long_double_type_node;
6839           field_ptr_t = long_double_ptr_type_node;
6840           break;
6841 /* The half precision and quad precision are not fully supported yet.  Enable
6842    the following code after the support is complete.  Need to find the correct
6843    type node for __fp16 *.  */
6844 #if 0
6845         case HFmode:
6846           field_t = float_type_node;
6847           field_ptr_t = float_ptr_type_node;
6848           break;
6849 #endif
6850         case V2SImode:
6851         case V4SImode:
6852             {
6853               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6854               field_t = build_vector_type_for_mode (innertype, ag_mode);
6855               field_ptr_t = build_pointer_type (field_t);
6856             }
6857           break;
6858         default:
6859           gcc_assert (0);
6860         }
6861
6862       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
6863       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6864       addr = t;
6865       t = fold_convert (field_ptr_t, addr);
6866       t = build2 (MODIFY_EXPR, field_t,
6867                   build1 (INDIRECT_REF, field_t, tmp_ha),
6868                   build1 (INDIRECT_REF, field_t, t));
6869
6870       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
6871       for (i = 1; i < nregs; ++i)
6872         {
6873           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6874           u = fold_convert (field_ptr_t, addr);
6875           u = build2 (MODIFY_EXPR, field_t,
6876                       build2 (MEM_REF, field_t, tmp_ha,
6877                               build_int_cst (field_ptr_t,
6878                                              (i *
6879                                               int_size_in_bytes (field_t)))),
6880                       build1 (INDIRECT_REF, field_t, u));
6881           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6882         }
6883
6884       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6885       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6886     }
6887
6888   COND_EXPR_ELSE (cond2) = t;
6889   addr = fold_convert (build_pointer_type (type), cond1);
6890   addr = build_va_arg_indirect_ref (addr);
6891
6892   if (indirect_p)
6893     addr = build_va_arg_indirect_ref (addr);
6894
6895   return addr;
6896 }
6897
6898 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
6899
6900 static void
6901 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6902                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6903                                 int no_rtl)
6904 {
6905   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6906   CUMULATIVE_ARGS local_cum;
6907   int gr_saved, vr_saved;
6908
6909   /* The caller has advanced CUM up to, but not beyond, the last named
6910      argument.  Advance a local copy of CUM past the last "real" named
6911      argument, to find out how many registers are left over.  */
6912   local_cum = *cum;
6913   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6914
6915   /* Found out how many registers we need to save.  */
6916   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6917   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6918
6919   if (TARGET_GENERAL_REGS_ONLY)
6920     {
6921       if (local_cum.aapcs_nvrn > 0)
6922         sorry ("%qs and floating point or vector arguments",
6923                "-mgeneral-regs-only");
6924       vr_saved = 0;
6925     }
6926
6927   if (!no_rtl)
6928     {
6929       if (gr_saved > 0)
6930         {
6931           rtx ptr, mem;
6932
6933           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
6934           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6935                                - gr_saved * UNITS_PER_WORD);
6936           mem = gen_frame_mem (BLKmode, ptr);
6937           set_mem_alias_set (mem, get_varargs_alias_set ());
6938
6939           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6940                                mem, gr_saved);
6941         }
6942       if (vr_saved > 0)
6943         {
6944           /* We can't use move_block_from_reg, because it will use
6945              the wrong mode, storing D regs only.  */
6946           enum machine_mode mode = TImode;
6947           int off, i;
6948
6949           /* Set OFF to the offset from virtual_incoming_args_rtx of
6950              the first vector register.  The VR save area lies below
6951              the GR one, and is aligned to 16 bytes.  */
6952           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6953                                    STACK_BOUNDARY / BITS_PER_UNIT);
6954           off -= vr_saved * UNITS_PER_VREG;
6955
6956           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6957             {
6958               rtx ptr, mem;
6959
6960               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6961               mem = gen_frame_mem (mode, ptr);
6962               set_mem_alias_set (mem, get_varargs_alias_set ());
6963               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6964               off += UNITS_PER_VREG;
6965             }
6966         }
6967     }
6968
6969   /* We don't save the size into *PRETEND_SIZE because we want to avoid
6970      any complication of having crtl->args.pretend_args_size changed.  */
6971   cfun->machine->saved_varargs_size
6972     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6973                       STACK_BOUNDARY / BITS_PER_UNIT)
6974        + vr_saved * UNITS_PER_VREG);
6975 }
6976
6977 static void
6978 aarch64_conditional_register_usage (void)
6979 {
6980   int i;
6981   if (!TARGET_FLOAT)
6982     {
6983       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6984         {
6985           fixed_regs[i] = 1;
6986           call_used_regs[i] = 1;
6987         }
6988     }
6989 }
6990
6991 /* Walk down the type tree of TYPE counting consecutive base elements.
6992    If *MODEP is VOIDmode, then set it to the first valid floating point
6993    type.  If a non-floating point type is found, or if a floating point
6994    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6995    otherwise return the count in the sub-tree.  */
6996 static int
6997 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6998 {
6999   enum machine_mode mode;
7000   HOST_WIDE_INT size;
7001
7002   switch (TREE_CODE (type))
7003     {
7004     case REAL_TYPE:
7005       mode = TYPE_MODE (type);
7006       if (mode != DFmode && mode != SFmode && mode != TFmode)
7007         return -1;
7008
7009       if (*modep == VOIDmode)
7010         *modep = mode;
7011
7012       if (*modep == mode)
7013         return 1;
7014
7015       break;
7016
7017     case COMPLEX_TYPE:
7018       mode = TYPE_MODE (TREE_TYPE (type));
7019       if (mode != DFmode && mode != SFmode && mode != TFmode)
7020         return -1;
7021
7022       if (*modep == VOIDmode)
7023         *modep = mode;
7024
7025       if (*modep == mode)
7026         return 2;
7027
7028       break;
7029
7030     case VECTOR_TYPE:
7031       /* Use V2SImode and V4SImode as representatives of all 64-bit
7032          and 128-bit vector types.  */
7033       size = int_size_in_bytes (type);
7034       switch (size)
7035         {
7036         case 8:
7037           mode = V2SImode;
7038           break;
7039         case 16:
7040           mode = V4SImode;
7041           break;
7042         default:
7043           return -1;
7044         }
7045
7046       if (*modep == VOIDmode)
7047         *modep = mode;
7048
7049       /* Vector modes are considered to be opaque: two vectors are
7050          equivalent for the purposes of being homogeneous aggregates
7051          if they are the same size.  */
7052       if (*modep == mode)
7053         return 1;
7054
7055       break;
7056
7057     case ARRAY_TYPE:
7058       {
7059         int count;
7060         tree index = TYPE_DOMAIN (type);
7061
7062         /* Can't handle incomplete types.  */
7063         if (!COMPLETE_TYPE_P (type))
7064           return -1;
7065
7066         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7067         if (count == -1
7068             || !index
7069             || !TYPE_MAX_VALUE (index)
7070             || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7071             || !TYPE_MIN_VALUE (index)
7072             || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7073             || count < 0)
7074           return -1;
7075
7076         count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7077                       - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7078
7079         /* There must be no padding.  */
7080         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7081             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7082                 != count * GET_MODE_BITSIZE (*modep)))
7083           return -1;
7084
7085         return count;
7086       }
7087
7088     case RECORD_TYPE:
7089       {
7090         int count = 0;
7091         int sub_count;
7092         tree field;
7093
7094         /* Can't handle incomplete types.  */
7095         if (!COMPLETE_TYPE_P (type))
7096           return -1;
7097
7098         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7099           {
7100             if (TREE_CODE (field) != FIELD_DECL)
7101               continue;
7102
7103             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7104             if (sub_count < 0)
7105               return -1;
7106             count += sub_count;
7107           }
7108
7109         /* There must be no padding.  */
7110         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7111             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7112                 != count * GET_MODE_BITSIZE (*modep)))
7113           return -1;
7114
7115         return count;
7116       }
7117
7118     case UNION_TYPE:
7119     case QUAL_UNION_TYPE:
7120       {
7121         /* These aren't very interesting except in a degenerate case.  */
7122         int count = 0;
7123         int sub_count;
7124         tree field;
7125
7126         /* Can't handle incomplete types.  */
7127         if (!COMPLETE_TYPE_P (type))
7128           return -1;
7129
7130         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7131           {
7132             if (TREE_CODE (field) != FIELD_DECL)
7133               continue;
7134
7135             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7136             if (sub_count < 0)
7137               return -1;
7138             count = count > sub_count ? count : sub_count;
7139           }
7140
7141         /* There must be no padding.  */
7142         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7143             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7144                 != count * GET_MODE_BITSIZE (*modep)))
7145           return -1;
7146
7147         return count;
7148       }
7149
7150     default:
7151       break;
7152     }
7153
7154   return -1;
7155 }
7156
7157 /* Return true if we use LRA instead of reload pass.  */
7158 static bool
7159 aarch64_lra_p (void)
7160 {
7161   return aarch64_lra_flag;
7162 }
7163
7164 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7165    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
7166    array types.  The C99 floating-point complex types are also considered
7167    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
7168    types, which are GCC extensions and out of the scope of AAPCS64, are
7169    treated as composite types here as well.
7170
7171    Note that MODE itself is not sufficient in determining whether a type
7172    is such a composite type or not.  This is because
7173    stor-layout.c:compute_record_mode may have already changed the MODE
7174    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
7175    structure with only one field may have its MODE set to the mode of the
7176    field.  Also an integer mode whose size matches the size of the
7177    RECORD_TYPE type may be used to substitute the original mode
7178    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
7179    solely relied on.  */
7180
7181 static bool
7182 aarch64_composite_type_p (const_tree type,
7183                           enum machine_mode mode)
7184 {
7185   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7186     return true;
7187
7188   if (mode == BLKmode
7189       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7190       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7191     return true;
7192
7193   return false;
7194 }
7195
7196 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7197    type as described in AAPCS64 \S 4.1.2.
7198
7199    See the comment above aarch64_composite_type_p for the notes on MODE.  */
7200
7201 static bool
7202 aarch64_short_vector_p (const_tree type,
7203                         enum machine_mode mode)
7204 {
7205   HOST_WIDE_INT size = -1;
7206
7207   if (type && TREE_CODE (type) == VECTOR_TYPE)
7208     size = int_size_in_bytes (type);
7209   else if (!aarch64_composite_type_p (type, mode)
7210            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7211                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7212     size = GET_MODE_SIZE (mode);
7213
7214   return (size == 8 || size == 16) ? true : false;
7215 }
7216
7217 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7218    shall be passed or returned in simd/fp register(s) (providing these
7219    parameter passing registers are available).
7220
7221    Upon successful return, *COUNT returns the number of needed registers,
7222    *BASE_MODE returns the mode of the individual register and when IS_HAF
7223    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7224    floating-point aggregate or a homogeneous short-vector aggregate.  */
7225
7226 static bool
7227 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7228                                          const_tree type,
7229                                          enum machine_mode *base_mode,
7230                                          int *count,
7231                                          bool *is_ha)
7232 {
7233   enum machine_mode new_mode = VOIDmode;
7234   bool composite_p = aarch64_composite_type_p (type, mode);
7235
7236   if (is_ha != NULL) *is_ha = false;
7237
7238   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7239       || aarch64_short_vector_p (type, mode))
7240     {
7241       *count = 1;
7242       new_mode = mode;
7243     }
7244   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7245     {
7246       if (is_ha != NULL) *is_ha = true;
7247       *count = 2;
7248       new_mode = GET_MODE_INNER (mode);
7249     }
7250   else if (type && composite_p)
7251     {
7252       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7253
7254       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7255         {
7256           if (is_ha != NULL) *is_ha = true;
7257           *count = ag_count;
7258         }
7259       else
7260         return false;
7261     }
7262   else
7263     return false;
7264
7265   *base_mode = new_mode;
7266   return true;
7267 }
7268
7269 /* Implement TARGET_STRUCT_VALUE_RTX.  */
7270
7271 static rtx
7272 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7273                           int incoming ATTRIBUTE_UNUSED)
7274 {
7275   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7276 }
7277
7278 /* Implements target hook vector_mode_supported_p.  */
7279 static bool
7280 aarch64_vector_mode_supported_p (enum machine_mode mode)
7281 {
7282   if (TARGET_SIMD
7283       && (mode == V4SImode  || mode == V8HImode
7284           || mode == V16QImode || mode == V2DImode
7285           || mode == V2SImode  || mode == V4HImode
7286           || mode == V8QImode || mode == V2SFmode
7287           || mode == V4SFmode || mode == V2DFmode
7288           || mode == V1DFmode))
7289     return true;
7290
7291   return false;
7292 }
7293
7294 /* Return appropriate SIMD container
7295    for MODE within a vector of WIDTH bits.  */
7296 static enum machine_mode
7297 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7298 {
7299   gcc_assert (width == 64 || width == 128);
7300   if (TARGET_SIMD)
7301     {
7302       if (width == 128)
7303         switch (mode)
7304           {
7305           case DFmode:
7306             return V2DFmode;
7307           case SFmode:
7308             return V4SFmode;
7309           case SImode:
7310             return V4SImode;
7311           case HImode:
7312             return V8HImode;
7313           case QImode:
7314             return V16QImode;
7315           case DImode:
7316             return V2DImode;
7317           default:
7318             break;
7319           }
7320       else
7321         switch (mode)
7322           {
7323           case SFmode:
7324             return V2SFmode;
7325           case SImode:
7326             return V2SImode;
7327           case HImode:
7328             return V4HImode;
7329           case QImode:
7330             return V8QImode;
7331           default:
7332             break;
7333           }
7334     }
7335   return word_mode;
7336 }
7337
7338 /* Return 128-bit container as the preferred SIMD mode for MODE.  */
7339 static enum machine_mode
7340 aarch64_preferred_simd_mode (enum machine_mode mode)
7341 {
7342   return aarch64_simd_container_mode (mode, 128);
7343 }
7344
7345 /* Return the bitmask of possible vector sizes for the vectorizer
7346    to iterate over.  */
7347 static unsigned int
7348 aarch64_autovectorize_vector_sizes (void)
7349 {
7350   return (16 | 8);
7351 }
7352
7353 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7354    vector types in order to conform to the AAPCS64 (see "Procedure
7355    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
7356    qualify for emission with the mangled names defined in that document,
7357    a vector type must not only be of the correct mode but also be
7358    composed of AdvSIMD vector element types (e.g.
7359    _builtin_aarch64_simd_qi); these types are registered by
7360    aarch64_init_simd_builtins ().  In other words, vector types defined
7361    in other ways e.g. via vector_size attribute will get default
7362    mangled names.  */
7363 typedef struct
7364 {
7365   enum machine_mode mode;
7366   const char *element_type_name;
7367   const char *mangled_name;
7368 } aarch64_simd_mangle_map_entry;
7369
7370 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7371   /* 64-bit containerized types.  */
7372   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
7373   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
7374   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
7375   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
7376   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
7377   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
7378   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
7379   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
7380   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7381   /* 128-bit containerized types.  */
7382   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
7383   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
7384   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
7385   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
7386   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
7387   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
7388   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
7389   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
7390   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
7391   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
7392   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
7393   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7394   { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7395   { VOIDmode, NULL, NULL }
7396 };
7397
7398 /* Implement TARGET_MANGLE_TYPE.  */
7399
7400 static const char *
7401 aarch64_mangle_type (const_tree type)
7402 {
7403   /* The AArch64 ABI documents say that "__va_list" has to be
7404      managled as if it is in the "std" namespace.  */
7405   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7406     return "St9__va_list";
7407
7408   /* Check the mode of the vector type, and the name of the vector
7409      element type, against the table.  */
7410   if (TREE_CODE (type) == VECTOR_TYPE)
7411     {
7412       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7413
7414       while (pos->mode != VOIDmode)
7415         {
7416           tree elt_type = TREE_TYPE (type);
7417
7418           if (pos->mode == TYPE_MODE (type)
7419               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7420               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7421                           pos->element_type_name))
7422             return pos->mangled_name;
7423
7424           pos++;
7425         }
7426     }
7427
7428   /* Use the default mangling.  */
7429   return NULL;
7430 }
7431
7432 /* Return the equivalent letter for size.  */
7433 static char
7434 sizetochar (int size)
7435 {
7436   switch (size)
7437     {
7438     case 64: return 'd';
7439     case 32: return 's';
7440     case 16: return 'h';
7441     case 8 : return 'b';
7442     default: gcc_unreachable ();
7443     }
7444 }
7445
7446 /* Return true iff x is a uniform vector of floating-point
7447    constants, and the constant can be represented in
7448    quarter-precision form.  Note, as aarch64_float_const_representable
7449    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
7450 static bool
7451 aarch64_vect_float_const_representable_p (rtx x)
7452 {
7453   int i = 0;
7454   REAL_VALUE_TYPE r0, ri;
7455   rtx x0, xi;
7456
7457   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7458     return false;
7459
7460   x0 = CONST_VECTOR_ELT (x, 0);
7461   if (!CONST_DOUBLE_P (x0))
7462     return false;
7463
7464   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7465
7466   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7467     {
7468       xi = CONST_VECTOR_ELT (x, i);
7469       if (!CONST_DOUBLE_P (xi))
7470         return false;
7471
7472       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7473       if (!REAL_VALUES_EQUAL (r0, ri))
7474         return false;
7475     }
7476
7477   return aarch64_float_const_representable_p (x0);
7478 }
7479
7480 /* Return true for valid and false for invalid.  */
7481 bool
7482 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7483                               struct simd_immediate_info *info)
7484 {
7485 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
7486   matches = 1;                                          \
7487   for (i = 0; i < idx; i += (STRIDE))                   \
7488     if (!(TEST))                                        \
7489       matches = 0;                                      \
7490   if (matches)                                          \
7491     {                                                   \
7492       immtype = (CLASS);                                \
7493       elsize = (ELSIZE);                                \
7494       eshift = (SHIFT);                                 \
7495       emvn = (NEG);                                     \
7496       break;                                            \
7497     }
7498
7499   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7500   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7501   unsigned char bytes[16];
7502   int immtype = -1, matches;
7503   unsigned int invmask = inverse ? 0xff : 0;
7504   int eshift, emvn;
7505
7506   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7507     {
7508       if (! (aarch64_simd_imm_zero_p (op, mode)
7509              || aarch64_vect_float_const_representable_p (op)))
7510         return false;
7511
7512       if (info)
7513         {
7514           info->value = CONST_VECTOR_ELT (op, 0);
7515           info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7516           info->mvn = false;
7517           info->shift = 0;
7518         }
7519
7520       return true;
7521     }
7522
7523   /* Splat vector constant out into a byte vector.  */
7524   for (i = 0; i < n_elts; i++)
7525     {
7526       /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
7527          it must be laid out in the vector register in reverse order.  */
7528       rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7529       unsigned HOST_WIDE_INT elpart;
7530       unsigned int part, parts;
7531
7532       if (GET_CODE (el) == CONST_INT)
7533         {
7534           elpart = INTVAL (el);
7535           parts = 1;
7536         }
7537       else if (GET_CODE (el) == CONST_DOUBLE)
7538         {
7539           elpart = CONST_DOUBLE_LOW (el);
7540           parts = 2;
7541         }
7542       else
7543         gcc_unreachable ();
7544
7545       for (part = 0; part < parts; part++)
7546         {
7547           unsigned int byte;
7548           for (byte = 0; byte < innersize; byte++)
7549             {
7550               bytes[idx++] = (elpart & 0xff) ^ invmask;
7551               elpart >>= BITS_PER_UNIT;
7552             }
7553           if (GET_CODE (el) == CONST_DOUBLE)
7554             elpart = CONST_DOUBLE_HIGH (el);
7555         }
7556     }
7557
7558   /* Sanity check.  */
7559   gcc_assert (idx == GET_MODE_SIZE (mode));
7560
7561   do
7562     {
7563       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7564              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7565
7566       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7567              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7568
7569       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7570              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7571
7572       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7573              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7574
7575       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7576
7577       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7578
7579       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7580              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7581
7582       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7583              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7584
7585       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7586              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7587
7588       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7589              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7590
7591       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7592
7593       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7594
7595       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7596              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7597
7598       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7599              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7600
7601       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7602              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7603
7604       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7605              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7606
7607       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7608
7609       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7610              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7611     }
7612   while (0);
7613
7614   if (immtype == -1)
7615     return false;
7616
7617   if (info)
7618     {
7619       info->element_width = elsize;
7620       info->mvn = emvn != 0;
7621       info->shift = eshift;
7622
7623       unsigned HOST_WIDE_INT imm = 0;
7624
7625       if (immtype >= 12 && immtype <= 15)
7626         info->msl = true;
7627
7628       /* Un-invert bytes of recognized vector, if necessary.  */
7629       if (invmask != 0)
7630         for (i = 0; i < idx; i++)
7631           bytes[i] ^= invmask;
7632
7633       if (immtype == 17)
7634         {
7635           /* FIXME: Broken on 32-bit H_W_I hosts.  */
7636           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7637
7638           for (i = 0; i < 8; i++)
7639             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7640               << (i * BITS_PER_UNIT);
7641
7642
7643           info->value = GEN_INT (imm);
7644         }
7645       else
7646         {
7647           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7648             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7649
7650           /* Construct 'abcdefgh' because the assembler cannot handle
7651              generic constants.  */
7652           if (info->mvn)
7653             imm = ~imm;
7654           imm = (imm >> info->shift) & 0xff;
7655           info->value = GEN_INT (imm);
7656         }
7657     }
7658
7659   return true;
7660 #undef CHECK
7661 }
7662
7663 static bool
7664 aarch64_const_vec_all_same_int_p (rtx x,
7665                                   HOST_WIDE_INT minval,
7666                                   HOST_WIDE_INT maxval)
7667 {
7668   HOST_WIDE_INT firstval;
7669   int count, i;
7670
7671   if (GET_CODE (x) != CONST_VECTOR
7672       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7673     return false;
7674
7675   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7676   if (firstval < minval || firstval > maxval)
7677     return false;
7678
7679   count = CONST_VECTOR_NUNITS (x);
7680   for (i = 1; i < count; i++)
7681     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7682       return false;
7683
7684   return true;
7685 }
7686
7687 /* Check of immediate shift constants are within range.  */
7688 bool
7689 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7690 {
7691   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7692   if (left)
7693     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7694   else
7695     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7696 }
7697
7698 /* Return true if X is a uniform vector where all elements
7699    are either the floating-point constant 0.0 or the
7700    integer constant 0.  */
7701 bool
7702 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7703 {
7704   return x == CONST0_RTX (mode);
7705 }
7706
7707 bool
7708 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7709 {
7710   HOST_WIDE_INT imm = INTVAL (x);
7711   int i;
7712
7713   for (i = 0; i < 8; i++)
7714     {
7715       unsigned int byte = imm & 0xff;
7716       if (byte != 0xff && byte != 0)
7717        return false;
7718       imm >>= 8;
7719     }
7720
7721   return true;
7722 }
7723
7724 bool
7725 aarch64_mov_operand_p (rtx x,
7726                        enum aarch64_symbol_context context,
7727                        enum machine_mode mode)
7728 {
7729   if (GET_CODE (x) == HIGH
7730       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7731     return true;
7732
7733   if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7734     return true;
7735
7736   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7737     return true;
7738
7739   return aarch64_classify_symbolic_expression (x, context)
7740     == SYMBOL_TINY_ABSOLUTE;
7741 }
7742
7743 /* Return a const_int vector of VAL.  */
7744 rtx
7745 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7746 {
7747   int nunits = GET_MODE_NUNITS (mode);
7748   rtvec v = rtvec_alloc (nunits);
7749   int i;
7750
7751   for (i=0; i < nunits; i++)
7752     RTVEC_ELT (v, i) = GEN_INT (val);
7753
7754   return gen_rtx_CONST_VECTOR (mode, v);
7755 }
7756
7757 /* Check OP is a legal scalar immediate for the MOVI instruction.  */
7758
7759 bool
7760 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7761 {
7762   enum machine_mode vmode;
7763
7764   gcc_assert (!VECTOR_MODE_P (mode));
7765   vmode = aarch64_preferred_simd_mode (mode);
7766   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7767   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7768 }
7769
7770 /* Construct and return a PARALLEL RTX vector.  */
7771 rtx
7772 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7773 {
7774   int nunits = GET_MODE_NUNITS (mode);
7775   rtvec v = rtvec_alloc (nunits / 2);
7776   int base = high ? nunits / 2 : 0;
7777   rtx t1;
7778   int i;
7779
7780   for (i=0; i < nunits / 2; i++)
7781     RTVEC_ELT (v, i) = GEN_INT (base + i);
7782
7783   t1 = gen_rtx_PARALLEL (mode, v);
7784   return t1;
7785 }
7786
7787 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
7788    HIGH (exclusive).  */
7789 void
7790 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7791 {
7792   HOST_WIDE_INT lane;
7793   gcc_assert (GET_CODE (operand) == CONST_INT);
7794   lane = INTVAL (operand);
7795
7796   if (lane < low || lane >= high)
7797     error ("lane out of range");
7798 }
7799
7800 void
7801 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7802 {
7803   gcc_assert (GET_CODE (operand) == CONST_INT);
7804   HOST_WIDE_INT lane = INTVAL (operand);
7805
7806   if (lane < low || lane >= high)
7807     error ("constant out of range");
7808 }
7809
7810 /* Emit code to reinterpret one AdvSIMD type as another,
7811    without altering bits.  */
7812 void
7813 aarch64_simd_reinterpret (rtx dest, rtx src)
7814 {
7815   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7816 }
7817
7818 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7819    registers).  */
7820 void
7821 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7822                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7823                             rtx op1)
7824 {
7825   rtx mem = gen_rtx_MEM (mode, destaddr);
7826   rtx tmp1 = gen_reg_rtx (mode);
7827   rtx tmp2 = gen_reg_rtx (mode);
7828
7829   emit_insn (intfn (tmp1, op1, tmp2));
7830
7831   emit_move_insn (mem, tmp1);
7832   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7833   emit_move_insn (mem, tmp2);
7834 }
7835
7836 /* Return TRUE if OP is a valid vector addressing mode.  */
7837 bool
7838 aarch64_simd_mem_operand_p (rtx op)
7839 {
7840   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7841                         || GET_CODE (XEXP (op, 0)) == REG);
7842 }
7843
7844 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7845    not to early-clobber SRC registers in the process.
7846
7847    We assume that the operands described by SRC and DEST represent a
7848    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
7849    number of components into which the copy has been decomposed.  */
7850 void
7851 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7852                                 rtx *src, unsigned int count)
7853 {
7854   unsigned int i;
7855
7856   if (!reg_overlap_mentioned_p (operands[0], operands[1])
7857       || REGNO (operands[0]) < REGNO (operands[1]))
7858     {
7859       for (i = 0; i < count; i++)
7860         {
7861           operands[2 * i] = dest[i];
7862           operands[2 * i + 1] = src[i];
7863         }
7864     }
7865   else
7866     {
7867       for (i = 0; i < count; i++)
7868         {
7869           operands[2 * i] = dest[count - i - 1];
7870           operands[2 * i + 1] = src[count - i - 1];
7871         }
7872     }
7873 }
7874
7875 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7876    one of VSTRUCT modes: OI, CI or XI.  */
7877 int
7878 aarch64_simd_attr_length_move (rtx insn)
7879 {
7880   enum machine_mode mode;
7881
7882   extract_insn_cached (insn);
7883
7884   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7885     {
7886       mode = GET_MODE (recog_data.operand[0]);
7887       switch (mode)
7888         {
7889         case OImode:
7890           return 8;
7891         case CImode:
7892           return 12;
7893         case XImode:
7894           return 16;
7895         default:
7896           gcc_unreachable ();
7897         }
7898     }
7899   return 4;
7900 }
7901
7902 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
7903    alignment of a vector to 128 bits.  */
7904 static HOST_WIDE_INT
7905 aarch64_simd_vector_alignment (const_tree type)
7906 {
7907   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7908   return MIN (align, 128);
7909 }
7910
7911 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
7912 static bool
7913 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7914 {
7915   if (is_packed)
7916     return false;
7917
7918   /* We guarantee alignment for vectors up to 128-bits.  */
7919   if (tree_int_cst_compare (TYPE_SIZE (type),
7920                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7921     return false;
7922
7923   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
7924   return true;
7925 }
7926
7927 /* If VALS is a vector constant that can be loaded into a register
7928    using DUP, generate instructions to do so and return an RTX to
7929    assign to the register.  Otherwise return NULL_RTX.  */
7930 static rtx
7931 aarch64_simd_dup_constant (rtx vals)
7932 {
7933   enum machine_mode mode = GET_MODE (vals);
7934   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7935   int n_elts = GET_MODE_NUNITS (mode);
7936   bool all_same = true;
7937   rtx x;
7938   int i;
7939
7940   if (GET_CODE (vals) != CONST_VECTOR)
7941     return NULL_RTX;
7942
7943   for (i = 1; i < n_elts; ++i)
7944     {
7945       x = CONST_VECTOR_ELT (vals, i);
7946       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7947         all_same = false;
7948     }
7949
7950   if (!all_same)
7951     return NULL_RTX;
7952
7953   /* We can load this constant by using DUP and a constant in a
7954      single ARM register.  This will be cheaper than a vector
7955      load.  */
7956   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7957   return gen_rtx_VEC_DUPLICATE (mode, x);
7958 }
7959
7960
7961 /* Generate code to load VALS, which is a PARALLEL containing only
7962    constants (for vec_init) or CONST_VECTOR, efficiently into a
7963    register.  Returns an RTX to copy into the register, or NULL_RTX
7964    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
7965 static rtx
7966 aarch64_simd_make_constant (rtx vals)
7967 {
7968   enum machine_mode mode = GET_MODE (vals);
7969   rtx const_dup;
7970   rtx const_vec = NULL_RTX;
7971   int n_elts = GET_MODE_NUNITS (mode);
7972   int n_const = 0;
7973   int i;
7974
7975   if (GET_CODE (vals) == CONST_VECTOR)
7976     const_vec = vals;
7977   else if (GET_CODE (vals) == PARALLEL)
7978     {
7979       /* A CONST_VECTOR must contain only CONST_INTs and
7980          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7981          Only store valid constants in a CONST_VECTOR.  */
7982       for (i = 0; i < n_elts; ++i)
7983         {
7984           rtx x = XVECEXP (vals, 0, i);
7985           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7986             n_const++;
7987         }
7988       if (n_const == n_elts)
7989         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7990     }
7991   else
7992     gcc_unreachable ();
7993
7994   if (const_vec != NULL_RTX
7995       && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7996     /* Load using MOVI/MVNI.  */
7997     return const_vec;
7998   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7999     /* Loaded using DUP.  */
8000     return const_dup;
8001   else if (const_vec != NULL_RTX)
8002     /* Load from constant pool. We can not take advantage of single-cycle
8003        LD1 because we need a PC-relative addressing mode.  */
8004     return const_vec;
8005   else
8006     /* A PARALLEL containing something not valid inside CONST_VECTOR.
8007        We can not construct an initializer.  */
8008     return NULL_RTX;
8009 }
8010
8011 void
8012 aarch64_expand_vector_init (rtx target, rtx vals)
8013 {
8014   enum machine_mode mode = GET_MODE (target);
8015   enum machine_mode inner_mode = GET_MODE_INNER (mode);
8016   int n_elts = GET_MODE_NUNITS (mode);
8017   int n_var = 0, one_var = -1;
8018   bool all_same = true;
8019   rtx x, mem;
8020   int i;
8021
8022   x = XVECEXP (vals, 0, 0);
8023   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8024     n_var = 1, one_var = 0;
8025
8026   for (i = 1; i < n_elts; ++i)
8027     {
8028       x = XVECEXP (vals, 0, i);
8029       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8030         ++n_var, one_var = i;
8031
8032       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8033         all_same = false;
8034     }
8035
8036   if (n_var == 0)
8037     {
8038       rtx constant = aarch64_simd_make_constant (vals);
8039       if (constant != NULL_RTX)
8040         {
8041           emit_move_insn (target, constant);
8042           return;
8043         }
8044     }
8045
8046   /* Splat a single non-constant element if we can.  */
8047   if (all_same)
8048     {
8049       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8050       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8051       return;
8052     }
8053
8054   /* One field is non-constant.  Load constant then overwrite varying
8055      field.  This is more efficient than using the stack.  */
8056   if (n_var == 1)
8057     {
8058       rtx copy = copy_rtx (vals);
8059       rtx index = GEN_INT (one_var);
8060       enum insn_code icode;
8061
8062       /* Load constant part of vector, substitute neighboring value for
8063          varying element.  */
8064       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8065       aarch64_expand_vector_init (target, copy);
8066
8067       /* Insert variable.  */
8068       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8069       icode = optab_handler (vec_set_optab, mode);
8070       gcc_assert (icode != CODE_FOR_nothing);
8071       emit_insn (GEN_FCN (icode) (target, x, index));
8072       return;
8073     }
8074
8075   /* Construct the vector in memory one field at a time
8076      and load the whole vector.  */
8077   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8078   for (i = 0; i < n_elts; i++)
8079     emit_move_insn (adjust_address_nv (mem, inner_mode,
8080                                     i * GET_MODE_SIZE (inner_mode)),
8081                     XVECEXP (vals, 0, i));
8082   emit_move_insn (target, mem);
8083
8084 }
8085
8086 static unsigned HOST_WIDE_INT
8087 aarch64_shift_truncation_mask (enum machine_mode mode)
8088 {
8089   return
8090     (aarch64_vector_mode_supported_p (mode)
8091      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8092 }
8093
8094 #ifndef TLS_SECTION_ASM_FLAG
8095 #define TLS_SECTION_ASM_FLAG 'T'
8096 #endif
8097
8098 void
8099 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8100                                tree decl ATTRIBUTE_UNUSED)
8101 {
8102   char flagchars[10], *f = flagchars;
8103
8104   /* If we have already declared this section, we can use an
8105      abbreviated form to switch back to it -- unless this section is
8106      part of a COMDAT groups, in which case GAS requires the full
8107      declaration every time.  */
8108   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8109       && (flags & SECTION_DECLARED))
8110     {
8111       fprintf (asm_out_file, "\t.section\t%s\n", name);
8112       return;
8113     }
8114
8115   if (!(flags & SECTION_DEBUG))
8116     *f++ = 'a';
8117   if (flags & SECTION_WRITE)
8118     *f++ = 'w';
8119   if (flags & SECTION_CODE)
8120     *f++ = 'x';
8121   if (flags & SECTION_SMALL)
8122     *f++ = 's';
8123   if (flags & SECTION_MERGE)
8124     *f++ = 'M';
8125   if (flags & SECTION_STRINGS)
8126     *f++ = 'S';
8127   if (flags & SECTION_TLS)
8128     *f++ = TLS_SECTION_ASM_FLAG;
8129   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8130     *f++ = 'G';
8131   *f = '\0';
8132
8133   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8134
8135   if (!(flags & SECTION_NOTYPE))
8136     {
8137       const char *type;
8138       const char *format;
8139
8140       if (flags & SECTION_BSS)
8141         type = "nobits";
8142       else
8143         type = "progbits";
8144
8145 #ifdef TYPE_OPERAND_FMT
8146       format = "," TYPE_OPERAND_FMT;
8147 #else
8148       format = ",@%s";
8149 #endif
8150
8151       fprintf (asm_out_file, format, type);
8152
8153       if (flags & SECTION_ENTSIZE)
8154         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8155       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8156         {
8157           if (TREE_CODE (decl) == IDENTIFIER_NODE)
8158             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8159           else
8160             fprintf (asm_out_file, ",%s,comdat",
8161                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8162         }
8163     }
8164
8165   putc ('\n', asm_out_file);
8166 }
8167
8168 /* Select a format to encode pointers in exception handling data.  */
8169 int
8170 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8171 {
8172    int type;
8173    switch (aarch64_cmodel)
8174      {
8175      case AARCH64_CMODEL_TINY:
8176      case AARCH64_CMODEL_TINY_PIC:
8177      case AARCH64_CMODEL_SMALL:
8178      case AARCH64_CMODEL_SMALL_PIC:
8179        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
8180           for everything.  */
8181        type = DW_EH_PE_sdata4;
8182        break;
8183      default:
8184        /* No assumptions here.  8-byte relocs required.  */
8185        type = DW_EH_PE_sdata8;
8186        break;
8187      }
8188    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8189 }
8190
8191 /* Emit load exclusive.  */
8192
8193 static void
8194 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8195                              rtx mem, rtx model_rtx)
8196 {
8197   rtx (*gen) (rtx, rtx, rtx);
8198
8199   switch (mode)
8200     {
8201     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8202     case HImode: gen = gen_aarch64_load_exclusivehi; break;
8203     case SImode: gen = gen_aarch64_load_exclusivesi; break;
8204     case DImode: gen = gen_aarch64_load_exclusivedi; break;
8205     default:
8206       gcc_unreachable ();
8207     }
8208
8209   emit_insn (gen (rval, mem, model_rtx));
8210 }
8211
8212 /* Emit store exclusive.  */
8213
8214 static void
8215 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8216                               rtx rval, rtx mem, rtx model_rtx)
8217 {
8218   rtx (*gen) (rtx, rtx, rtx, rtx);
8219
8220   switch (mode)
8221     {
8222     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8223     case HImode: gen = gen_aarch64_store_exclusivehi; break;
8224     case SImode: gen = gen_aarch64_store_exclusivesi; break;
8225     case DImode: gen = gen_aarch64_store_exclusivedi; break;
8226     default:
8227       gcc_unreachable ();
8228     }
8229
8230   emit_insn (gen (bval, rval, mem, model_rtx));
8231 }
8232
8233 /* Mark the previous jump instruction as unlikely.  */
8234
8235 static void
8236 aarch64_emit_unlikely_jump (rtx insn)
8237 {
8238   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8239
8240   insn = emit_jump_insn (insn);
8241   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8242 }
8243
8244 /* Expand a compare and swap pattern.  */
8245
8246 void
8247 aarch64_expand_compare_and_swap (rtx operands[])
8248 {
8249   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8250   enum machine_mode mode, cmp_mode;
8251   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8252
8253   bval = operands[0];
8254   rval = operands[1];
8255   mem = operands[2];
8256   oldval = operands[3];
8257   newval = operands[4];
8258   is_weak = operands[5];
8259   mod_s = operands[6];
8260   mod_f = operands[7];
8261   mode = GET_MODE (mem);
8262   cmp_mode = mode;
8263
8264   /* Normally the succ memory model must be stronger than fail, but in the
8265      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8266      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
8267
8268   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8269       && INTVAL (mod_s) == MEMMODEL_RELEASE)
8270     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8271
8272   switch (mode)
8273     {
8274     case QImode:
8275     case HImode:
8276       /* For short modes, we're going to perform the comparison in SImode,
8277          so do the zero-extension now.  */
8278       cmp_mode = SImode;
8279       rval = gen_reg_rtx (SImode);
8280       oldval = convert_modes (SImode, mode, oldval, true);
8281       /* Fall through.  */
8282
8283     case SImode:
8284     case DImode:
8285       /* Force the value into a register if needed.  */
8286       if (!aarch64_plus_operand (oldval, mode))
8287         oldval = force_reg (cmp_mode, oldval);
8288       break;
8289
8290     default:
8291       gcc_unreachable ();
8292     }
8293
8294   switch (mode)
8295     {
8296     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8297     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8298     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8299     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8300     default:
8301       gcc_unreachable ();
8302     }
8303
8304   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8305
8306   if (mode == QImode || mode == HImode)
8307     emit_move_insn (operands[1], gen_lowpart (mode, rval));
8308
8309   x = gen_rtx_REG (CCmode, CC_REGNUM);
8310   x = gen_rtx_EQ (SImode, x, const0_rtx);
8311   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8312 }
8313
8314 /* Split a compare and swap pattern.  */
8315
8316 void
8317 aarch64_split_compare_and_swap (rtx operands[])
8318 {
8319   rtx rval, mem, oldval, newval, scratch;
8320   enum machine_mode mode;
8321   bool is_weak;
8322   rtx label1, label2, x, cond;
8323
8324   rval = operands[0];
8325   mem = operands[1];
8326   oldval = operands[2];
8327   newval = operands[3];
8328   is_weak = (operands[4] != const0_rtx);
8329   scratch = operands[7];
8330   mode = GET_MODE (mem);
8331
8332   label1 = NULL_RTX;
8333   if (!is_weak)
8334     {
8335       label1 = gen_label_rtx ();
8336       emit_label (label1);
8337     }
8338   label2 = gen_label_rtx ();
8339
8340   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8341
8342   cond = aarch64_gen_compare_reg (NE, rval, oldval);
8343   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8344   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8345                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8346   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8347
8348   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8349
8350   if (!is_weak)
8351     {
8352       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8353       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8354                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8355       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8356     }
8357   else
8358     {
8359       cond = gen_rtx_REG (CCmode, CC_REGNUM);
8360       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8361       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8362     }
8363
8364   emit_label (label2);
8365 }
8366
8367 /* Split an atomic operation.  */
8368
8369 void
8370 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8371                      rtx value, rtx model_rtx, rtx cond)
8372 {
8373   enum machine_mode mode = GET_MODE (mem);
8374   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8375   rtx label, x;
8376
8377   label = gen_label_rtx ();
8378   emit_label (label);
8379
8380   if (new_out)
8381     new_out = gen_lowpart (wmode, new_out);
8382   if (old_out)
8383     old_out = gen_lowpart (wmode, old_out);
8384   else
8385     old_out = new_out;
8386   value = simplify_gen_subreg (wmode, value, mode, 0);
8387
8388   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8389
8390   switch (code)
8391     {
8392     case SET:
8393       new_out = value;
8394       break;
8395
8396     case NOT:
8397       x = gen_rtx_AND (wmode, old_out, value);
8398       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8399       x = gen_rtx_NOT (wmode, new_out);
8400       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8401       break;
8402
8403     case MINUS:
8404       if (CONST_INT_P (value))
8405         {
8406           value = GEN_INT (-INTVAL (value));
8407           code = PLUS;
8408         }
8409       /* Fall through.  */
8410
8411     default:
8412       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8413       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8414       break;
8415     }
8416
8417   aarch64_emit_store_exclusive (mode, cond, mem,
8418                                 gen_lowpart (mode, new_out), model_rtx);
8419
8420   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8421   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8422                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8423   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8424 }
8425
8426 static void
8427 aarch64_print_extension (void)
8428 {
8429   const struct aarch64_option_extension *opt = NULL;
8430
8431   for (opt = all_extensions; opt->name != NULL; opt++)
8432     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8433       asm_fprintf (asm_out_file, "+%s", opt->name);
8434
8435   asm_fprintf (asm_out_file, "\n");
8436 }
8437
8438 static void
8439 aarch64_start_file (void)
8440 {
8441   if (selected_arch)
8442     {
8443       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8444       aarch64_print_extension ();
8445     }
8446   else if (selected_cpu)
8447     {
8448       const char *truncated_name
8449             = aarch64_rewrite_selected_cpu (selected_cpu->name);
8450       asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8451       aarch64_print_extension ();
8452     }
8453   default_file_start();
8454 }
8455
8456 /* Target hook for c_mode_for_suffix.  */
8457 static enum machine_mode
8458 aarch64_c_mode_for_suffix (char suffix)
8459 {
8460   if (suffix == 'q')
8461     return TFmode;
8462
8463   return VOIDmode;
8464 }
8465
8466 /* We can only represent floating point constants which will fit in
8467    "quarter-precision" values.  These values are characterised by
8468    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
8469    by:
8470
8471    (-1)^s * (n/16) * 2^r
8472
8473    Where:
8474      's' is the sign bit.
8475      'n' is an integer in the range 16 <= n <= 31.
8476      'r' is an integer in the range -3 <= r <= 4.  */
8477
8478 /* Return true iff X can be represented by a quarter-precision
8479    floating point immediate operand X.  Note, we cannot represent 0.0.  */
8480 bool
8481 aarch64_float_const_representable_p (rtx x)
8482 {
8483   /* This represents our current view of how many bits
8484      make up the mantissa.  */
8485   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8486   int exponent;
8487   unsigned HOST_WIDE_INT mantissa, mask;
8488   HOST_WIDE_INT m1, m2;
8489   REAL_VALUE_TYPE r, m;
8490
8491   if (!CONST_DOUBLE_P (x))
8492     return false;
8493
8494   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8495
8496   /* We cannot represent infinities, NaNs or +/-zero.  We won't
8497      know if we have +zero until we analyse the mantissa, but we
8498      can reject the other invalid values.  */
8499   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8500       || REAL_VALUE_MINUS_ZERO (r))
8501     return false;
8502
8503   /* Extract exponent.  */
8504   r = real_value_abs (&r);
8505   exponent = REAL_EXP (&r);
8506
8507   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8508      highest (sign) bit, with a fixed binary point at bit point_pos.
8509      m1 holds the low part of the mantissa, m2 the high part.
8510      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8511      bits for the mantissa, this can fail (low bits will be lost).  */
8512   real_ldexp (&m, &r, point_pos - exponent);
8513   REAL_VALUE_TO_INT (&m1, &m2, m);
8514
8515   /* If the low part of the mantissa has bits set we cannot represent
8516      the value.  */
8517   if (m1 != 0)
8518     return false;
8519   /* We have rejected the lower HOST_WIDE_INT, so update our
8520      understanding of how many bits lie in the mantissa and
8521      look only at the high HOST_WIDE_INT.  */
8522   mantissa = m2;
8523   point_pos -= HOST_BITS_PER_WIDE_INT;
8524
8525   /* We can only represent values with a mantissa of the form 1.xxxx.  */
8526   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8527   if ((mantissa & mask) != 0)
8528     return false;
8529
8530   /* Having filtered unrepresentable values, we may now remove all
8531      but the highest 5 bits.  */
8532   mantissa >>= point_pos - 5;
8533
8534   /* We cannot represent the value 0.0, so reject it.  This is handled
8535      elsewhere.  */
8536   if (mantissa == 0)
8537     return false;
8538
8539   /* Then, as bit 4 is always set, we can mask it off, leaving
8540      the mantissa in the range [0, 15].  */
8541   mantissa &= ~(1 << 4);
8542   gcc_assert (mantissa <= 15);
8543
8544   /* GCC internally does not use IEEE754-like encoding (where normalized
8545      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
8546      Our mantissa values are shifted 4 places to the left relative to
8547      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8548      by 5 places to correct for GCC's representation.  */
8549   exponent = 5 - exponent;
8550
8551   return (exponent >= 0 && exponent <= 7);
8552 }
8553
8554 char*
8555 aarch64_output_simd_mov_immediate (rtx const_vector,
8556                                    enum machine_mode mode,
8557                                    unsigned width)
8558 {
8559   bool is_valid;
8560   static char templ[40];
8561   const char *mnemonic;
8562   const char *shift_op;
8563   unsigned int lane_count = 0;
8564   char element_char;
8565
8566   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8567
8568   /* This will return true to show const_vector is legal for use as either
8569      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
8570      also update INFO to show how the immediate should be generated.  */
8571   is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8572   gcc_assert (is_valid);
8573
8574   element_char = sizetochar (info.element_width);
8575   lane_count = width / info.element_width;
8576
8577   mode = GET_MODE_INNER (mode);
8578   if (mode == SFmode || mode == DFmode)
8579     {
8580       gcc_assert (info.shift == 0 && ! info.mvn);
8581       if (aarch64_float_const_zero_rtx_p (info.value))
8582         info.value = GEN_INT (0);
8583       else
8584         {
8585 #define buf_size 20
8586           REAL_VALUE_TYPE r;
8587           REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8588           char float_buf[buf_size] = {'\0'};
8589           real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8590 #undef buf_size
8591
8592           if (lane_count == 1)
8593             snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8594           else
8595             snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8596                       lane_count, element_char, float_buf);
8597           return templ;
8598         }
8599     }
8600
8601   mnemonic = info.mvn ? "mvni" : "movi";
8602   shift_op = info.msl ? "msl" : "lsl";
8603
8604   if (lane_count == 1)
8605     snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8606               mnemonic, UINTVAL (info.value));
8607   else if (info.shift)
8608     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8609               ", %s %d", mnemonic, lane_count, element_char,
8610               UINTVAL (info.value), shift_op, info.shift);
8611   else
8612     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8613               mnemonic, lane_count, element_char, UINTVAL (info.value));
8614   return templ;
8615 }
8616
8617 char*
8618 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8619                                           enum machine_mode mode)
8620 {
8621   enum machine_mode vmode;
8622
8623   gcc_assert (!VECTOR_MODE_P (mode));
8624   vmode = aarch64_simd_container_mode (mode, 64);
8625   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8626   return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8627 }
8628
8629 /* Split operands into moves from op[1] + op[2] into op[0].  */
8630
8631 void
8632 aarch64_split_combinev16qi (rtx operands[3])
8633 {
8634   unsigned int dest = REGNO (operands[0]);
8635   unsigned int src1 = REGNO (operands[1]);
8636   unsigned int src2 = REGNO (operands[2]);
8637   enum machine_mode halfmode = GET_MODE (operands[1]);
8638   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8639   rtx destlo, desthi;
8640
8641   gcc_assert (halfmode == V16QImode);
8642
8643   if (src1 == dest && src2 == dest + halfregs)
8644     {
8645       /* No-op move.  Can't split to nothing; emit something.  */
8646       emit_note (NOTE_INSN_DELETED);
8647       return;
8648     }
8649
8650   /* Preserve register attributes for variable tracking.  */
8651   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8652   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8653                                GET_MODE_SIZE (halfmode));
8654
8655   /* Special case of reversed high/low parts.  */
8656   if (reg_overlap_mentioned_p (operands[2], destlo)
8657       && reg_overlap_mentioned_p (operands[1], desthi))
8658     {
8659       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8660       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8661       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8662     }
8663   else if (!reg_overlap_mentioned_p (operands[2], destlo))
8664     {
8665       /* Try to avoid unnecessary moves if part of the result
8666          is in the right place already.  */
8667       if (src1 != dest)
8668         emit_move_insn (destlo, operands[1]);
8669       if (src2 != dest + halfregs)
8670         emit_move_insn (desthi, operands[2]);
8671     }
8672   else
8673     {
8674       if (src2 != dest + halfregs)
8675         emit_move_insn (desthi, operands[2]);
8676       if (src1 != dest)
8677         emit_move_insn (destlo, operands[1]);
8678     }
8679 }
8680
8681 /* vec_perm support.  */
8682
8683 #define MAX_VECT_LEN 16
8684
8685 struct expand_vec_perm_d
8686 {
8687   rtx target, op0, op1;
8688   unsigned char perm[MAX_VECT_LEN];
8689   enum machine_mode vmode;
8690   unsigned char nelt;
8691   bool one_vector_p;
8692   bool testing_p;
8693 };
8694
8695 /* Generate a variable permutation.  */
8696
8697 static void
8698 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8699 {
8700   enum machine_mode vmode = GET_MODE (target);
8701   bool one_vector_p = rtx_equal_p (op0, op1);
8702
8703   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8704   gcc_checking_assert (GET_MODE (op0) == vmode);
8705   gcc_checking_assert (GET_MODE (op1) == vmode);
8706   gcc_checking_assert (GET_MODE (sel) == vmode);
8707   gcc_checking_assert (TARGET_SIMD);
8708
8709   if (one_vector_p)
8710     {
8711       if (vmode == V8QImode)
8712         {
8713           /* Expand the argument to a V16QI mode by duplicating it.  */
8714           rtx pair = gen_reg_rtx (V16QImode);
8715           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8716           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8717         }
8718       else
8719         {
8720           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8721         }
8722     }
8723   else
8724     {
8725       rtx pair;
8726
8727       if (vmode == V8QImode)
8728         {
8729           pair = gen_reg_rtx (V16QImode);
8730           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8731           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8732         }
8733       else
8734         {
8735           pair = gen_reg_rtx (OImode);
8736           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8737           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8738         }
8739     }
8740 }
8741
8742 void
8743 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8744 {
8745   enum machine_mode vmode = GET_MODE (target);
8746   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8747   bool one_vector_p = rtx_equal_p (op0, op1);
8748   rtx rmask[MAX_VECT_LEN], mask;
8749
8750   gcc_checking_assert (!BYTES_BIG_ENDIAN);
8751
8752   /* The TBL instruction does not use a modulo index, so we must take care
8753      of that ourselves.  */
8754   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8755   for (i = 0; i < nelt; ++i)
8756     rmask[i] = mask;
8757   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8758   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8759
8760   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8761 }
8762
8763 /* Recognize patterns suitable for the TRN instructions.  */
8764 static bool
8765 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8766 {
8767   unsigned int i, odd, mask, nelt = d->nelt;
8768   rtx out, in0, in1, x;
8769   rtx (*gen) (rtx, rtx, rtx);
8770   enum machine_mode vmode = d->vmode;
8771
8772   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8773     return false;
8774
8775   /* Note that these are little-endian tests.
8776      We correct for big-endian later.  */
8777   if (d->perm[0] == 0)
8778     odd = 0;
8779   else if (d->perm[0] == 1)
8780     odd = 1;
8781   else
8782     return false;
8783   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8784
8785   for (i = 0; i < nelt; i += 2)
8786     {
8787       if (d->perm[i] != i + odd)
8788         return false;
8789       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8790         return false;
8791     }
8792
8793   /* Success!  */
8794   if (d->testing_p)
8795     return true;
8796
8797   in0 = d->op0;
8798   in1 = d->op1;
8799   if (BYTES_BIG_ENDIAN)
8800     {
8801       x = in0, in0 = in1, in1 = x;
8802       odd = !odd;
8803     }
8804   out = d->target;
8805
8806   if (odd)
8807     {
8808       switch (vmode)
8809         {
8810         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8811         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8812         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8813         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8814         case V4SImode: gen = gen_aarch64_trn2v4si; break;
8815         case V2SImode: gen = gen_aarch64_trn2v2si; break;
8816         case V2DImode: gen = gen_aarch64_trn2v2di; break;
8817         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8818         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8819         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8820         default:
8821           return false;
8822         }
8823     }
8824   else
8825     {
8826       switch (vmode)
8827         {
8828         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8829         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8830         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8831         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8832         case V4SImode: gen = gen_aarch64_trn1v4si; break;
8833         case V2SImode: gen = gen_aarch64_trn1v2si; break;
8834         case V2DImode: gen = gen_aarch64_trn1v2di; break;
8835         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8836         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8837         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8838         default:
8839           return false;
8840         }
8841     }
8842
8843   emit_insn (gen (out, in0, in1));
8844   return true;
8845 }
8846
8847 /* Recognize patterns suitable for the UZP instructions.  */
8848 static bool
8849 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8850 {
8851   unsigned int i, odd, mask, nelt = d->nelt;
8852   rtx out, in0, in1, x;
8853   rtx (*gen) (rtx, rtx, rtx);
8854   enum machine_mode vmode = d->vmode;
8855
8856   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8857     return false;
8858
8859   /* Note that these are little-endian tests.
8860      We correct for big-endian later.  */
8861   if (d->perm[0] == 0)
8862     odd = 0;
8863   else if (d->perm[0] == 1)
8864     odd = 1;
8865   else
8866     return false;
8867   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8868
8869   for (i = 0; i < nelt; i++)
8870     {
8871       unsigned elt = (i * 2 + odd) & mask;
8872       if (d->perm[i] != elt)
8873         return false;
8874     }
8875
8876   /* Success!  */
8877   if (d->testing_p)
8878     return true;
8879
8880   in0 = d->op0;
8881   in1 = d->op1;
8882   if (BYTES_BIG_ENDIAN)
8883     {
8884       x = in0, in0 = in1, in1 = x;
8885       odd = !odd;
8886     }
8887   out = d->target;
8888
8889   if (odd)
8890     {
8891       switch (vmode)
8892         {
8893         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8894         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8895         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8896         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8897         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8898         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8899         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8900         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8901         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8902         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8903         default:
8904           return false;
8905         }
8906     }
8907   else
8908     {
8909       switch (vmode)
8910         {
8911         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8912         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8913         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8914         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8915         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8916         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8917         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8918         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8919         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8920         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8921         default:
8922           return false;
8923         }
8924     }
8925
8926   emit_insn (gen (out, in0, in1));
8927   return true;
8928 }
8929
8930 /* Recognize patterns suitable for the ZIP instructions.  */
8931 static bool
8932 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8933 {
8934   unsigned int i, high, mask, nelt = d->nelt;
8935   rtx out, in0, in1, x;
8936   rtx (*gen) (rtx, rtx, rtx);
8937   enum machine_mode vmode = d->vmode;
8938
8939   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8940     return false;
8941
8942   /* Note that these are little-endian tests.
8943      We correct for big-endian later.  */
8944   high = nelt / 2;
8945   if (d->perm[0] == high)
8946     /* Do Nothing.  */
8947     ;
8948   else if (d->perm[0] == 0)
8949     high = 0;
8950   else
8951     return false;
8952   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8953
8954   for (i = 0; i < nelt / 2; i++)
8955     {
8956       unsigned elt = (i + high) & mask;
8957       if (d->perm[i * 2] != elt)
8958         return false;
8959       elt = (elt + nelt) & mask;
8960       if (d->perm[i * 2 + 1] != elt)
8961         return false;
8962     }
8963
8964   /* Success!  */
8965   if (d->testing_p)
8966     return true;
8967
8968   in0 = d->op0;
8969   in1 = d->op1;
8970   if (BYTES_BIG_ENDIAN)
8971     {
8972       x = in0, in0 = in1, in1 = x;
8973       high = !high;
8974     }
8975   out = d->target;
8976
8977   if (high)
8978     {
8979       switch (vmode)
8980         {
8981         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8982         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8983         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8984         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8985         case V4SImode: gen = gen_aarch64_zip2v4si; break;
8986         case V2SImode: gen = gen_aarch64_zip2v2si; break;
8987         case V2DImode: gen = gen_aarch64_zip2v2di; break;
8988         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8989         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8990         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8991         default:
8992           return false;
8993         }
8994     }
8995   else
8996     {
8997       switch (vmode)
8998         {
8999         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9000         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9001         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9002         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9003         case V4SImode: gen = gen_aarch64_zip1v4si; break;
9004         case V2SImode: gen = gen_aarch64_zip1v2si; break;
9005         case V2DImode: gen = gen_aarch64_zip1v2di; break;
9006         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9007         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9008         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9009         default:
9010           return false;
9011         }
9012     }
9013
9014   emit_insn (gen (out, in0, in1));
9015   return true;
9016 }
9017
9018 /* Recognize patterns for the EXT insn.  */
9019
9020 static bool
9021 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9022 {
9023   unsigned int i, nelt = d->nelt;
9024   rtx (*gen) (rtx, rtx, rtx, rtx);
9025   rtx offset;
9026
9027   unsigned int location = d->perm[0]; /* Always < nelt.  */
9028
9029   /* Check if the extracted indices are increasing by one.  */
9030   for (i = 1; i < nelt; i++)
9031     {
9032       unsigned int required = location + i;
9033       if (d->one_vector_p)
9034         {
9035           /* We'll pass the same vector in twice, so allow indices to wrap.  */
9036           required &= (nelt - 1);
9037         }
9038       if (d->perm[i] != required)
9039         return false;
9040     }
9041
9042   switch (d->vmode)
9043     {
9044     case V16QImode: gen = gen_aarch64_extv16qi; break;
9045     case V8QImode: gen = gen_aarch64_extv8qi; break;
9046     case V4HImode: gen = gen_aarch64_extv4hi; break;
9047     case V8HImode: gen = gen_aarch64_extv8hi; break;
9048     case V2SImode: gen = gen_aarch64_extv2si; break;
9049     case V4SImode: gen = gen_aarch64_extv4si; break;
9050     case V2SFmode: gen = gen_aarch64_extv2sf; break;
9051     case V4SFmode: gen = gen_aarch64_extv4sf; break;
9052     case V2DImode: gen = gen_aarch64_extv2di; break;
9053     case V2DFmode: gen = gen_aarch64_extv2df; break;
9054     default:
9055       return false;
9056     }
9057
9058   /* Success! */
9059   if (d->testing_p)
9060     return true;
9061
9062   /* The case where (location == 0) is a no-op for both big- and little-endian,
9063      and is removed by the mid-end at optimization levels -O1 and higher.  */
9064
9065   if (BYTES_BIG_ENDIAN && (location != 0))
9066     {
9067       /* After setup, we want the high elements of the first vector (stored
9068          at the LSB end of the register), and the low elements of the second
9069          vector (stored at the MSB end of the register). So swap.  */
9070       rtx temp = d->op0;
9071       d->op0 = d->op1;
9072       d->op1 = temp;
9073       /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
9074       location = nelt - location;
9075     }
9076
9077   offset = GEN_INT (location);
9078   emit_insn (gen (d->target, d->op0, d->op1, offset));
9079   return true;
9080 }
9081
9082 static bool
9083 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9084 {
9085   rtx (*gen) (rtx, rtx, rtx);
9086   rtx out = d->target;
9087   rtx in0;
9088   enum machine_mode vmode = d->vmode;
9089   unsigned int i, elt, nelt = d->nelt;
9090   rtx lane;
9091
9092   /* TODO: This may not be big-endian safe.  */
9093   if (BYTES_BIG_ENDIAN)
9094     return false;
9095
9096   elt = d->perm[0];
9097   for (i = 1; i < nelt; i++)
9098     {
9099       if (elt != d->perm[i])
9100         return false;
9101     }
9102
9103   /* The generic preparation in aarch64_expand_vec_perm_const_1
9104      swaps the operand order and the permute indices if it finds
9105      d->perm[0] to be in the second operand.  Thus, we can always
9106      use d->op0 and need not do any extra arithmetic to get the
9107      correct lane number.  */
9108   in0 = d->op0;
9109   lane = GEN_INT (elt);
9110
9111   switch (vmode)
9112     {
9113     case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9114     case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9115     case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9116     case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9117     case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9118     case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9119     case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9120     case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9121     case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9122     case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9123     default:
9124       return false;
9125     }
9126
9127   emit_insn (gen (out, in0, lane));
9128   return true;
9129 }
9130
9131 static bool
9132 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9133 {
9134   rtx rperm[MAX_VECT_LEN], sel;
9135   enum machine_mode vmode = d->vmode;
9136   unsigned int i, nelt = d->nelt;
9137
9138   if (d->testing_p)
9139     return true;
9140
9141   /* Generic code will try constant permutation twice.  Once with the
9142      original mode and again with the elements lowered to QImode.
9143      So wait and don't do the selector expansion ourselves.  */
9144   if (vmode != V8QImode && vmode != V16QImode)
9145     return false;
9146
9147   for (i = 0; i < nelt; ++i)
9148     {
9149       int nunits = GET_MODE_NUNITS (vmode);
9150
9151       /* If big-endian and two vectors we end up with a weird mixed-endian
9152          mode on NEON.  Reverse the index within each word but not the word
9153          itself.  */
9154       rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9155                                            : d->perm[i]);
9156     }
9157   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9158   sel = force_reg (vmode, sel);
9159
9160   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9161   return true;
9162 }
9163
9164 static bool
9165 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9166 {
9167   /* The pattern matching functions above are written to look for a small
9168      number to begin the sequence (0, 1, N/2).  If we begin with an index
9169      from the second operand, we can swap the operands.  */
9170   if (d->perm[0] >= d->nelt)
9171     {
9172       unsigned i, nelt = d->nelt;
9173       rtx x;
9174
9175       for (i = 0; i < nelt; ++i)
9176         d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
9177
9178       x = d->op0;
9179       d->op0 = d->op1;
9180       d->op1 = x;
9181     }
9182
9183   if (TARGET_SIMD)
9184     {
9185       if (aarch64_evpc_ext (d))
9186         return true;
9187       else if (aarch64_evpc_zip (d))
9188         return true;
9189       else if (aarch64_evpc_uzp (d))
9190         return true;
9191       else if (aarch64_evpc_trn (d))
9192         return true;
9193       else if (aarch64_evpc_dup (d))
9194         return true;
9195       return aarch64_evpc_tbl (d);
9196     }
9197   return false;
9198 }
9199
9200 /* Expand a vec_perm_const pattern.  */
9201
9202 bool
9203 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9204 {
9205   struct expand_vec_perm_d d;
9206   int i, nelt, which;
9207
9208   d.target = target;
9209   d.op0 = op0;
9210   d.op1 = op1;
9211
9212   d.vmode = GET_MODE (target);
9213   gcc_assert (VECTOR_MODE_P (d.vmode));
9214   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9215   d.testing_p = false;
9216
9217   for (i = which = 0; i < nelt; ++i)
9218     {
9219       rtx e = XVECEXP (sel, 0, i);
9220       int ei = INTVAL (e) & (2 * nelt - 1);
9221       which |= (ei < nelt ? 1 : 2);
9222       d.perm[i] = ei;
9223     }
9224
9225   switch (which)
9226     {
9227     default:
9228       gcc_unreachable ();
9229
9230     case 3:
9231       d.one_vector_p = false;
9232       if (!rtx_equal_p (op0, op1))
9233         break;
9234
9235       /* The elements of PERM do not suggest that only the first operand
9236          is used, but both operands are identical.  Allow easier matching
9237          of the permutation by folding the permutation into the single
9238          input vector.  */
9239       /* Fall Through.  */
9240     case 2:
9241       for (i = 0; i < nelt; ++i)
9242         d.perm[i] &= nelt - 1;
9243       d.op0 = op1;
9244       d.one_vector_p = true;
9245       break;
9246
9247     case 1:
9248       d.op1 = op0;
9249       d.one_vector_p = true;
9250       break;
9251     }
9252
9253   return aarch64_expand_vec_perm_const_1 (&d);
9254 }
9255
9256 static bool
9257 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9258                                      const unsigned char *sel)
9259 {
9260   struct expand_vec_perm_d d;
9261   unsigned int i, nelt, which;
9262   bool ret;
9263
9264   d.vmode = vmode;
9265   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9266   d.testing_p = true;
9267   memcpy (d.perm, sel, nelt);
9268
9269   /* Calculate whether all elements are in one vector.  */
9270   for (i = which = 0; i < nelt; ++i)
9271     {
9272       unsigned char e = d.perm[i];
9273       gcc_assert (e < 2 * nelt);
9274       which |= (e < nelt ? 1 : 2);
9275     }
9276
9277   /* If all elements are from the second vector, reindex as if from the
9278      first vector.  */
9279   if (which == 2)
9280     for (i = 0; i < nelt; ++i)
9281       d.perm[i] -= nelt;
9282
9283   /* Check whether the mask can be applied to a single vector.  */
9284   d.one_vector_p = (which != 3);
9285
9286   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9287   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9288   if (!d.one_vector_p)
9289     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9290
9291   start_sequence ();
9292   ret = aarch64_expand_vec_perm_const_1 (&d);
9293   end_sequence ();
9294
9295   return ret;
9296 }
9297
9298 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
9299 bool
9300 aarch64_cannot_change_mode_class (enum machine_mode from,
9301                                   enum machine_mode to,
9302                                   enum reg_class rclass)
9303 {
9304   /* Full-reg subregs are allowed on general regs or any class if they are
9305      the same size.  */
9306   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9307       || !reg_classes_intersect_p (FP_REGS, rclass))
9308     return false;
9309
9310   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
9311      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9312      2. Scalar to Scalar for integer modes or same size float modes.
9313      3. Vector to Vector modes.
9314      4. On little-endian only, Vector-Structure to Vector modes.  */
9315   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9316     {
9317       if (aarch64_vector_mode_supported_p (from)
9318           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9319         return false;
9320
9321       if (GET_MODE_NUNITS (from) == 1
9322           && GET_MODE_NUNITS (to) == 1
9323           && (GET_MODE_CLASS (from) == MODE_INT
9324               || from == to))
9325         return false;
9326
9327       if (aarch64_vector_mode_supported_p (from)
9328           && aarch64_vector_mode_supported_p (to))
9329         return false;
9330
9331       /* Within an vector structure straddling multiple vector registers
9332          we are in a mixed-endian representation.  As such, we can't
9333          easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
9334          switch between vectors and vector structures cheaply.  */
9335       if (!BYTES_BIG_ENDIAN)
9336         if ((aarch64_vector_mode_supported_p (from)
9337               && aarch64_vect_struct_mode_p (to))
9338             || (aarch64_vector_mode_supported_p (to)
9339               && aarch64_vect_struct_mode_p (from)))
9340           return false;
9341     }
9342
9343   return true;
9344 }
9345
9346 /* Implement MODES_TIEABLE_P.  */
9347
9348 bool
9349 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9350 {
9351   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9352     return true;
9353
9354   /* We specifically want to allow elements of "structure" modes to
9355      be tieable to the structure.  This more general condition allows
9356      other rarer situations too.  */
9357   if (TARGET_SIMD
9358       && aarch64_vector_mode_p (mode1)
9359       && aarch64_vector_mode_p (mode2))
9360     return true;
9361
9362   return false;
9363 }
9364
9365 #undef TARGET_ADDRESS_COST
9366 #define TARGET_ADDRESS_COST aarch64_address_cost
9367
9368 /* This hook will determines whether unnamed bitfields affect the alignment
9369    of the containing structure.  The hook returns true if the structure
9370    should inherit the alignment requirements of an unnamed bitfield's
9371    type.  */
9372 #undef TARGET_ALIGN_ANON_BITFIELD
9373 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9374
9375 #undef TARGET_ASM_ALIGNED_DI_OP
9376 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9377
9378 #undef TARGET_ASM_ALIGNED_HI_OP
9379 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9380
9381 #undef TARGET_ASM_ALIGNED_SI_OP
9382 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9383
9384 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9385 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9386   hook_bool_const_tree_hwi_hwi_const_tree_true
9387
9388 #undef TARGET_ASM_FILE_START
9389 #define TARGET_ASM_FILE_START aarch64_start_file
9390
9391 #undef TARGET_ASM_OUTPUT_MI_THUNK
9392 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9393
9394 #undef TARGET_ASM_SELECT_RTX_SECTION
9395 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9396
9397 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9398 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9399
9400 #undef TARGET_BUILD_BUILTIN_VA_LIST
9401 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9402
9403 #undef TARGET_CALLEE_COPIES
9404 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9405
9406 #undef TARGET_CAN_ELIMINATE
9407 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9408
9409 #undef TARGET_CANNOT_FORCE_CONST_MEM
9410 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9411
9412 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9413 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9414
9415 /* Only the least significant bit is used for initialization guard
9416    variables.  */
9417 #undef TARGET_CXX_GUARD_MASK_BIT
9418 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9419
9420 #undef TARGET_C_MODE_FOR_SUFFIX
9421 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9422
9423 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9424 #undef  TARGET_DEFAULT_TARGET_FLAGS
9425 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9426 #endif
9427
9428 #undef TARGET_CLASS_MAX_NREGS
9429 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9430
9431 #undef TARGET_BUILTIN_DECL
9432 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9433
9434 #undef  TARGET_EXPAND_BUILTIN
9435 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9436
9437 #undef TARGET_EXPAND_BUILTIN_VA_START
9438 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9439
9440 #undef TARGET_FOLD_BUILTIN
9441 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9442
9443 #undef TARGET_FUNCTION_ARG
9444 #define TARGET_FUNCTION_ARG aarch64_function_arg
9445
9446 #undef TARGET_FUNCTION_ARG_ADVANCE
9447 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9448
9449 #undef TARGET_FUNCTION_ARG_BOUNDARY
9450 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9451
9452 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9453 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9454
9455 #undef TARGET_FUNCTION_VALUE
9456 #define TARGET_FUNCTION_VALUE aarch64_function_value
9457
9458 #undef TARGET_FUNCTION_VALUE_REGNO_P
9459 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9460
9461 #undef TARGET_FRAME_POINTER_REQUIRED
9462 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9463
9464 #undef TARGET_GIMPLE_FOLD_BUILTIN
9465 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9466
9467 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9468 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9469
9470 #undef  TARGET_INIT_BUILTINS
9471 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
9472
9473 #undef TARGET_LEGITIMATE_ADDRESS_P
9474 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9475
9476 #undef TARGET_LEGITIMATE_CONSTANT_P
9477 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9478
9479 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9480 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9481
9482 #undef TARGET_LRA_P
9483 #define TARGET_LRA_P aarch64_lra_p
9484
9485 #undef TARGET_MANGLE_TYPE
9486 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9487
9488 #undef TARGET_MEMORY_MOVE_COST
9489 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9490
9491 #undef TARGET_MUST_PASS_IN_STACK
9492 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9493
9494 /* This target hook should return true if accesses to volatile bitfields
9495    should use the narrowest mode possible.  It should return false if these
9496    accesses should use the bitfield container type.  */
9497 #undef TARGET_NARROW_VOLATILE_BITFIELD
9498 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9499
9500 #undef  TARGET_OPTION_OVERRIDE
9501 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9502
9503 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9504 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9505   aarch64_override_options_after_change
9506
9507 #undef TARGET_PASS_BY_REFERENCE
9508 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9509
9510 #undef TARGET_PREFERRED_RELOAD_CLASS
9511 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9512
9513 #undef TARGET_SECONDARY_RELOAD
9514 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9515
9516 #undef TARGET_SHIFT_TRUNCATION_MASK
9517 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9518
9519 #undef TARGET_SETUP_INCOMING_VARARGS
9520 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9521
9522 #undef TARGET_STRUCT_VALUE_RTX
9523 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
9524
9525 #undef TARGET_REGISTER_MOVE_COST
9526 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9527
9528 #undef TARGET_RETURN_IN_MEMORY
9529 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9530
9531 #undef TARGET_RETURN_IN_MSB
9532 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9533
9534 #undef TARGET_RTX_COSTS
9535 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9536
9537 #undef TARGET_SCHED_ISSUE_RATE
9538 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9539
9540 #undef TARGET_TRAMPOLINE_INIT
9541 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9542
9543 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9544 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9545
9546 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9547 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9548
9549 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9550 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9551
9552 #undef TARGET_VECTORIZE_ADD_STMT_COST
9553 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9554
9555 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9556 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9557   aarch64_builtin_vectorization_cost
9558
9559 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9560 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9561
9562 #undef TARGET_VECTORIZE_BUILTINS
9563 #define TARGET_VECTORIZE_BUILTINS
9564
9565 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9566 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9567   aarch64_builtin_vectorized_function
9568
9569 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9570 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9571   aarch64_autovectorize_vector_sizes
9572
9573 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9574 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9575   aarch64_atomic_assign_expand_fenv
9576
9577 /* Section anchor support.  */
9578
9579 #undef TARGET_MIN_ANCHOR_OFFSET
9580 #define TARGET_MIN_ANCHOR_OFFSET -256
9581
9582 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9583    byte offset; we can do much more for larger data types, but have no way
9584    to determine the size of the access.  We assume accesses are aligned.  */
9585 #undef TARGET_MAX_ANCHOR_OFFSET
9586 #define TARGET_MAX_ANCHOR_OFFSET 4095
9587
9588 #undef TARGET_VECTOR_ALIGNMENT
9589 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9590
9591 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9592 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9593   aarch64_simd_vector_alignment_reachable
9594
9595 /* vec_perm support.  */
9596
9597 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9598 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9599   aarch64_vectorize_vec_perm_const_ok
9600
9601
9602 #undef TARGET_FIXED_CONDITION_CODE_REGS
9603 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9604
9605 struct gcc_target targetm = TARGET_INITIALIZER;
9606
9607 #include "gt-aarch64.h"