gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "stringpool.h"
  30 #include "stor-layout.h"
  31 #include "calls.h"
  32 #include "varasm.h"
  33 #include "regs.h"
  34 #include "df.h"
  35 #include "hard-reg-set.h"
  36 #include "output.h"
  37 #include "expr.h"
  38 #include "reload.h"
  39 #include "toplev.h"
  40 #include "target.h"
  41 #include "target-def.h"
  42 #include "targhooks.h"
  43 #include "ggc.h"
  44 #include "function.h"
  45 #include "tm_p.h"
  46 #include "recog.h"
  47 #include "langhooks.h"
  48 #include "diagnostic-core.h"
  49 #include "hash-table.h"
  50 #include "vec.h"
  51 #include "basic-block.h"
  52 #include "tree-ssa-alias.h"
  53 #include "internal-fn.h"
  54 #include "gimple-fold.h"
  55 #include "tree-eh.h"
  56 #include "gimple-expr.h"
  57 #include "is-a.h"
  58 #include "gimple.h"
  59 #include "gimplify.h"
  60 #include "optabs.h"
  61 #include "dwarf2.h"
  62 #include "cfgloop.h"
  63 #include "tree-vectorizer.h"
  64 #include "config/arm/aarch-cost-tables.h"
  65 #include "dumpfile.h"
  66 #include "builtins.h"
  67
  68 /* Defined for convenience.  */
  69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
  70
  71 /* Classifies an address.
  72
  73    ADDRESS_REG_IMM
  74        A simple base register plus immediate offset.
  75
  76    ADDRESS_REG_WB
  77        A base register indexed by immediate offset with writeback.
  78
  79    ADDRESS_REG_REG
  80        A base register indexed by (optionally scaled) register.
  81
  82    ADDRESS_REG_UXTW
  83        A base register indexed by (optionally scaled) zero-extended register.
  84
  85    ADDRESS_REG_SXTW
  86        A base register indexed by (optionally scaled) sign-extended register.
  87
  88    ADDRESS_LO_SUM
  89        A LO_SUM rtx with a base register and "LO12" symbol relocation.
  90
  91    ADDRESS_SYMBOLIC:
  92        A constant symbolic address, in pc-relative literal pool.  */
  93
  94 enum aarch64_address_type {
  95   ADDRESS_REG_IMM,
  96   ADDRESS_REG_WB,
  97   ADDRESS_REG_REG,
  98   ADDRESS_REG_UXTW,
  99   ADDRESS_REG_SXTW,
 100   ADDRESS_LO_SUM,
 101   ADDRESS_SYMBOLIC
 102 };
 103
 104 struct aarch64_address_info {
 105   enum aarch64_address_type type;
 106   rtx base;
 107   rtx offset;
 108   int shift;
 109   enum aarch64_symbol_type symbol_type;
 110 };
 111
 112 struct simd_immediate_info
 113 {
 114   rtx value;
 115   int shift;
 116   int element_width;
 117   bool mvn;
 118   bool msl;
 119 };
 120
 121 /* The current code model.  */
 122 enum aarch64_code_model aarch64_cmodel;
 123
 124 #ifdef HAVE_AS_TLS
 125 #undef TARGET_HAVE_TLS
 126 #define TARGET_HAVE_TLS 1
 127 #endif
 128
 129 static bool aarch64_lra_p (void);
 130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
 131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 132                                                      const_tree,
 133                                                      enum machine_mode *, int *,
 134                                                      bool *);
 135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 137 static void aarch64_override_options_after_change (void);
 138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 139 static unsigned bit_count (unsigned HOST_WIDE_INT);
 140 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 141                                                  const unsigned char *sel);
 142 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 143
 144 /* The processor for which instructions should be scheduled.  */
 145 enum aarch64_processor aarch64_tune = cortexa53;
 146
 147 /* The current tuning set.  */
 148 const struct tune_params *aarch64_tune_params;
 149
 150 /* Mask to specify which instructions we are allowed to generate.  */
 151 unsigned long aarch64_isa_flags = 0;
 152
 153 /* Mask to specify which instruction scheduling options should be used.  */
 154 unsigned long aarch64_tune_flags = 0;
 155
 156 /* Tuning parameters.  */
 157
 158 #if HAVE_DESIGNATED_INITIALIZERS
 159 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
 160 #else
 161 #define NAMED_PARAM(NAME, VAL) (VAL)
 162 #endif
 163
 164 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 165 __extension__
 166 #endif
 167
 168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 169 __extension__
 170 #endif
 171 static const struct cpu_addrcost_table generic_addrcost_table =
 172 {
 173 #if HAVE_DESIGNATED_INITIALIZERS
 174   .addr_scale_costs =
 175 #endif
 176     {
 177       NAMED_PARAM (hi, 0),
 178       NAMED_PARAM (si, 0),
 179       NAMED_PARAM (di, 0),
 180       NAMED_PARAM (ti, 0),
 181     },
 182   NAMED_PARAM (pre_modify, 0),
 183   NAMED_PARAM (post_modify, 0),
 184   NAMED_PARAM (register_offset, 0),
 185   NAMED_PARAM (register_extend, 0),
 186   NAMED_PARAM (imm_offset, 0)
 187 };
 188
 189 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 190 __extension__
 191 #endif
 192 static const struct cpu_addrcost_table cortexa57_addrcost_table =
 193 {
 194 #if HAVE_DESIGNATED_INITIALIZERS
 195   .addr_scale_costs =
 196 #endif
 197     {
 198       NAMED_PARAM (hi, 1),
 199       NAMED_PARAM (si, 0),
 200       NAMED_PARAM (di, 0),
 201       NAMED_PARAM (ti, 1),
 202     },
 203   NAMED_PARAM (pre_modify, 0),
 204   NAMED_PARAM (post_modify, 0),
 205   NAMED_PARAM (register_offset, 0),
 206   NAMED_PARAM (register_extend, 0),
 207   NAMED_PARAM (imm_offset, 0),
 208 };
 209
 210 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 211 __extension__
 212 #endif
 213 static const struct cpu_regmove_cost generic_regmove_cost =
 214 {
 215   NAMED_PARAM (GP2GP, 1),
 216   NAMED_PARAM (GP2FP, 2),
 217   NAMED_PARAM (FP2GP, 2),
 218   NAMED_PARAM (FP2FP, 2)
 219 };
 220
 221 static const struct cpu_regmove_cost cortexa57_regmove_cost =
 222 {
 223   NAMED_PARAM (GP2GP, 1),
 224   /* Avoid the use of slow int<->fp moves for spilling by setting
 225      their cost higher than memmov_cost.  */
 226   NAMED_PARAM (GP2FP, 5),
 227   NAMED_PARAM (FP2GP, 5),
 228   NAMED_PARAM (FP2FP, 2)
 229 };
 230
 231 static const struct cpu_regmove_cost cortexa53_regmove_cost =
 232 {
 233   NAMED_PARAM (GP2GP, 1),
 234   /* Avoid the use of slow int<->fp moves for spilling by setting
 235      their cost higher than memmov_cost.  */
 236   NAMED_PARAM (GP2FP, 5),
 237   NAMED_PARAM (FP2GP, 5),
 238   NAMED_PARAM (FP2FP, 2)
 239 };
 240
 241 /* Generic costs for vector insn classes.  */
 242 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 243 __extension__
 244 #endif
 245 static const struct cpu_vector_cost generic_vector_cost =
 246 {
 247   NAMED_PARAM (scalar_stmt_cost, 1),
 248   NAMED_PARAM (scalar_load_cost, 1),
 249   NAMED_PARAM (scalar_store_cost, 1),
 250   NAMED_PARAM (vec_stmt_cost, 1),
 251   NAMED_PARAM (vec_to_scalar_cost, 1),
 252   NAMED_PARAM (scalar_to_vec_cost, 1),
 253   NAMED_PARAM (vec_align_load_cost, 1),
 254   NAMED_PARAM (vec_unalign_load_cost, 1),
 255   NAMED_PARAM (vec_unalign_store_cost, 1),
 256   NAMED_PARAM (vec_store_cost, 1),
 257   NAMED_PARAM (cond_taken_branch_cost, 3),
 258   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 259 };
 260
 261 /* Generic costs for vector insn classes.  */
 262 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 263 __extension__
 264 #endif
 265 static const struct cpu_vector_cost cortexa57_vector_cost =
 266 {
 267   NAMED_PARAM (scalar_stmt_cost, 1),
 268   NAMED_PARAM (scalar_load_cost, 4),
 269   NAMED_PARAM (scalar_store_cost, 1),
 270   NAMED_PARAM (vec_stmt_cost, 3),
 271   NAMED_PARAM (vec_to_scalar_cost, 8),
 272   NAMED_PARAM (scalar_to_vec_cost, 8),
 273   NAMED_PARAM (vec_align_load_cost, 5),
 274   NAMED_PARAM (vec_unalign_load_cost, 5),
 275   NAMED_PARAM (vec_unalign_store_cost, 1),
 276   NAMED_PARAM (vec_store_cost, 1),
 277   NAMED_PARAM (cond_taken_branch_cost, 1),
 278   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 279 };
 280
 281 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 282 __extension__
 283 #endif
 284 static const struct tune_params generic_tunings =
 285 {
 286   &cortexa57_extra_costs,
 287   &generic_addrcost_table,
 288   &generic_regmove_cost,
 289   &generic_vector_cost,
 290   NAMED_PARAM (memmov_cost, 4),
 291   NAMED_PARAM (issue_rate, 2)
 292 };
 293
 294 static const struct tune_params cortexa53_tunings =
 295 {
 296   &cortexa53_extra_costs,
 297   &generic_addrcost_table,
 298   &cortexa53_regmove_cost,
 299   &generic_vector_cost,
 300   NAMED_PARAM (memmov_cost, 4),
 301   NAMED_PARAM (issue_rate, 2)
 302 };
 303
 304 static const struct tune_params cortexa57_tunings =
 305 {
 306   &cortexa57_extra_costs,
 307   &cortexa57_addrcost_table,
 308   &cortexa57_regmove_cost,
 309   &cortexa57_vector_cost,
 310   NAMED_PARAM (memmov_cost, 4),
 311   NAMED_PARAM (issue_rate, 3)
 312 };
 313
 314 /* A processor implementing AArch64.  */
 315 struct processor
 316 {
 317   const char *const name;
 318   enum aarch64_processor core;
 319   const char *arch;
 320   const unsigned long flags;
 321   const struct tune_params *const tune;
 322 };
 323
 324 /* Processor cores implementing AArch64.  */
 325 static const struct processor all_cores[] =
 326 {
 327 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
 328   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
 329 #include "aarch64-cores.def"
 330 #undef AARCH64_CORE
 331   {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
 332   {NULL, aarch64_none, NULL, 0, NULL}
 333 };
 334
 335 /* Architectures implementing AArch64.  */
 336 static const struct processor all_architectures[] =
 337 {
 338 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 339   {NAME, CORE, #ARCH, FLAGS, NULL},
 340 #include "aarch64-arches.def"
 341 #undef AARCH64_ARCH
 342   {NULL, aarch64_none, NULL, 0, NULL}
 343 };
 344
 345 /* Target specification.  These are populated as commandline arguments
 346    are processed, or NULL if not specified.  */
 347 static const struct processor *selected_arch;
 348 static const struct processor *selected_cpu;
 349 static const struct processor *selected_tune;
 350
 351 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 352
 353 /* An ISA extension in the co-processor and main instruction set space.  */
 354 struct aarch64_option_extension
 355 {
 356   const char *const name;
 357   const unsigned long flags_on;
 358   const unsigned long flags_off;
 359 };
 360
 361 /* ISA extensions in AArch64.  */
 362 static const struct aarch64_option_extension all_extensions[] =
 363 {
 364 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 365   {NAME, FLAGS_ON, FLAGS_OFF},
 366 #include "aarch64-option-extensions.def"
 367 #undef AARCH64_OPT_EXTENSION
 368   {NULL, 0, 0}
 369 };
 370
 371 /* Used to track the size of an address when generating a pre/post
 372    increment address.  */
 373 static enum machine_mode aarch64_memory_reference_mode;
 374
 375 /* Used to force GTY into this file.  */
 376 static GTY(()) int gty_dummy;
 377
 378 /* A table of valid AArch64 "bitmask immediate" values for
 379    logical instructions.  */
 380
 381 #define AARCH64_NUM_BITMASKS  5334
 382 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 383
 384 typedef enum aarch64_cond_code
 385 {
 386   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 387   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 388   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 389 }
 390 aarch64_cc;
 391
 392 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 393
 394 /* The condition codes of the processor, and the inverse function.  */
 395 static const char * const aarch64_condition_codes[] =
 396 {
 397   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 398   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 399 };
 400
 401 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 402 unsigned
 403 aarch64_dbx_register_number (unsigned regno)
 404 {
 405    if (GP_REGNUM_P (regno))
 406      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 407    else if (regno == SP_REGNUM)
 408      return AARCH64_DWARF_SP;
 409    else if (FP_REGNUM_P (regno))
 410      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 411
 412    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 413       equivalent DWARF register.  */
 414    return DWARF_FRAME_REGISTERS;
 415 }
 416
 417 /* Return TRUE if MODE is any of the large INT modes.  */
 418 static bool
 419 aarch64_vect_struct_mode_p (enum machine_mode mode)
 420 {
 421   return mode == OImode || mode == CImode || mode == XImode;
 422 }
 423
 424 /* Return TRUE if MODE is any of the vector modes.  */
 425 static bool
 426 aarch64_vector_mode_p (enum machine_mode mode)
 427 {
 428   return aarch64_vector_mode_supported_p (mode)
 429          || aarch64_vect_struct_mode_p (mode);
 430 }
 431
 432 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 433 static bool
 434 aarch64_array_mode_supported_p (enum machine_mode mode,
 435                                 unsigned HOST_WIDE_INT nelems)
 436 {
 437   if (TARGET_SIMD
 438       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 439       && (nelems >= 2 && nelems <= 4))
 440     return true;
 441
 442   return false;
 443 }
 444
 445 /* Implement HARD_REGNO_NREGS.  */
 446
 447 int
 448 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 449 {
 450   switch (aarch64_regno_regclass (regno))
 451     {
 452     case FP_REGS:
 453     case FP_LO_REGS:
 454       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 455     default:
 456       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 457     }
 458   gcc_unreachable ();
 459 }
 460
 461 /* Implement HARD_REGNO_MODE_OK.  */
 462
 463 int
 464 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 465 {
 466   if (GET_MODE_CLASS (mode) == MODE_CC)
 467     return regno == CC_REGNUM;
 468
 469   if (regno == SP_REGNUM)
 470     /* The purpose of comparing with ptr_mode is to support the
 471        global register variable associated with the stack pointer
 472        register via the syntax of asm ("wsp") in ILP32.  */
 473     return mode == Pmode || mode == ptr_mode;
 474
 475   if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
 476     return mode == Pmode;
 477
 478   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 479     return 1;
 480
 481   if (FP_REGNUM_P (regno))
 482     {
 483       if (aarch64_vect_struct_mode_p (mode))
 484         return
 485           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 486       else
 487         return 1;
 488     }
 489
 490   return 0;
 491 }
 492
 493 /* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
 494 enum machine_mode
 495 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
 496                                      enum machine_mode mode)
 497 {
 498   /* Handle modes that fit within single registers.  */
 499   if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
 500     {
 501       if (GET_MODE_SIZE (mode) >= 4)
 502         return mode;
 503       else
 504         return SImode;
 505     }
 506   /* Fall back to generic for multi-reg and very large modes.  */
 507   else
 508     return choose_hard_reg_mode (regno, nregs, false);
 509 }
 510
 511 /* Return true if calls to DECL should be treated as
 512    long-calls (ie called via a register).  */
 513 static bool
 514 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 515 {
 516   return false;
 517 }
 518
 519 /* Return true if calls to symbol-ref SYM should be treated as
 520    long-calls (ie called via a register).  */
 521 bool
 522 aarch64_is_long_call_p (rtx sym)
 523 {
 524   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 525 }
 526
 527 /* Return true if the offsets to a zero/sign-extract operation
 528    represent an expression that matches an extend operation.  The
 529    operands represent the paramters from
 530
 531    (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 532 bool
 533 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 534                                 rtx extract_imm)
 535 {
 536   HOST_WIDE_INT mult_val, extract_val;
 537
 538   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 539     return false;
 540
 541   mult_val = INTVAL (mult_imm);
 542   extract_val = INTVAL (extract_imm);
 543
 544   if (extract_val > 8
 545       && extract_val < GET_MODE_BITSIZE (mode)
 546       && exact_log2 (extract_val & ~7) > 0
 547       && (extract_val & 7) <= 4
 548       && mult_val == (1 << (extract_val & 7)))
 549     return true;
 550
 551   return false;
 552 }
 553
 554 /* Emit an insn that's a simple single-set.  Both the operands must be
 555    known to be valid.  */
 556 inline static rtx
 557 emit_set_insn (rtx x, rtx y)
 558 {
 559   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 560 }
 561
 562 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 563    return the rtx for register 0 in the proper mode.  */
 564 rtx
 565 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 566 {
 567   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 568   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 569
 570   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 571   return cc_reg;
 572 }
 573
 574 /* Build the SYMBOL_REF for __tls_get_addr.  */
 575
 576 static GTY(()) rtx tls_get_addr_libfunc;
 577
 578 rtx
 579 aarch64_tls_get_addr (void)
 580 {
 581   if (!tls_get_addr_libfunc)
 582     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 583   return tls_get_addr_libfunc;
 584 }
 585
 586 /* Return the TLS model to use for ADDR.  */
 587
 588 static enum tls_model
 589 tls_symbolic_operand_type (rtx addr)
 590 {
 591   enum tls_model tls_kind = TLS_MODEL_NONE;
 592   rtx sym, addend;
 593
 594   if (GET_CODE (addr) == CONST)
 595     {
 596       split_const (addr, &sym, &addend);
 597       if (GET_CODE (sym) == SYMBOL_REF)
 598         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 599     }
 600   else if (GET_CODE (addr) == SYMBOL_REF)
 601     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 602
 603   return tls_kind;
 604 }
 605
 606 /* We'll allow lo_sum's in addresses in our legitimate addresses
 607    so that combine would take care of combining addresses where
 608    necessary, but for generation purposes, we'll generate the address
 609    as :
 610    RTL                               Absolute
 611    tmp = hi (symbol_ref);            adrp  x1, foo
 612    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
 613                                      nop
 614
 615    PIC                               TLS
 616    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 617    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 618                                      bl   __tls_get_addr
 619                                      nop
 620
 621    Load TLS symbol, depending on TLS mechanism and TLS access model.
 622
 623    Global Dynamic - Traditional TLS:
 624    adrp tmp, :tlsgd:imm
 625    add  dest, tmp, #:tlsgd_lo12:imm
 626    bl   __tls_get_addr
 627
 628    Global Dynamic - TLS Descriptors:
 629    adrp dest, :tlsdesc:imm
 630    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 631    add  dest, dest, #:tlsdesc_lo12:imm
 632    blr  tmp
 633    mrs  tp, tpidr_el0
 634    add  dest, dest, tp
 635
 636    Initial Exec:
 637    mrs  tp, tpidr_el0
 638    adrp tmp, :gottprel:imm
 639    ldr  dest, [tmp, #:gottprel_lo12:imm]
 640    add  dest, dest, tp
 641
 642    Local Exec:
 643    mrs  tp, tpidr_el0
 644    add  t0, tp, #:tprel_hi12:imm
 645    add  t0, #:tprel_lo12_nc:imm
 646 */
 647
 648 static void
 649 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 650                                    enum aarch64_symbol_type type)
 651 {
 652   switch (type)
 653     {
 654     case SYMBOL_SMALL_ABSOLUTE:
 655       {
 656         /* In ILP32, the mode of dest can be either SImode or DImode.  */
 657         rtx tmp_reg = dest;
 658         enum machine_mode mode = GET_MODE (dest);
 659
 660         gcc_assert (mode == Pmode || mode == ptr_mode);
 661
 662         if (can_create_pseudo_p ())
 663           tmp_reg = gen_reg_rtx (mode);
 664
 665         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 666         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 667         return;
 668       }
 669
 670     case SYMBOL_TINY_ABSOLUTE:
 671       emit_insn (gen_rtx_SET (Pmode, dest, imm));
 672       return;
 673
 674     case SYMBOL_SMALL_GOT:
 675       {
 676         /* In ILP32, the mode of dest can be either SImode or DImode,
 677            while the got entry is always of SImode size.  The mode of
 678            dest depends on how dest is used: if dest is assigned to a
 679            pointer (e.g. in the memory), it has SImode; it may have
 680            DImode if dest is dereferenced to access the memeory.
 681            This is why we have to handle three different ldr_got_small
 682            patterns here (two patterns for ILP32).  */
 683         rtx tmp_reg = dest;
 684         enum machine_mode mode = GET_MODE (dest);
 685
 686         if (can_create_pseudo_p ())
 687           tmp_reg = gen_reg_rtx (mode);
 688
 689         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 690         if (mode == ptr_mode)
 691           {
 692             if (mode == DImode)
 693               emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
 694             else
 695               emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
 696           }
 697         else
 698           {
 699             gcc_assert (mode == Pmode);
 700             emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
 701           }
 702
 703         return;
 704       }
 705
 706     case SYMBOL_SMALL_TLSGD:
 707       {
 708         rtx_insn *insns;
 709         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 710
 711         start_sequence ();
 712         aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
 713         insns = get_insns ();
 714         end_sequence ();
 715
 716         RTL_CONST_CALL_P (insns) = 1;
 717         emit_libcall_block (insns, dest, result, imm);
 718         return;
 719       }
 720
 721     case SYMBOL_SMALL_TLSDESC:
 722       {
 723         enum machine_mode mode = GET_MODE (dest);
 724         rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
 725         rtx tp;
 726
 727         gcc_assert (mode == Pmode || mode == ptr_mode);
 728
 729         /* In ILP32, the got entry is always of SImode size.  Unlike
 730            small GOT, the dest is fixed at reg 0.  */
 731         if (TARGET_ILP32)
 732           emit_insn (gen_tlsdesc_small_si (imm));
 733         else
 734           emit_insn (gen_tlsdesc_small_di (imm));
 735         tp = aarch64_load_tp (NULL);
 736
 737         if (mode != Pmode)
 738           tp = gen_lowpart (mode, tp);
 739
 740         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
 741         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 742         return;
 743       }
 744
 745     case SYMBOL_SMALL_GOTTPREL:
 746       {
 747         /* In ILP32, the mode of dest can be either SImode or DImode,
 748            while the got entry is always of SImode size.  The mode of
 749            dest depends on how dest is used: if dest is assigned to a
 750            pointer (e.g. in the memory), it has SImode; it may have
 751            DImode if dest is dereferenced to access the memeory.
 752            This is why we have to handle three different tlsie_small
 753            patterns here (two patterns for ILP32).  */
 754         enum machine_mode mode = GET_MODE (dest);
 755         rtx tmp_reg = gen_reg_rtx (mode);
 756         rtx tp = aarch64_load_tp (NULL);
 757
 758         if (mode == ptr_mode)
 759           {
 760             if (mode == DImode)
 761               emit_insn (gen_tlsie_small_di (tmp_reg, imm));
 762             else
 763               {
 764                 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
 765                 tp = gen_lowpart (mode, tp);
 766               }
 767           }
 768         else
 769           {
 770             gcc_assert (mode == Pmode);
 771             emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
 772           }
 773
 774         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
 775         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 776         return;
 777       }
 778
 779     case SYMBOL_SMALL_TPREL:
 780       {
 781         rtx tp = aarch64_load_tp (NULL);
 782         emit_insn (gen_tlsle_small (dest, tp, imm));
 783         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 784         return;
 785       }
 786
 787     case SYMBOL_TINY_GOT:
 788       emit_insn (gen_ldr_got_tiny (dest, imm));
 789       return;
 790
 791     default:
 792       gcc_unreachable ();
 793     }
 794 }
 795
 796 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 797    handle all moves if !can_create_pseudo_p ().  The distinction is
 798    important because, unlike emit_move_insn, the move expanders know
 799    how to force Pmode objects into the constant pool even when the
 800    constant pool address is not itself legitimate.  */
 801 static rtx
 802 aarch64_emit_move (rtx dest, rtx src)
 803 {
 804   return (can_create_pseudo_p ()
 805           ? emit_move_insn (dest, src)
 806           : emit_move_insn_1 (dest, src));
 807 }
 808
 809 /* Split a 128-bit move operation into two 64-bit move operations,
 810    taking care to handle partial overlap of register to register
 811    copies.  Special cases are needed when moving between GP regs and
 812    FP regs.  SRC can be a register, constant or memory; DST a register
 813    or memory.  If either operand is memory it must not have any side
 814    effects.  */
 815 void
 816 aarch64_split_128bit_move (rtx dst, rtx src)
 817 {
 818   rtx dst_lo, dst_hi;
 819   rtx src_lo, src_hi;
 820
 821   enum machine_mode mode = GET_MODE (dst);
 822
 823   gcc_assert (mode == TImode || mode == TFmode);
 824   gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
 825   gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
 826
 827   if (REG_P (dst) && REG_P (src))
 828     {
 829       int src_regno = REGNO (src);
 830       int dst_regno = REGNO (dst);
 831
 832       /* Handle FP <-> GP regs.  */
 833       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 834         {
 835           src_lo = gen_lowpart (word_mode, src);
 836           src_hi = gen_highpart (word_mode, src);
 837
 838           if (mode == TImode)
 839             {
 840               emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
 841               emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
 842             }
 843           else
 844             {
 845               emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
 846               emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
 847             }
 848           return;
 849         }
 850       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 851         {
 852           dst_lo = gen_lowpart (word_mode, dst);
 853           dst_hi = gen_highpart (word_mode, dst);
 854
 855           if (mode == TImode)
 856             {
 857               emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
 858               emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
 859             }
 860           else
 861             {
 862               emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
 863               emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
 864             }
 865           return;
 866         }
 867     }
 868
 869   dst_lo = gen_lowpart (word_mode, dst);
 870   dst_hi = gen_highpart (word_mode, dst);
 871   src_lo = gen_lowpart (word_mode, src);
 872   src_hi = gen_highpart_mode (word_mode, mode, src);
 873
 874   /* At most one pairing may overlap.  */
 875   if (reg_overlap_mentioned_p (dst_lo, src_hi))
 876     {
 877       aarch64_emit_move (dst_hi, src_hi);
 878       aarch64_emit_move (dst_lo, src_lo);
 879     }
 880   else
 881     {
 882       aarch64_emit_move (dst_lo, src_lo);
 883       aarch64_emit_move (dst_hi, src_hi);
 884     }
 885 }
 886
 887 bool
 888 aarch64_split_128bit_move_p (rtx dst, rtx src)
 889 {
 890   return (! REG_P (src)
 891           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 892 }
 893
 894 /* Split a complex SIMD combine.  */
 895
 896 void
 897 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
 898 {
 899   enum machine_mode src_mode = GET_MODE (src1);
 900   enum machine_mode dst_mode = GET_MODE (dst);
 901
 902   gcc_assert (VECTOR_MODE_P (dst_mode));
 903
 904   if (REG_P (dst) && REG_P (src1) && REG_P (src2))
 905     {
 906       rtx (*gen) (rtx, rtx, rtx);
 907
 908       switch (src_mode)
 909         {
 910         case V8QImode:
 911           gen = gen_aarch64_simd_combinev8qi;
 912           break;
 913         case V4HImode:
 914           gen = gen_aarch64_simd_combinev4hi;
 915           break;
 916         case V2SImode:
 917           gen = gen_aarch64_simd_combinev2si;
 918           break;
 919         case V2SFmode:
 920           gen = gen_aarch64_simd_combinev2sf;
 921           break;
 922         case DImode:
 923           gen = gen_aarch64_simd_combinedi;
 924           break;
 925         case DFmode:
 926           gen = gen_aarch64_simd_combinedf;
 927           break;
 928         default:
 929           gcc_unreachable ();
 930         }
 931
 932       emit_insn (gen (dst, src1, src2));
 933       return;
 934     }
 935 }
 936
 937 /* Split a complex SIMD move.  */
 938
 939 void
 940 aarch64_split_simd_move (rtx dst, rtx src)
 941 {
 942   enum machine_mode src_mode = GET_MODE (src);
 943   enum machine_mode dst_mode = GET_MODE (dst);
 944
 945   gcc_assert (VECTOR_MODE_P (dst_mode));
 946
 947   if (REG_P (dst) && REG_P (src))
 948     {
 949       rtx (*gen) (rtx, rtx);
 950
 951       gcc_assert (VECTOR_MODE_P (src_mode));
 952
 953       switch (src_mode)
 954         {
 955         case V16QImode:
 956           gen = gen_aarch64_split_simd_movv16qi;
 957           break;
 958         case V8HImode:
 959           gen = gen_aarch64_split_simd_movv8hi;
 960           break;
 961         case V4SImode:
 962           gen = gen_aarch64_split_simd_movv4si;
 963           break;
 964         case V2DImode:
 965           gen = gen_aarch64_split_simd_movv2di;
 966           break;
 967         case V4SFmode:
 968           gen = gen_aarch64_split_simd_movv4sf;
 969           break;
 970         case V2DFmode:
 971           gen = gen_aarch64_split_simd_movv2df;
 972           break;
 973         default:
 974           gcc_unreachable ();
 975         }
 976
 977       emit_insn (gen (dst, src));
 978       return;
 979     }
 980 }
 981
 982 static rtx
 983 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
 984 {
 985   if (can_create_pseudo_p ())
 986     return force_reg (mode, value);
 987   else
 988     {
 989       x = aarch64_emit_move (x, value);
 990       return x;
 991     }
 992 }
 993
 994
 995 static rtx
 996 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 997 {
 998   if (!aarch64_plus_immediate (GEN_INT (offset), mode))
 999     {
1000       rtx high;
1001       /* Load the full offset into a register.  This
1002          might be improvable in the future.  */
1003       high = GEN_INT (offset);
1004       offset = 0;
1005       high = aarch64_force_temporary (mode, temp, high);
1006       reg = aarch64_force_temporary (mode, temp,
1007                                      gen_rtx_PLUS (mode, high, reg));
1008     }
1009   return plus_constant (mode, reg, offset);
1010 }
1011
1012 void
1013 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1014 {
1015   enum machine_mode mode = GET_MODE (dest);
1016   unsigned HOST_WIDE_INT mask;
1017   int i;
1018   bool first;
1019   unsigned HOST_WIDE_INT val;
1020   bool subtargets;
1021   rtx subtarget;
1022   int one_match, zero_match, first_not_ffff_match;
1023
1024   gcc_assert (mode == SImode || mode == DImode);
1025
1026   /* Check on what type of symbol it is.  */
1027   if (GET_CODE (imm) == SYMBOL_REF
1028       || GET_CODE (imm) == LABEL_REF
1029       || GET_CODE (imm) == CONST)
1030     {
1031       rtx mem, base, offset;
1032       enum aarch64_symbol_type sty;
1033
1034       /* If we have (const (plus symbol offset)), separate out the offset
1035          before we start classifying the symbol.  */
1036       split_const (imm, &base, &offset);
1037
1038       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1039       switch (sty)
1040         {
1041         case SYMBOL_FORCE_TO_MEM:
1042           if (offset != const0_rtx
1043               && targetm.cannot_force_const_mem (mode, imm))
1044             {
1045               gcc_assert (can_create_pseudo_p ());
1046               base = aarch64_force_temporary (mode, dest, base);
1047               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1048               aarch64_emit_move (dest, base);
1049               return;
1050             }
1051           mem = force_const_mem (ptr_mode, imm);
1052           gcc_assert (mem);
1053           if (mode != ptr_mode)
1054             mem = gen_rtx_ZERO_EXTEND (mode, mem);
1055           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1056           return;
1057
1058         case SYMBOL_SMALL_TLSGD:
1059         case SYMBOL_SMALL_TLSDESC:
1060         case SYMBOL_SMALL_GOTTPREL:
1061         case SYMBOL_SMALL_GOT:
1062         case SYMBOL_TINY_GOT:
1063           if (offset != const0_rtx)
1064             {
1065               gcc_assert(can_create_pseudo_p ());
1066               base = aarch64_force_temporary (mode, dest, base);
1067               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1068               aarch64_emit_move (dest, base);
1069               return;
1070             }
1071           /* FALLTHRU */
1072
1073         case SYMBOL_SMALL_TPREL:
1074         case SYMBOL_SMALL_ABSOLUTE:
1075         case SYMBOL_TINY_ABSOLUTE:
1076           aarch64_load_symref_appropriately (dest, imm, sty);
1077           return;
1078
1079         default:
1080           gcc_unreachable ();
1081         }
1082     }
1083
1084   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1085     {
1086       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1087       return;
1088     }
1089
1090   if (!CONST_INT_P (imm))
1091     {
1092       if (GET_CODE (imm) == HIGH)
1093         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1094       else
1095         {
1096           rtx mem = force_const_mem (mode, imm);
1097           gcc_assert (mem);
1098           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1099         }
1100
1101       return;
1102     }
1103
1104   if (mode == SImode)
1105     {
1106       /* We know we can't do this in 1 insn, and we must be able to do it
1107          in two; so don't mess around looking for sequences that don't buy
1108          us anything.  */
1109       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1110       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1111                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1112       return;
1113     }
1114
1115   /* Remaining cases are all for DImode.  */
1116
1117   val = INTVAL (imm);
1118   subtargets = optimize && can_create_pseudo_p ();
1119
1120   one_match = 0;
1121   zero_match = 0;
1122   mask = 0xffff;
1123   first_not_ffff_match = -1;
1124
1125   for (i = 0; i < 64; i += 16, mask <<= 16)
1126     {
1127       if ((val & mask) == mask)
1128         one_match++;
1129       else
1130         {
1131           if (first_not_ffff_match < 0)
1132             first_not_ffff_match = i;
1133           if ((val & mask) == 0)
1134             zero_match++;
1135         }
1136     }
1137
1138   if (one_match == 2)
1139     {
1140       /* Set one of the quarters and then insert back into result.  */
1141       mask = 0xffffll << first_not_ffff_match;
1142       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1143       emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1144                                  GEN_INT ((val >> first_not_ffff_match)
1145                                           & 0xffff)));
1146       return;
1147     }
1148
1149   if (zero_match == 2)
1150     goto simple_sequence;
1151
1152   mask = 0x0ffff0000UL;
1153   for (i = 16; i < 64; i += 16, mask <<= 16)
1154     {
1155       HOST_WIDE_INT comp = mask & ~(mask - 1);
1156
1157       if (aarch64_uimm12_shift (val - (val & mask)))
1158         {
1159           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1160
1161           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1162           emit_insn (gen_adddi3 (dest, subtarget,
1163                                  GEN_INT (val - (val & mask))));
1164           return;
1165         }
1166       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1167         {
1168           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1169
1170           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1171                                   GEN_INT ((val + comp) & mask)));
1172           emit_insn (gen_adddi3 (dest, subtarget,
1173                                  GEN_INT (val - ((val + comp) & mask))));
1174           return;
1175         }
1176       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1177         {
1178           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1179
1180           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1181                                   GEN_INT ((val - comp) | ~mask)));
1182           emit_insn (gen_adddi3 (dest, subtarget,
1183                                  GEN_INT (val - ((val - comp) | ~mask))));
1184           return;
1185         }
1186       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1187         {
1188           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1189
1190           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1191                                   GEN_INT (val | ~mask)));
1192           emit_insn (gen_adddi3 (dest, subtarget,
1193                                  GEN_INT (val - (val | ~mask))));
1194           return;
1195         }
1196     }
1197
1198   /* See if we can do it by arithmetically combining two
1199      immediates.  */
1200   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1201     {
1202       int j;
1203       mask = 0xffff;
1204
1205       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1206           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1207         {
1208           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1209           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1210                                   GEN_INT (aarch64_bitmasks[i])));
1211           emit_insn (gen_adddi3 (dest, subtarget,
1212                                  GEN_INT (val - aarch64_bitmasks[i])));
1213           return;
1214         }
1215
1216       for (j = 0; j < 64; j += 16, mask <<= 16)
1217         {
1218           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1219             {
1220               emit_insn (gen_rtx_SET (VOIDmode, dest,
1221                                       GEN_INT (aarch64_bitmasks[i])));
1222               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1223                                          GEN_INT ((val >> j) & 0xffff)));
1224               return;
1225             }
1226         }
1227     }
1228
1229   /* See if we can do it by logically combining two immediates.  */
1230   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1231     {
1232       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1233         {
1234           int j;
1235
1236           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1237             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1238               {
1239                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1240                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1241                                         GEN_INT (aarch64_bitmasks[i])));
1242                 emit_insn (gen_iordi3 (dest, subtarget,
1243                                        GEN_INT (aarch64_bitmasks[j])));
1244                 return;
1245               }
1246         }
1247       else if ((val & aarch64_bitmasks[i]) == val)
1248         {
1249           int j;
1250
1251           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1252             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1253               {
1254
1255                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1256                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1257                                         GEN_INT (aarch64_bitmasks[j])));
1258                 emit_insn (gen_anddi3 (dest, subtarget,
1259                                        GEN_INT (aarch64_bitmasks[i])));
1260                 return;
1261               }
1262         }
1263     }
1264
1265   if (one_match > zero_match)
1266     {
1267       /* Set either first three quarters or all but the third.   */
1268       mask = 0xffffll << (16 - first_not_ffff_match);
1269       emit_insn (gen_rtx_SET (VOIDmode, dest,
1270                               GEN_INT (val | mask | 0xffffffff00000000ull)));
1271
1272       /* Now insert other two quarters.  */
1273       for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1274            i < 64; i += 16, mask <<= 16)
1275         {
1276           if ((val & mask) != mask)
1277             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1278                                        GEN_INT ((val >> i) & 0xffff)));
1279         }
1280       return;
1281     }
1282
1283  simple_sequence:
1284   first = true;
1285   mask = 0xffff;
1286   for (i = 0; i < 64; i += 16, mask <<= 16)
1287     {
1288       if ((val & mask) != 0)
1289         {
1290           if (first)
1291             {
1292               emit_insn (gen_rtx_SET (VOIDmode, dest,
1293                                       GEN_INT (val & mask)));
1294               first = false;
1295             }
1296           else
1297             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1298                                        GEN_INT ((val >> i) & 0xffff)));
1299         }
1300     }
1301 }
1302
1303 static bool
1304 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1305                                  tree exp ATTRIBUTE_UNUSED)
1306 {
1307   /* Currently, always true.  */
1308   return true;
1309 }
1310
1311 /* Implement TARGET_PASS_BY_REFERENCE.  */
1312
1313 static bool
1314 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1315                            enum machine_mode mode,
1316                            const_tree type,
1317                            bool named ATTRIBUTE_UNUSED)
1318 {
1319   HOST_WIDE_INT size;
1320   enum machine_mode dummymode;
1321   int nregs;
1322
1323   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
1324   size = (mode == BLKmode && type)
1325     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1326
1327   /* Aggregates are passed by reference based on their size.  */
1328   if (type && AGGREGATE_TYPE_P (type))
1329     {
1330       size = int_size_in_bytes (type);
1331     }
1332
1333   /* Variable sized arguments are always returned by reference.  */
1334   if (size < 0)
1335     return true;
1336
1337   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1338   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1339                                                &dummymode, &nregs,
1340                                                NULL))
1341     return false;
1342
1343   /* Arguments which are variable sized or larger than 2 registers are
1344      passed by reference unless they are a homogenous floating point
1345      aggregate.  */
1346   return size > 2 * UNITS_PER_WORD;
1347 }
1348
1349 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1350 static bool
1351 aarch64_return_in_msb (const_tree valtype)
1352 {
1353   enum machine_mode dummy_mode;
1354   int dummy_int;
1355
1356   /* Never happens in little-endian mode.  */
1357   if (!BYTES_BIG_ENDIAN)
1358     return false;
1359
1360   /* Only composite types smaller than or equal to 16 bytes can
1361      be potentially returned in registers.  */
1362   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1363       || int_size_in_bytes (valtype) <= 0
1364       || int_size_in_bytes (valtype) > 16)
1365     return false;
1366
1367   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1368      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1369      is always passed/returned in the least significant bits of fp/simd
1370      register(s).  */
1371   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1372                                                &dummy_mode, &dummy_int, NULL))
1373     return false;
1374
1375   return true;
1376 }
1377
1378 /* Implement TARGET_FUNCTION_VALUE.
1379    Define how to find the value returned by a function.  */
1380
1381 static rtx
1382 aarch64_function_value (const_tree type, const_tree func,
1383                         bool outgoing ATTRIBUTE_UNUSED)
1384 {
1385   enum machine_mode mode;
1386   int unsignedp;
1387   int count;
1388   enum machine_mode ag_mode;
1389
1390   mode = TYPE_MODE (type);
1391   if (INTEGRAL_TYPE_P (type))
1392     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1393
1394   if (aarch64_return_in_msb (type))
1395     {
1396       HOST_WIDE_INT size = int_size_in_bytes (type);
1397
1398       if (size % UNITS_PER_WORD != 0)
1399         {
1400           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1401           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1402         }
1403     }
1404
1405   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1406                                                &ag_mode, &count, NULL))
1407     {
1408       if (!aarch64_composite_type_p (type, mode))
1409         {
1410           gcc_assert (count == 1 && mode == ag_mode);
1411           return gen_rtx_REG (mode, V0_REGNUM);
1412         }
1413       else
1414         {
1415           int i;
1416           rtx par;
1417
1418           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1419           for (i = 0; i < count; i++)
1420             {
1421               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1422               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1423                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1424               XVECEXP (par, 0, i) = tmp;
1425             }
1426           return par;
1427         }
1428     }
1429   else
1430     return gen_rtx_REG (mode, R0_REGNUM);
1431 }
1432
1433 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1434    Return true if REGNO is the number of a hard register in which the values
1435    of called function may come back.  */
1436
1437 static bool
1438 aarch64_function_value_regno_p (const unsigned int regno)
1439 {
1440   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1441      of 16-byte return values are: 128-bit integers and 16-byte small
1442      structures (excluding homogeneous floating-point aggregates).  */
1443   if (regno == R0_REGNUM || regno == R1_REGNUM)
1444     return true;
1445
1446   /* Up to four fp/simd registers can return a function value, e.g. a
1447      homogeneous floating-point aggregate having four members.  */
1448   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1449     return !TARGET_GENERAL_REGS_ONLY;
1450
1451   return false;
1452 }
1453
1454 /* Implement TARGET_RETURN_IN_MEMORY.
1455
1456    If the type T of the result of a function is such that
1457      void func (T arg)
1458    would require that arg be passed as a value in a register (or set of
1459    registers) according to the parameter passing rules, then the result
1460    is returned in the same registers as would be used for such an
1461    argument.  */
1462
1463 static bool
1464 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1465 {
1466   HOST_WIDE_INT size;
1467   enum machine_mode ag_mode;
1468   int count;
1469
1470   if (!AGGREGATE_TYPE_P (type)
1471       && TREE_CODE (type) != COMPLEX_TYPE
1472       && TREE_CODE (type) != VECTOR_TYPE)
1473     /* Simple scalar types always returned in registers.  */
1474     return false;
1475
1476   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1477                                                type,
1478                                                &ag_mode,
1479                                                &count,
1480                                                NULL))
1481     return false;
1482
1483   /* Types larger than 2 registers returned in memory.  */
1484   size = int_size_in_bytes (type);
1485   return (size < 0 || size > 2 * UNITS_PER_WORD);
1486 }
1487
1488 static bool
1489 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1490                                const_tree type, int *nregs)
1491 {
1492   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1493   return aarch64_vfp_is_call_or_return_candidate (mode,
1494                                                   type,
1495                                                   &pcum->aapcs_vfp_rmode,
1496                                                   nregs,
1497                                                   NULL);
1498 }
1499
1500 /* Given MODE and TYPE of a function argument, return the alignment in
1501    bits.  The idea is to suppress any stronger alignment requested by
1502    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1503    This is a helper function for local use only.  */
1504
1505 static unsigned int
1506 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1507 {
1508   unsigned int alignment;
1509
1510   if (type)
1511     {
1512       if (!integer_zerop (TYPE_SIZE (type)))
1513         {
1514           if (TYPE_MODE (type) == mode)
1515             alignment = TYPE_ALIGN (type);
1516           else
1517             alignment = GET_MODE_ALIGNMENT (mode);
1518         }
1519       else
1520         alignment = 0;
1521     }
1522   else
1523     alignment = GET_MODE_ALIGNMENT (mode);
1524
1525   return alignment;
1526 }
1527
1528 /* Layout a function argument according to the AAPCS64 rules.  The rule
1529    numbers refer to the rule numbers in the AAPCS64.  */
1530
1531 static void
1532 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1533                     const_tree type,
1534                     bool named ATTRIBUTE_UNUSED)
1535 {
1536   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1537   int ncrn, nvrn, nregs;
1538   bool allocate_ncrn, allocate_nvrn;
1539   HOST_WIDE_INT size;
1540
1541   /* We need to do this once per argument.  */
1542   if (pcum->aapcs_arg_processed)
1543     return;
1544
1545   pcum->aapcs_arg_processed = true;
1546
1547   /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
1548   size
1549     = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1550                         UNITS_PER_WORD);
1551
1552   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1553   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1554                                                  mode,
1555                                                  type,
1556                                                  &nregs);
1557
1558   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1559      The following code thus handles passing by SIMD/FP registers first.  */
1560
1561   nvrn = pcum->aapcs_nvrn;
1562
1563   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1564      and homogenous short-vector aggregates (HVA).  */
1565   if (allocate_nvrn)
1566     {
1567       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1568         {
1569           pcum->aapcs_nextnvrn = nvrn + nregs;
1570           if (!aarch64_composite_type_p (type, mode))
1571             {
1572               gcc_assert (nregs == 1);
1573               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1574             }
1575           else
1576             {
1577               rtx par;
1578               int i;
1579               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1580               for (i = 0; i < nregs; i++)
1581                 {
1582                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1583                                          V0_REGNUM + nvrn + i);
1584                   tmp = gen_rtx_EXPR_LIST
1585                     (VOIDmode, tmp,
1586                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1587                   XVECEXP (par, 0, i) = tmp;
1588                 }
1589               pcum->aapcs_reg = par;
1590             }
1591           return;
1592         }
1593       else
1594         {
1595           /* C.3 NSRN is set to 8.  */
1596           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1597           goto on_stack;
1598         }
1599     }
1600
1601   ncrn = pcum->aapcs_ncrn;
1602   nregs = size / UNITS_PER_WORD;
1603
1604   /* C6 - C9.  though the sign and zero extension semantics are
1605      handled elsewhere.  This is the case where the argument fits
1606      entirely general registers.  */
1607   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1608     {
1609       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1610
1611       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1612
1613       /* C.8 if the argument has an alignment of 16 then the NGRN is
1614          rounded up to the next even number.  */
1615       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1616         {
1617           ++ncrn;
1618           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1619         }
1620       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1621          A reg is still generated for it, but the caller should be smart
1622          enough not to use it.  */
1623       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1624         {
1625           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1626         }
1627       else
1628         {
1629           rtx par;
1630           int i;
1631
1632           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1633           for (i = 0; i < nregs; i++)
1634             {
1635               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1636               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1637                                        GEN_INT (i * UNITS_PER_WORD));
1638               XVECEXP (par, 0, i) = tmp;
1639             }
1640           pcum->aapcs_reg = par;
1641         }
1642
1643       pcum->aapcs_nextncrn = ncrn + nregs;
1644       return;
1645     }
1646
1647   /* C.11  */
1648   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1649
1650   /* The argument is passed on stack; record the needed number of words for
1651      this argument and align the total size if necessary.  */
1652 on_stack:
1653   pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1654   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1655     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1656                                                16 / UNITS_PER_WORD);
1657   return;
1658 }
1659
1660 /* Implement TARGET_FUNCTION_ARG.  */
1661
1662 static rtx
1663 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1664                       const_tree type, bool named)
1665 {
1666   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1667   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1668
1669   if (mode == VOIDmode)
1670     return NULL_RTX;
1671
1672   aarch64_layout_arg (pcum_v, mode, type, named);
1673   return pcum->aapcs_reg;
1674 }
1675
1676 void
1677 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1678                            const_tree fntype ATTRIBUTE_UNUSED,
1679                            rtx libname ATTRIBUTE_UNUSED,
1680                            const_tree fndecl ATTRIBUTE_UNUSED,
1681                            unsigned n_named ATTRIBUTE_UNUSED)
1682 {
1683   pcum->aapcs_ncrn = 0;
1684   pcum->aapcs_nvrn = 0;
1685   pcum->aapcs_nextncrn = 0;
1686   pcum->aapcs_nextnvrn = 0;
1687   pcum->pcs_variant = ARM_PCS_AAPCS64;
1688   pcum->aapcs_reg = NULL_RTX;
1689   pcum->aapcs_arg_processed = false;
1690   pcum->aapcs_stack_words = 0;
1691   pcum->aapcs_stack_size = 0;
1692
1693   return;
1694 }
1695
1696 static void
1697 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1698                               enum machine_mode mode,
1699                               const_tree type,
1700                               bool named)
1701 {
1702   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1703   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1704     {
1705       aarch64_layout_arg (pcum_v, mode, type, named);
1706       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1707                   != (pcum->aapcs_stack_words != 0));
1708       pcum->aapcs_arg_processed = false;
1709       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1710       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1711       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1712       pcum->aapcs_stack_words = 0;
1713       pcum->aapcs_reg = NULL_RTX;
1714     }
1715 }
1716
1717 bool
1718 aarch64_function_arg_regno_p (unsigned regno)
1719 {
1720   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1721           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1722 }
1723
1724 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1725    PARM_BOUNDARY bits of alignment, but will be given anything up
1726    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1727    that both before and after the layout of each argument, the Next
1728    Stacked Argument Address (NSAA) will have a minimum alignment of
1729    8 bytes.  */
1730
1731 static unsigned int
1732 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1733 {
1734   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1735
1736   if (alignment < PARM_BOUNDARY)
1737     alignment = PARM_BOUNDARY;
1738   if (alignment > STACK_BOUNDARY)
1739     alignment = STACK_BOUNDARY;
1740   return alignment;
1741 }
1742
1743 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1744
1745    Return true if an argument passed on the stack should be padded upwards,
1746    i.e. if the least-significant byte of the stack slot has useful data.
1747
1748    Small aggregate types are placed in the lowest memory address.
1749
1750    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1751
1752 bool
1753 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1754 {
1755   /* On little-endian targets, the least significant byte of every stack
1756      argument is passed at the lowest byte address of the stack slot.  */
1757   if (!BYTES_BIG_ENDIAN)
1758     return true;
1759
1760   /* Otherwise, integral, floating-point and pointer types are padded downward:
1761      the least significant byte of a stack argument is passed at the highest
1762      byte address of the stack slot.  */
1763   if (type
1764       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1765          || POINTER_TYPE_P (type))
1766       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1767     return false;
1768
1769   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1770   return true;
1771 }
1772
1773 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1774
1775    It specifies padding for the last (may also be the only)
1776    element of a block move between registers and memory.  If
1777    assuming the block is in the memory, padding upward means that
1778    the last element is padded after its highest significant byte,
1779    while in downward padding, the last element is padded at the
1780    its least significant byte side.
1781
1782    Small aggregates and small complex types are always padded
1783    upwards.
1784
1785    We don't need to worry about homogeneous floating-point or
1786    short-vector aggregates; their move is not affected by the
1787    padding direction determined here.  Regardless of endianness,
1788    each element of such an aggregate is put in the least
1789    significant bits of a fp/simd register.
1790
1791    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1792    register has useful data, and return the opposite if the most
1793    significant byte does.  */
1794
1795 bool
1796 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1797                      bool first ATTRIBUTE_UNUSED)
1798 {
1799
1800   /* Small composite types are always padded upward.  */
1801   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1802     {
1803       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1804                             : GET_MODE_SIZE (mode));
1805       if (size < 2 * UNITS_PER_WORD)
1806         return true;
1807     }
1808
1809   /* Otherwise, use the default padding.  */
1810   return !BYTES_BIG_ENDIAN;
1811 }
1812
1813 static enum machine_mode
1814 aarch64_libgcc_cmp_return_mode (void)
1815 {
1816   return SImode;
1817 }
1818
1819 static bool
1820 aarch64_frame_pointer_required (void)
1821 {
1822   /* In aarch64_override_options_after_change
1823      flag_omit_leaf_frame_pointer turns off the frame pointer by
1824      default.  Turn it back on now if we've not got a leaf
1825      function.  */
1826   if (flag_omit_leaf_frame_pointer
1827       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1828     return true;
1829
1830   return false;
1831 }
1832
1833 /* Mark the registers that need to be saved by the callee and calculate
1834    the size of the callee-saved registers area and frame record (both FP
1835    and LR may be omitted).  */
1836 static void
1837 aarch64_layout_frame (void)
1838 {
1839   HOST_WIDE_INT offset = 0;
1840   int regno;
1841
1842   if (reload_completed && cfun->machine->frame.laid_out)
1843     return;
1844
1845 #define SLOT_NOT_REQUIRED (-2)
1846 #define SLOT_REQUIRED     (-1)
1847
1848   cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1849   cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1850
1851   /* First mark all the registers that really need to be saved...  */
1852   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1853     cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1854
1855   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1856     cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1857
1858   /* ... that includes the eh data registers (if needed)...  */
1859   if (crtl->calls_eh_return)
1860     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1861       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1862         = SLOT_REQUIRED;
1863
1864   /* ... and any callee saved register that dataflow says is live.  */
1865   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1866     if (df_regs_ever_live_p (regno)
1867         && !call_used_regs[regno])
1868       cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1869
1870   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1871     if (df_regs_ever_live_p (regno)
1872         && !call_used_regs[regno])
1873       cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1874
1875   if (frame_pointer_needed)
1876     {
1877       /* FP and LR are placed in the linkage record.  */
1878       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1879       cfun->machine->frame.wb_candidate1 = R29_REGNUM;
1880       cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1881       cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1882       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1883       offset += 2 * UNITS_PER_WORD;
1884     }
1885
1886   /* Now assign stack slots for them.  */
1887   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1888     if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1889       {
1890         cfun->machine->frame.reg_offset[regno] = offset;
1891         if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1892           cfun->machine->frame.wb_candidate1 = regno;
1893         else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1894           cfun->machine->frame.wb_candidate2 = regno;
1895         offset += UNITS_PER_WORD;
1896       }
1897
1898   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1899     if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1900       {
1901         cfun->machine->frame.reg_offset[regno] = offset;
1902         if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1903           cfun->machine->frame.wb_candidate1 = regno;
1904         else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1905                  && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1906           cfun->machine->frame.wb_candidate2 = regno;
1907         offset += UNITS_PER_WORD;
1908       }
1909
1910   cfun->machine->frame.padding0 =
1911     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1912   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1913
1914   cfun->machine->frame.saved_regs_size = offset;
1915
1916   cfun->machine->frame.hard_fp_offset
1917     = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1918                         + get_frame_size ()
1919                         + cfun->machine->frame.saved_regs_size,
1920                         STACK_BOUNDARY / BITS_PER_UNIT);
1921
1922   cfun->machine->frame.frame_size
1923     = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1924                         + crtl->outgoing_args_size,
1925                         STACK_BOUNDARY / BITS_PER_UNIT);
1926
1927   cfun->machine->frame.laid_out = true;
1928 }
1929
1930 static bool
1931 aarch64_register_saved_on_entry (int regno)
1932 {
1933   return cfun->machine->frame.reg_offset[regno] >= 0;
1934 }
1935
1936 static unsigned
1937 aarch64_next_callee_save (unsigned regno, unsigned limit)
1938 {
1939   while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1940     regno ++;
1941   return regno;
1942 }
1943
1944 static void
1945 aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
1946                            HOST_WIDE_INT adjustment)
1947  {
1948   rtx base_rtx = stack_pointer_rtx;
1949   rtx insn, reg, mem;
1950
1951   reg = gen_rtx_REG (mode, regno);
1952   mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1953                             plus_constant (Pmode, base_rtx, -adjustment));
1954   mem = gen_rtx_MEM (mode, mem);
1955
1956   insn = emit_move_insn (mem, reg);
1957   RTX_FRAME_RELATED_P (insn) = 1;
1958 }
1959
1960 static rtx
1961 aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1962                           HOST_WIDE_INT adjustment)
1963 {
1964   switch (mode)
1965     {
1966     case DImode:
1967       return gen_storewb_pairdi_di (base, base, reg, reg2,
1968                                     GEN_INT (-adjustment),
1969                                     GEN_INT (UNITS_PER_WORD - adjustment));
1970     case DFmode:
1971       return gen_storewb_pairdf_di (base, base, reg, reg2,
1972                                     GEN_INT (-adjustment),
1973                                     GEN_INT (UNITS_PER_WORD - adjustment));
1974     default:
1975       gcc_unreachable ();
1976     }
1977 }
1978
1979 static void
1980 aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
1981                          unsigned regno2, HOST_WIDE_INT adjustment)
1982 {
1983   rtx_insn *insn;
1984   rtx reg1 = gen_rtx_REG (mode, regno1);
1985   rtx reg2 = gen_rtx_REG (mode, regno2);
1986
1987   insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
1988                                               reg2, adjustment));
1989   RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1990   RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1991   RTX_FRAME_RELATED_P (insn) = 1;
1992 }
1993
1994 static rtx
1995 aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1996                          HOST_WIDE_INT adjustment)
1997 {
1998   switch (mode)
1999     {
2000     case DImode:
2001       return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2002                                    GEN_INT (UNITS_PER_WORD));
2003     case DFmode:
2004       return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2005                                    GEN_INT (UNITS_PER_WORD));
2006     default:
2007       gcc_unreachable ();
2008     }
2009 }
2010
2011 static rtx
2012 aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2013                         rtx reg2)
2014 {
2015   switch (mode)
2016     {
2017     case DImode:
2018       return gen_store_pairdi (mem1, reg1, mem2, reg2);
2019
2020     case DFmode:
2021       return gen_store_pairdf (mem1, reg1, mem2, reg2);
2022
2023     default:
2024       gcc_unreachable ();
2025     }
2026 }
2027
2028 static rtx
2029 aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2030                        rtx mem2)
2031 {
2032   switch (mode)
2033     {
2034     case DImode:
2035       return gen_load_pairdi (reg1, mem1, reg2, mem2);
2036
2037     case DFmode:
2038       return gen_load_pairdf (reg1, mem1, reg2, mem2);
2039
2040     default:
2041       gcc_unreachable ();
2042     }
2043 }
2044
2045
2046 static void
2047 aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
2048                            unsigned start, unsigned limit, bool skip_wb)
2049 {
2050   rtx_insn *insn;
2051   rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2052                                                  ? gen_frame_mem : gen_rtx_MEM);
2053   unsigned regno;
2054   unsigned regno2;
2055
2056   for (regno = aarch64_next_callee_save (start, limit);
2057        regno <= limit;
2058        regno = aarch64_next_callee_save (regno + 1, limit))
2059     {
2060       rtx reg, mem;
2061       HOST_WIDE_INT offset;
2062
2063       if (skip_wb
2064           && (regno == cfun->machine->frame.wb_candidate1
2065               || regno == cfun->machine->frame.wb_candidate2))
2066         continue;
2067
2068       reg = gen_rtx_REG (mode, regno);
2069       offset = start_offset + cfun->machine->frame.reg_offset[regno];
2070       mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2071                                               offset));
2072
2073       regno2 = aarch64_next_callee_save (regno + 1, limit);
2074
2075       if (regno2 <= limit
2076           && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2077               == cfun->machine->frame.reg_offset[regno2]))
2078
2079         {
2080           rtx reg2 = gen_rtx_REG (mode, regno2);
2081           rtx mem2;
2082
2083           offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2084           mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2085                                                    offset));
2086           insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2087                                                     reg2));
2088
2089           /* The first part of a frame-related parallel insn is
2090              always assumed to be relevant to the frame
2091              calculations; subsequent parts, are only
2092              frame-related if explicitly marked.  */
2093           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094           regno = regno2;
2095         }
2096       else
2097         insn = emit_move_insn (mem, reg);
2098
2099       RTX_FRAME_RELATED_P (insn) = 1;
2100     }
2101 }
2102
2103 static void
2104 aarch64_restore_callee_saves (enum machine_mode mode,
2105                               HOST_WIDE_INT start_offset, unsigned start,
2106                               unsigned limit, bool skip_wb, rtx *cfi_ops)
2107 {
2108   rtx base_rtx = stack_pointer_rtx;
2109   rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2110                                                  ? gen_frame_mem : gen_rtx_MEM);
2111   unsigned regno;
2112   unsigned regno2;
2113   HOST_WIDE_INT offset;
2114
2115   for (regno = aarch64_next_callee_save (start, limit);
2116        regno <= limit;
2117        regno = aarch64_next_callee_save (regno + 1, limit))
2118     {
2119       rtx reg, mem;
2120
2121       if (skip_wb
2122           && (regno == cfun->machine->frame.wb_candidate1
2123               || regno == cfun->machine->frame.wb_candidate2))
2124         continue;
2125
2126       reg = gen_rtx_REG (mode, regno);
2127       offset = start_offset + cfun->machine->frame.reg_offset[regno];
2128       mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2129
2130       regno2 = aarch64_next_callee_save (regno + 1, limit);
2131
2132       if (regno2 <= limit
2133           && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2134               == cfun->machine->frame.reg_offset[regno2]))
2135         {
2136           rtx reg2 = gen_rtx_REG (mode, regno2);
2137           rtx mem2;
2138
2139           offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2140           mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2141           emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2142
2143           *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2144           regno = regno2;
2145         }
2146       else
2147         emit_move_insn (reg, mem);
2148       *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2149     }
2150 }
2151
2152 /* AArch64 stack frames generated by this compiler look like:
2153
2154         +-------------------------------+
2155         |                               |
2156         |  incoming stack arguments     |
2157         |                               |
2158         +-------------------------------+
2159         |                               | <-- incoming stack pointer (aligned)
2160         |  callee-allocated save area   |
2161         |  for register varargs         |
2162         |                               |
2163         +-------------------------------+
2164         |  local variables              | <-- frame_pointer_rtx
2165         |                               |
2166         +-------------------------------+
2167         |  padding0                     | \
2168         +-------------------------------+  |
2169         |  callee-saved registers       |  | frame.saved_regs_size
2170         +-------------------------------+  |
2171         |  LR'                          |  |
2172         +-------------------------------+  |
2173         |  FP'                          | / <- hard_frame_pointer_rtx (aligned)
2174         +-------------------------------+
2175         |  dynamic allocation           |
2176         +-------------------------------+
2177         |  padding                      |
2178         +-------------------------------+
2179         |  outgoing stack arguments     | <-- arg_pointer
2180         |                               |
2181         +-------------------------------+
2182         |                               | <-- stack_pointer_rtx (aligned)
2183
2184    Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2185    but leave frame_pointer_rtx and hard_frame_pointer_rtx
2186    unchanged.  */
2187
2188 /* Generate the prologue instructions for entry into a function.
2189    Establish the stack frame by decreasing the stack pointer with a
2190    properly calculated size and, if necessary, create a frame record
2191    filled with the values of LR and previous frame pointer.  The
2192    current FP is also set up if it is in use.  */
2193
2194 void
2195 aarch64_expand_prologue (void)
2196 {
2197   /* sub sp, sp, #<frame_size>
2198      stp {fp, lr}, [sp, #<frame_size> - 16]
2199      add fp, sp, #<frame_size> - hardfp_offset
2200      stp {cs_reg}, [fp, #-16] etc.
2201
2202      sub sp, sp, <final_adjustment_if_any>
2203   */
2204   HOST_WIDE_INT frame_size, offset;
2205   HOST_WIDE_INT fp_offset;              /* Offset from hard FP to SP.  */
2206   HOST_WIDE_INT hard_fp_offset;
2207   rtx_insn *insn;
2208
2209   aarch64_layout_frame ();
2210
2211   offset = frame_size = cfun->machine->frame.frame_size;
2212   hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2213   fp_offset = frame_size - hard_fp_offset;
2214
2215   if (flag_stack_usage_info)
2216     current_function_static_stack_size = frame_size;
2217
2218   /* Store pairs and load pairs have a range only -512 to 504.  */
2219   if (offset >= 512)
2220     {
2221       /* When the frame has a large size, an initial decrease is done on
2222          the stack pointer to jump over the callee-allocated save area for
2223          register varargs, the local variable area and/or the callee-saved
2224          register area.  This will allow the pre-index write-back
2225          store pair instructions to be used for setting up the stack frame
2226          efficiently.  */
2227       offset = hard_fp_offset;
2228       if (offset >= 512)
2229         offset = cfun->machine->frame.saved_regs_size;
2230
2231       frame_size -= (offset + crtl->outgoing_args_size);
2232       fp_offset = 0;
2233
2234       if (frame_size >= 0x1000000)
2235         {
2236           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2237           emit_move_insn (op0, GEN_INT (-frame_size));
2238           insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2239
2240           add_reg_note (insn, REG_CFA_ADJUST_CFA,
2241                         gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2242                                      plus_constant (Pmode, stack_pointer_rtx,
2243                                                     -frame_size)));
2244           RTX_FRAME_RELATED_P (insn) = 1;
2245         }
2246       else if (frame_size > 0)
2247         {
2248           int hi_ofs = frame_size & 0xfff000;
2249           int lo_ofs = frame_size & 0x000fff;
2250
2251           if (hi_ofs)
2252             {
2253               insn = emit_insn (gen_add2_insn
2254                                 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2255               RTX_FRAME_RELATED_P (insn) = 1;
2256             }
2257           if (lo_ofs)
2258             {
2259               insn = emit_insn (gen_add2_insn
2260                                 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2261               RTX_FRAME_RELATED_P (insn) = 1;
2262             }
2263         }
2264     }
2265   else
2266     frame_size = -1;
2267
2268   if (offset > 0)
2269     {
2270       bool skip_wb = false;
2271
2272       if (frame_pointer_needed)
2273         {
2274           skip_wb = true;
2275
2276           if (fp_offset)
2277             {
2278               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2279                                                GEN_INT (-offset)));
2280               RTX_FRAME_RELATED_P (insn) = 1;
2281
2282               aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2283                                          R30_REGNUM, false);
2284             }
2285           else
2286             aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2287
2288           /* Set up frame pointer to point to the location of the
2289              previous frame pointer on the stack.  */
2290           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2291                                            stack_pointer_rtx,
2292                                            GEN_INT (fp_offset)));
2293           RTX_FRAME_RELATED_P (insn) = 1;
2294           emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2295         }
2296       else
2297         {
2298           unsigned reg1 = cfun->machine->frame.wb_candidate1;
2299           unsigned reg2 = cfun->machine->frame.wb_candidate2;
2300
2301           if (fp_offset
2302               || reg1 == FIRST_PSEUDO_REGISTER
2303               || (reg2 == FIRST_PSEUDO_REGISTER
2304                   && offset >= 256))
2305             {
2306               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2307                                                GEN_INT (-offset)));
2308               RTX_FRAME_RELATED_P (insn) = 1;
2309             }
2310           else
2311             {
2312               enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2313
2314               skip_wb = true;
2315
2316               if (reg2 == FIRST_PSEUDO_REGISTER)
2317                 aarch64_pushwb_single_reg (mode1, reg1, offset);
2318               else
2319                 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2320             }
2321         }
2322
2323       aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2324                                  skip_wb);
2325       aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2326                                  skip_wb);
2327     }
2328
2329   /* when offset >= 512,
2330      sub sp, sp, #<outgoing_args_size> */
2331   if (frame_size > -1)
2332     {
2333       if (crtl->outgoing_args_size > 0)
2334         {
2335           insn = emit_insn (gen_add2_insn
2336                             (stack_pointer_rtx,
2337                              GEN_INT (- crtl->outgoing_args_size)));
2338           RTX_FRAME_RELATED_P (insn) = 1;
2339         }
2340     }
2341 }
2342
2343 /* Return TRUE if we can use a simple_return insn.
2344
2345    This function checks whether the callee saved stack is empty, which
2346    means no restore actions are need. The pro_and_epilogue will use
2347    this to check whether shrink-wrapping opt is feasible.  */
2348
2349 bool
2350 aarch64_use_return_insn_p (void)
2351 {
2352   if (!reload_completed)
2353     return false;
2354
2355   if (crtl->profile)
2356     return false;
2357
2358   aarch64_layout_frame ();
2359
2360   return cfun->machine->frame.frame_size == 0;
2361 }
2362
2363 /* Generate the epilogue instructions for returning from a function.  */
2364 void
2365 aarch64_expand_epilogue (bool for_sibcall)
2366 {
2367   HOST_WIDE_INT frame_size, offset;
2368   HOST_WIDE_INT fp_offset;
2369   HOST_WIDE_INT hard_fp_offset;
2370   rtx_insn *insn;
2371
2372   aarch64_layout_frame ();
2373
2374   offset = frame_size = cfun->machine->frame.frame_size;
2375   hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2376   fp_offset = frame_size - hard_fp_offset;
2377
2378   /* Store pairs and load pairs have a range only -512 to 504.  */
2379   if (offset >= 512)
2380     {
2381       offset = hard_fp_offset;
2382       if (offset >= 512)
2383         offset = cfun->machine->frame.saved_regs_size;
2384
2385       frame_size -= (offset + crtl->outgoing_args_size);
2386       fp_offset = 0;
2387       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2388         {
2389           insn = emit_insn (gen_add2_insn
2390                             (stack_pointer_rtx,
2391                              GEN_INT (crtl->outgoing_args_size)));
2392           RTX_FRAME_RELATED_P (insn) = 1;
2393         }
2394     }
2395   else
2396     frame_size = -1;
2397
2398   /* If there were outgoing arguments or we've done dynamic stack
2399      allocation, then restore the stack pointer from the frame
2400      pointer.  This is at most one insn and more efficient than using
2401      GCC's internal mechanism.  */
2402   if (frame_pointer_needed
2403       && (crtl->outgoing_args_size || cfun->calls_alloca))
2404     {
2405       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2406                                        hard_frame_pointer_rtx,
2407                                        GEN_INT (0)));
2408       offset = offset - fp_offset;
2409     }
2410
2411   if (offset > 0)
2412     {
2413       unsigned reg1 = cfun->machine->frame.wb_candidate1;
2414       unsigned reg2 = cfun->machine->frame.wb_candidate2;
2415       bool skip_wb = true;
2416       rtx cfi_ops = NULL;
2417
2418       if (frame_pointer_needed)
2419         fp_offset = 0;
2420       else if (fp_offset
2421                || reg1 == FIRST_PSEUDO_REGISTER
2422                || (reg2 == FIRST_PSEUDO_REGISTER
2423                    && offset >= 256))
2424         skip_wb = false;
2425
2426       aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2427                                     skip_wb, &cfi_ops);
2428       aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2429                                     skip_wb, &cfi_ops);
2430
2431       if (skip_wb)
2432         {
2433           enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2434           rtx rreg1 = gen_rtx_REG (mode1, reg1);
2435
2436           cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2437           if (reg2 == FIRST_PSEUDO_REGISTER)
2438             {
2439               rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2440               mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2441               mem = gen_rtx_MEM (mode1, mem);
2442               insn = emit_move_insn (rreg1, mem);
2443             }
2444           else
2445             {
2446               rtx rreg2 = gen_rtx_REG (mode1, reg2);
2447
2448               cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2449               insn = emit_insn (aarch64_gen_loadwb_pair
2450                                 (mode1, stack_pointer_rtx, rreg1,
2451                                  rreg2, offset));
2452             }
2453         }
2454       else
2455         {
2456           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2457                                            GEN_INT (offset)));
2458         }
2459
2460       /* Reset the CFA to be SP + FRAME_SIZE.  */
2461       rtx new_cfa = stack_pointer_rtx;
2462       if (frame_size > 0)
2463         new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2464       cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2465       REG_NOTES (insn) = cfi_ops;
2466       RTX_FRAME_RELATED_P (insn) = 1;
2467     }
2468
2469   if (frame_size > 0)
2470     {
2471       if (frame_size >= 0x1000000)
2472         {
2473           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2474           emit_move_insn (op0, GEN_INT (frame_size));
2475           insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2476         }
2477       else
2478         {
2479           int hi_ofs = frame_size & 0xfff000;
2480           int lo_ofs = frame_size & 0x000fff;
2481
2482           if (hi_ofs && lo_ofs)
2483             {
2484               insn = emit_insn (gen_add2_insn
2485                                 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2486               RTX_FRAME_RELATED_P (insn) = 1;
2487               frame_size = lo_ofs;
2488             }
2489           insn = emit_insn (gen_add2_insn
2490                             (stack_pointer_rtx, GEN_INT (frame_size)));
2491         }
2492
2493       /* Reset the CFA to be SP + 0.  */
2494       add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2495       RTX_FRAME_RELATED_P (insn) = 1;
2496     }
2497
2498   /* Stack adjustment for exception handler.  */
2499   if (crtl->calls_eh_return)
2500     {
2501       /* We need to unwind the stack by the offset computed by
2502          EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
2503          to be SP; letting the CFA move during this adjustment
2504          is just as correct as retaining the CFA from the body
2505          of the function.  Therefore, do nothing special.  */
2506       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2507     }
2508
2509   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2510   if (!for_sibcall)
2511     emit_jump_insn (ret_rtx);
2512 }
2513
2514 /* Return the place to copy the exception unwinding return address to.
2515    This will probably be a stack slot, but could (in theory be the
2516    return register).  */
2517 rtx
2518 aarch64_final_eh_return_addr (void)
2519 {
2520   HOST_WIDE_INT fp_offset;
2521
2522   aarch64_layout_frame ();
2523
2524   fp_offset = cfun->machine->frame.frame_size
2525               - cfun->machine->frame.hard_fp_offset;
2526
2527   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2528     return gen_rtx_REG (DImode, LR_REGNUM);
2529
2530   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2531      result in a store to save LR introduced by builtin_eh_return () being
2532      incorrectly deleted because the alias is not detected.
2533      So in the calculation of the address to copy the exception unwinding
2534      return address to, we note 2 cases.
2535      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2536      we return a SP-relative location since all the addresses are SP-relative
2537      in this case.  This prevents the store from being optimized away.
2538      If the fp_offset is not 0, then the addresses will be FP-relative and
2539      therefore we return a FP-relative location.  */
2540
2541   if (frame_pointer_needed)
2542     {
2543       if (fp_offset)
2544         return gen_frame_mem (DImode,
2545                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2546       else
2547         return gen_frame_mem (DImode,
2548                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2549     }
2550
2551   /* If FP is not needed, we calculate the location of LR, which would be
2552      at the top of the saved registers block.  */
2553
2554   return gen_frame_mem (DImode,
2555                         plus_constant (Pmode,
2556                                        stack_pointer_rtx,
2557                                        fp_offset
2558                                        + cfun->machine->frame.saved_regs_size
2559                                        - 2 * UNITS_PER_WORD));
2560 }
2561
2562 /* Possibly output code to build up a constant in a register.  For
2563    the benefit of the costs infrastructure, returns the number of
2564    instructions which would be emitted.  GENERATE inhibits or
2565    enables code generation.  */
2566
2567 static int
2568 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2569 {
2570   int insns = 0;
2571
2572   if (aarch64_bitmask_imm (val, DImode))
2573     {
2574       if (generate)
2575         emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2576       insns = 1;
2577     }
2578   else
2579     {
2580       int i;
2581       int ncount = 0;
2582       int zcount = 0;
2583       HOST_WIDE_INT valp = val >> 16;
2584       HOST_WIDE_INT valm;
2585       HOST_WIDE_INT tval;
2586
2587       for (i = 16; i < 64; i += 16)
2588         {
2589           valm = (valp & 0xffff);
2590
2591           if (valm != 0)
2592             ++ zcount;
2593
2594           if (valm != 0xffff)
2595             ++ ncount;
2596
2597           valp >>= 16;
2598         }
2599
2600       /* zcount contains the number of additional MOVK instructions
2601          required if the constant is built up with an initial MOVZ instruction,
2602          while ncount is the number of MOVK instructions required if starting
2603          with a MOVN instruction.  Choose the sequence that yields the fewest
2604          number of instructions, preferring MOVZ instructions when they are both
2605          the same.  */
2606       if (ncount < zcount)
2607         {
2608           if (generate)
2609             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2610                             GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2611           tval = 0xffff;
2612           insns++;
2613         }
2614       else
2615         {
2616           if (generate)
2617             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2618                             GEN_INT (val & 0xffff));
2619           tval = 0;
2620           insns++;
2621         }
2622
2623       val >>= 16;
2624
2625       for (i = 16; i < 64; i += 16)
2626         {
2627           if ((val & 0xffff) != tval)
2628             {
2629               if (generate)
2630                 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2631                                            GEN_INT (i),
2632                                            GEN_INT (val & 0xffff)));
2633               insns++;
2634             }
2635           val >>= 16;
2636         }
2637     }
2638   return insns;
2639 }
2640
2641 static void
2642 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2643 {
2644   HOST_WIDE_INT mdelta = delta;
2645   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2646   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2647
2648   if (mdelta < 0)
2649     mdelta = -mdelta;
2650
2651   if (mdelta >= 4096 * 4096)
2652     {
2653       (void) aarch64_build_constant (scratchreg, delta, true);
2654       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2655     }
2656   else if (mdelta > 0)
2657     {
2658       if (mdelta >= 4096)
2659         {
2660           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2661           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2662           if (delta < 0)
2663             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2664                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2665           else
2666             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2667                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2668         }
2669       if (mdelta % 4096 != 0)
2670         {
2671           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2672           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2673                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2674         }
2675     }
2676 }
2677
2678 /* Output code to add DELTA to the first argument, and then jump
2679    to FUNCTION.  Used for C++ multiple inheritance.  */
2680 static void
2681 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2682                          HOST_WIDE_INT delta,
2683                          HOST_WIDE_INT vcall_offset,
2684                          tree function)
2685 {
2686   /* The this pointer is always in x0.  Note that this differs from
2687      Arm where the this pointer maybe bumped to r1 if r0 is required
2688      to return a pointer to an aggregate.  On AArch64 a result value
2689      pointer will be in x8.  */
2690   int this_regno = R0_REGNUM;
2691   rtx this_rtx, temp0, temp1, addr, funexp;
2692   rtx_insn *insn;
2693
2694   reload_completed = 1;
2695   emit_note (NOTE_INSN_PROLOGUE_END);
2696
2697   if (vcall_offset == 0)
2698     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2699   else
2700     {
2701       gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2702
2703       this_rtx = gen_rtx_REG (Pmode, this_regno);
2704       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2705       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2706
2707       addr = this_rtx;
2708       if (delta != 0)
2709         {
2710           if (delta >= -256 && delta < 256)
2711             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2712                                        plus_constant (Pmode, this_rtx, delta));
2713           else
2714             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2715         }
2716
2717       if (Pmode == ptr_mode)
2718         aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2719       else
2720         aarch64_emit_move (temp0,
2721                            gen_rtx_ZERO_EXTEND (Pmode,
2722                                                 gen_rtx_MEM (ptr_mode, addr)));
2723
2724       if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2725           addr = plus_constant (Pmode, temp0, vcall_offset);
2726       else
2727         {
2728           (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2729           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2730         }
2731
2732       if (Pmode == ptr_mode)
2733         aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2734       else
2735         aarch64_emit_move (temp1,
2736                            gen_rtx_SIGN_EXTEND (Pmode,
2737                                                 gen_rtx_MEM (ptr_mode, addr)));
2738
2739       emit_insn (gen_add2_insn (this_rtx, temp1));
2740     }
2741
2742   /* Generate a tail call to the target function.  */
2743   if (!TREE_USED (function))
2744     {
2745       assemble_external (function);
2746       TREE_USED (function) = 1;
2747     }
2748   funexp = XEXP (DECL_RTL (function), 0);
2749   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2750   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2751   SIBLING_CALL_P (insn) = 1;
2752
2753   insn = get_insns ();
2754   shorten_branches (insn);
2755   final_start_function (insn, file, 1);
2756   final (insn, file, 1);
2757   final_end_function ();
2758
2759   /* Stop pretending to be a post-reload pass.  */
2760   reload_completed = 0;
2761 }
2762
2763 static int
2764 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2765 {
2766   if (GET_CODE (*x) == SYMBOL_REF)
2767     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2768
2769   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2770      TLS offsets, not real symbol references.  */
2771   if (GET_CODE (*x) == UNSPEC
2772       && XINT (*x, 1) == UNSPEC_TLS)
2773     return -1;
2774
2775   return 0;
2776 }
2777
2778 static bool
2779 aarch64_tls_referenced_p (rtx x)
2780 {
2781   if (!TARGET_HAVE_TLS)
2782     return false;
2783
2784   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2785 }
2786
2787
2788 static int
2789 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2790 {
2791   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2792   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2793
2794   if (*imm1 < *imm2)
2795     return -1;
2796   if (*imm1 > *imm2)
2797     return +1;
2798   return 0;
2799 }
2800
2801
2802 static void
2803 aarch64_build_bitmask_table (void)
2804 {
2805   unsigned HOST_WIDE_INT mask, imm;
2806   unsigned int log_e, e, s, r;
2807   unsigned int nimms = 0;
2808
2809   for (log_e = 1; log_e <= 6; log_e++)
2810     {
2811       e = 1 << log_e;
2812       if (e == 64)
2813         mask = ~(HOST_WIDE_INT) 0;
2814       else
2815         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2816       for (s = 1; s < e; s++)
2817         {
2818           for (r = 0; r < e; r++)
2819             {
2820               /* set s consecutive bits to 1 (s < 64) */
2821               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2822               /* rotate right by r */
2823               if (r != 0)
2824                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2825               /* replicate the constant depending on SIMD size */
2826               switch (log_e) {
2827               case 1: imm |= (imm <<  2);
2828               case 2: imm |= (imm <<  4);
2829               case 3: imm |= (imm <<  8);
2830               case 4: imm |= (imm << 16);
2831               case 5: imm |= (imm << 32);
2832               case 6:
2833                 break;
2834               default:
2835                 gcc_unreachable ();
2836               }
2837               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2838               aarch64_bitmasks[nimms++] = imm;
2839             }
2840         }
2841     }
2842
2843   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2844   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2845          aarch64_bitmasks_cmp);
2846 }
2847
2848
2849 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2850    a left shift of 0 or 12 bits.  */
2851 bool
2852 aarch64_uimm12_shift (HOST_WIDE_INT val)
2853 {
2854   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2855           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2856           );
2857 }
2858
2859
2860 /* Return true if val is an immediate that can be loaded into a
2861    register by a MOVZ instruction.  */
2862 static bool
2863 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2864 {
2865   if (GET_MODE_SIZE (mode) > 4)
2866     {
2867       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2868           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2869         return 1;
2870     }
2871   else
2872     {
2873       /* Ignore sign extension.  */
2874       val &= (HOST_WIDE_INT) 0xffffffff;
2875     }
2876   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2877           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2878 }
2879
2880
2881 /* Return true if val is a valid bitmask immediate.  */
2882 bool
2883 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2884 {
2885   if (GET_MODE_SIZE (mode) < 8)
2886     {
2887       /* Replicate bit pattern.  */
2888       val &= (HOST_WIDE_INT) 0xffffffff;
2889       val |= val << 32;
2890     }
2891   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2892                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2893 }
2894
2895
2896 /* Return true if val is an immediate that can be loaded into a
2897    register in a single instruction.  */
2898 bool
2899 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2900 {
2901   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2902     return 1;
2903   return aarch64_bitmask_imm (val, mode);
2904 }
2905
2906 static bool
2907 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2908 {
2909   rtx base, offset;
2910
2911   if (GET_CODE (x) == HIGH)
2912     return true;
2913
2914   split_const (x, &base, &offset);
2915   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2916     {
2917       if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2918           != SYMBOL_FORCE_TO_MEM)
2919         return true;
2920       else
2921         /* Avoid generating a 64-bit relocation in ILP32; leave
2922            to aarch64_expand_mov_immediate to handle it properly.  */
2923         return mode != ptr_mode;
2924     }
2925
2926   return aarch64_tls_referenced_p (x);
2927 }
2928
2929 /* Return true if register REGNO is a valid index register.
2930    STRICT_P is true if REG_OK_STRICT is in effect.  */
2931
2932 bool
2933 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2934 {
2935   if (!HARD_REGISTER_NUM_P (regno))
2936     {
2937       if (!strict_p)
2938         return true;
2939
2940       if (!reg_renumber)
2941         return false;
2942
2943       regno = reg_renumber[regno];
2944     }
2945   return GP_REGNUM_P (regno);
2946 }
2947
2948 /* Return true if register REGNO is a valid base register for mode MODE.
2949    STRICT_P is true if REG_OK_STRICT is in effect.  */
2950
2951 bool
2952 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2953 {
2954   if (!HARD_REGISTER_NUM_P (regno))
2955     {
2956       if (!strict_p)
2957         return true;
2958
2959       if (!reg_renumber)
2960         return false;
2961
2962       regno = reg_renumber[regno];
2963     }
2964
2965   /* The fake registers will be eliminated to either the stack or
2966      hard frame pointer, both of which are usually valid base registers.
2967      Reload deals with the cases where the eliminated form isn't valid.  */
2968   return (GP_REGNUM_P (regno)
2969           || regno == SP_REGNUM
2970           || regno == FRAME_POINTER_REGNUM
2971           || regno == ARG_POINTER_REGNUM);
2972 }
2973
2974 /* Return true if X is a valid base register for mode MODE.
2975    STRICT_P is true if REG_OK_STRICT is in effect.  */
2976
2977 static bool
2978 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2979 {
2980   if (!strict_p && GET_CODE (x) == SUBREG)
2981     x = SUBREG_REG (x);
2982
2983   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2984 }
2985
2986 /* Return true if address offset is a valid index.  If it is, fill in INFO
2987    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2988
2989 static bool
2990 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2991                         enum machine_mode mode, bool strict_p)
2992 {
2993   enum aarch64_address_type type;
2994   rtx index;
2995   int shift;
2996
2997   /* (reg:P) */
2998   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2999       && GET_MODE (x) == Pmode)
3000     {
3001       type = ADDRESS_REG_REG;
3002       index = x;
3003       shift = 0;
3004     }
3005   /* (sign_extend:DI (reg:SI)) */
3006   else if ((GET_CODE (x) == SIGN_EXTEND
3007             || GET_CODE (x) == ZERO_EXTEND)
3008            && GET_MODE (x) == DImode
3009            && GET_MODE (XEXP (x, 0)) == SImode)
3010     {
3011       type = (GET_CODE (x) == SIGN_EXTEND)
3012         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3013       index = XEXP (x, 0);
3014       shift = 0;
3015     }
3016   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3017   else if (GET_CODE (x) == MULT
3018            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3019                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3020            && GET_MODE (XEXP (x, 0)) == DImode
3021            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3022            && CONST_INT_P (XEXP (x, 1)))
3023     {
3024       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3025         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3026       index = XEXP (XEXP (x, 0), 0);
3027       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3028     }
3029   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3030   else if (GET_CODE (x) == ASHIFT
3031            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3032                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3033            && GET_MODE (XEXP (x, 0)) == DImode
3034            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3035            && CONST_INT_P (XEXP (x, 1)))
3036     {
3037       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3038         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3039       index = XEXP (XEXP (x, 0), 0);
3040       shift = INTVAL (XEXP (x, 1));
3041     }
3042   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3043   else if ((GET_CODE (x) == SIGN_EXTRACT
3044             || GET_CODE (x) == ZERO_EXTRACT)
3045            && GET_MODE (x) == DImode
3046            && GET_CODE (XEXP (x, 0)) == MULT
3047            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3048            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3049     {
3050       type = (GET_CODE (x) == SIGN_EXTRACT)
3051         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3052       index = XEXP (XEXP (x, 0), 0);
3053       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3054       if (INTVAL (XEXP (x, 1)) != 32 + shift
3055           || INTVAL (XEXP (x, 2)) != 0)
3056         shift = -1;
3057     }
3058   /* (and:DI (mult:DI (reg:DI) (const_int scale))
3059      (const_int 0xffffffff<<shift)) */
3060   else if (GET_CODE (x) == AND
3061            && GET_MODE (x) == DImode
3062            && GET_CODE (XEXP (x, 0)) == MULT
3063            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3064            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3065            && CONST_INT_P (XEXP (x, 1)))
3066     {
3067       type = ADDRESS_REG_UXTW;
3068       index = XEXP (XEXP (x, 0), 0);
3069       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3070       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3071         shift = -1;
3072     }
3073   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3074   else if ((GET_CODE (x) == SIGN_EXTRACT
3075             || GET_CODE (x) == ZERO_EXTRACT)
3076            && GET_MODE (x) == DImode
3077            && GET_CODE (XEXP (x, 0)) == ASHIFT
3078            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3079            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3080     {
3081       type = (GET_CODE (x) == SIGN_EXTRACT)
3082         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3083       index = XEXP (XEXP (x, 0), 0);
3084       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3085       if (INTVAL (XEXP (x, 1)) != 32 + shift
3086           || INTVAL (XEXP (x, 2)) != 0)
3087         shift = -1;
3088     }
3089   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3090      (const_int 0xffffffff<<shift)) */
3091   else if (GET_CODE (x) == AND
3092            && GET_MODE (x) == DImode
3093            && GET_CODE (XEXP (x, 0)) == ASHIFT
3094            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3095            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3096            && CONST_INT_P (XEXP (x, 1)))
3097     {
3098       type = ADDRESS_REG_UXTW;
3099       index = XEXP (XEXP (x, 0), 0);
3100       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3101       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3102         shift = -1;
3103     }
3104   /* (mult:P (reg:P) (const_int scale)) */
3105   else if (GET_CODE (x) == MULT
3106            && GET_MODE (x) == Pmode
3107            && GET_MODE (XEXP (x, 0)) == Pmode
3108            && CONST_INT_P (XEXP (x, 1)))
3109     {
3110       type = ADDRESS_REG_REG;
3111       index = XEXP (x, 0);
3112       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3113     }
3114   /* (ashift:P (reg:P) (const_int shift)) */
3115   else if (GET_CODE (x) == ASHIFT
3116            && GET_MODE (x) == Pmode
3117            && GET_MODE (XEXP (x, 0)) == Pmode
3118            && CONST_INT_P (XEXP (x, 1)))
3119     {
3120       type = ADDRESS_REG_REG;
3121       index = XEXP (x, 0);
3122       shift = INTVAL (XEXP (x, 1));
3123     }
3124   else
3125     return false;
3126
3127   if (GET_CODE (index) == SUBREG)
3128     index = SUBREG_REG (index);
3129
3130   if ((shift == 0 ||
3131        (shift > 0 && shift <= 3
3132         && (1 << shift) == GET_MODE_SIZE (mode)))
3133       && REG_P (index)
3134       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3135     {
3136       info->type = type;
3137       info->offset = index;
3138       info->shift = shift;
3139       return true;
3140     }
3141
3142   return false;
3143 }
3144
3145 bool
3146 aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3147 {
3148   return (offset >= -64 * GET_MODE_SIZE (mode)
3149           && offset < 64 * GET_MODE_SIZE (mode)
3150           && offset % GET_MODE_SIZE (mode) == 0);
3151 }
3152
3153 static inline bool
3154 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3155                                HOST_WIDE_INT offset)
3156 {
3157   return offset >= -256 && offset < 256;
3158 }
3159
3160 static inline bool
3161 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3162 {
3163   return (offset >= 0
3164           && offset < 4096 * GET_MODE_SIZE (mode)
3165           && offset % GET_MODE_SIZE (mode) == 0);
3166 }
3167
3168 /* Return true if X is a valid address for machine mode MODE.  If it is,
3169    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
3170    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
3171
3172 static bool
3173 aarch64_classify_address (struct aarch64_address_info *info,
3174                           rtx x, enum machine_mode mode,
3175                           RTX_CODE outer_code, bool strict_p)
3176 {
3177   enum rtx_code code = GET_CODE (x);
3178   rtx op0, op1;
3179   bool allow_reg_index_p =
3180     outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3181                                || aarch64_vector_mode_supported_p (mode));
3182   /* Don't support anything other than POST_INC or REG addressing for
3183      AdvSIMD.  */
3184   if (aarch64_vect_struct_mode_p (mode)
3185       && (code != POST_INC && code != REG))
3186     return false;
3187
3188   switch (code)
3189     {
3190     case REG:
3191     case SUBREG:
3192       info->type = ADDRESS_REG_IMM;
3193       info->base = x;
3194       info->offset = const0_rtx;
3195       return aarch64_base_register_rtx_p (x, strict_p);
3196
3197     case PLUS:
3198       op0 = XEXP (x, 0);
3199       op1 = XEXP (x, 1);
3200
3201       if (! strict_p
3202           && REG_P (op0)
3203           && (op0 == virtual_stack_vars_rtx
3204               || op0 == frame_pointer_rtx
3205               || op0 == arg_pointer_rtx)
3206           && CONST_INT_P (op1))
3207         {
3208           info->type = ADDRESS_REG_IMM;
3209           info->base = op0;
3210           info->offset = op1;
3211
3212           return true;
3213         }
3214
3215       if (GET_MODE_SIZE (mode) != 0
3216           && CONST_INT_P (op1)
3217           && aarch64_base_register_rtx_p (op0, strict_p))
3218         {
3219           HOST_WIDE_INT offset = INTVAL (op1);
3220
3221           info->type = ADDRESS_REG_IMM;
3222           info->base = op0;
3223           info->offset = op1;
3224
3225           /* TImode and TFmode values are allowed in both pairs of X
3226              registers and individual Q registers.  The available
3227              address modes are:
3228              X,X: 7-bit signed scaled offset
3229              Q:   9-bit signed offset
3230              We conservatively require an offset representable in either mode.
3231            */
3232           if (mode == TImode || mode == TFmode)
3233             return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3234                     && offset_9bit_signed_unscaled_p (mode, offset));
3235
3236           if (outer_code == PARALLEL)
3237             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3238                     && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3239           else
3240             return (offset_9bit_signed_unscaled_p (mode, offset)
3241                     || offset_12bit_unsigned_scaled_p (mode, offset));
3242         }
3243
3244       if (allow_reg_index_p)
3245         {
3246           /* Look for base + (scaled/extended) index register.  */
3247           if (aarch64_base_register_rtx_p (op0, strict_p)
3248               && aarch64_classify_index (info, op1, mode, strict_p))
3249             {
3250               info->base = op0;
3251               return true;
3252             }
3253           if (aarch64_base_register_rtx_p (op1, strict_p)
3254               && aarch64_classify_index (info, op0, mode, strict_p))
3255             {
3256               info->base = op1;
3257               return true;
3258             }
3259         }
3260
3261       return false;
3262
3263     case POST_INC:
3264     case POST_DEC:
3265     case PRE_INC:
3266     case PRE_DEC:
3267       info->type = ADDRESS_REG_WB;
3268       info->base = XEXP (x, 0);
3269       info->offset = NULL_RTX;
3270       return aarch64_base_register_rtx_p (info->base, strict_p);
3271
3272     case POST_MODIFY:
3273     case PRE_MODIFY:
3274       info->type = ADDRESS_REG_WB;
3275       info->base = XEXP (x, 0);
3276       if (GET_CODE (XEXP (x, 1)) == PLUS
3277           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3278           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3279           && aarch64_base_register_rtx_p (info->base, strict_p))
3280         {
3281           HOST_WIDE_INT offset;
3282           info->offset = XEXP (XEXP (x, 1), 1);
3283           offset = INTVAL (info->offset);
3284
3285           /* TImode and TFmode values are allowed in both pairs of X
3286              registers and individual Q registers.  The available
3287              address modes are:
3288              X,X: 7-bit signed scaled offset
3289              Q:   9-bit signed offset
3290              We conservatively require an offset representable in either mode.
3291            */
3292           if (mode == TImode || mode == TFmode)
3293             return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3294                     && offset_9bit_signed_unscaled_p (mode, offset));
3295
3296           if (outer_code == PARALLEL)
3297             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3298                     && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3299           else
3300             return offset_9bit_signed_unscaled_p (mode, offset);
3301         }
3302       return false;
3303
3304     case CONST:
3305     case SYMBOL_REF:
3306     case LABEL_REF:
3307       /* load literal: pc-relative constant pool entry.  Only supported
3308          for SI mode or larger.  */
3309       info->type = ADDRESS_SYMBOLIC;
3310       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3311         {
3312           rtx sym, addend;
3313
3314           split_const (x, &sym, &addend);
3315           return (GET_CODE (sym) == LABEL_REF
3316                   || (GET_CODE (sym) == SYMBOL_REF
3317                       && CONSTANT_POOL_ADDRESS_P (sym)));
3318         }
3319       return false;
3320
3321     case LO_SUM:
3322       info->type = ADDRESS_LO_SUM;
3323       info->base = XEXP (x, 0);
3324       info->offset = XEXP (x, 1);
3325       if (allow_reg_index_p
3326           && aarch64_base_register_rtx_p (info->base, strict_p))
3327         {
3328           rtx sym, offs;
3329           split_const (info->offset, &sym, &offs);
3330           if (GET_CODE (sym) == SYMBOL_REF
3331               && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3332                   == SYMBOL_SMALL_ABSOLUTE))
3333             {
3334               /* The symbol and offset must be aligned to the access size.  */
3335               unsigned int align;
3336               unsigned int ref_size;
3337
3338               if (CONSTANT_POOL_ADDRESS_P (sym))
3339                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3340               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3341                 {
3342                   tree exp = SYMBOL_REF_DECL (sym);
3343                   align = TYPE_ALIGN (TREE_TYPE (exp));
3344                   align = CONSTANT_ALIGNMENT (exp, align);
3345                 }
3346               else if (SYMBOL_REF_DECL (sym))
3347                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3348               else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3349                        && SYMBOL_REF_BLOCK (sym) != NULL)
3350                 align = SYMBOL_REF_BLOCK (sym)->alignment;
3351               else
3352                 align = BITS_PER_UNIT;
3353
3354               ref_size = GET_MODE_SIZE (mode);
3355               if (ref_size == 0)
3356                 ref_size = GET_MODE_SIZE (DImode);
3357
3358               return ((INTVAL (offs) & (ref_size - 1)) == 0
3359                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3360             }
3361         }
3362       return false;
3363
3364     default:
3365       return false;
3366     }
3367 }
3368
3369 bool
3370 aarch64_symbolic_address_p (rtx x)
3371 {
3372   rtx offset;
3373
3374   split_const (x, &x, &offset);
3375   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3376 }
3377
3378 /* Classify the base of symbolic expression X, given that X appears in
3379    context CONTEXT.  */
3380
3381 enum aarch64_symbol_type
3382 aarch64_classify_symbolic_expression (rtx x,
3383                                       enum aarch64_symbol_context context)
3384 {
3385   rtx offset;
3386
3387   split_const (x, &x, &offset);
3388   return aarch64_classify_symbol (x, context);
3389 }
3390
3391
3392 /* Return TRUE if X is a legitimate address for accessing memory in
3393    mode MODE.  */
3394 static bool
3395 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3396 {
3397   struct aarch64_address_info addr;
3398
3399   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3400 }
3401
3402 /* Return TRUE if X is a legitimate address for accessing memory in
3403    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3404    pair operation.  */
3405 bool
3406 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3407                               RTX_CODE outer_code, bool strict_p)
3408 {
3409   struct aarch64_address_info addr;
3410
3411   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3412 }
3413
3414 /* Return TRUE if rtx X is immediate constant 0.0 */
3415 bool
3416 aarch64_float_const_zero_rtx_p (rtx x)
3417 {
3418   REAL_VALUE_TYPE r;
3419
3420   if (GET_MODE (x) == VOIDmode)
3421     return false;
3422
3423   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3424   if (REAL_VALUE_MINUS_ZERO (r))
3425     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3426   return REAL_VALUES_EQUAL (r, dconst0);
3427 }
3428
3429 /* Return the fixed registers used for condition codes.  */
3430
3431 static bool
3432 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3433 {
3434   *p1 = CC_REGNUM;
3435   *p2 = INVALID_REGNUM;
3436   return true;
3437 }
3438
3439 /* Emit call insn with PAT and do aarch64-specific handling.  */
3440
3441 void
3442 aarch64_emit_call_insn (rtx pat)
3443 {
3444   rtx insn = emit_call_insn (pat);
3445
3446   rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3447   clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3448   clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3449 }
3450
3451 enum machine_mode
3452 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3453 {
3454   /* All floating point compares return CCFP if it is an equality
3455      comparison, and CCFPE otherwise.  */
3456   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3457     {
3458       switch (code)
3459         {
3460         case EQ:
3461         case NE:
3462         case UNORDERED:
3463         case ORDERED:
3464         case UNLT:
3465         case UNLE:
3466         case UNGT:
3467         case UNGE:
3468         case UNEQ:
3469         case LTGT:
3470           return CCFPmode;
3471
3472         case LT:
3473         case LE:
3474         case GT:
3475         case GE:
3476           return CCFPEmode;
3477
3478         default:
3479           gcc_unreachable ();
3480         }
3481     }
3482
3483   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3484       && y == const0_rtx
3485       && (code == EQ || code == NE || code == LT || code == GE)
3486       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3487           || GET_CODE (x) == NEG))
3488     return CC_NZmode;
3489
3490   /* A compare with a shifted operand.  Because of canonicalization,
3491      the comparison will have to be swapped when we emit the assembly
3492      code.  */
3493   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3494       && (REG_P (y) || GET_CODE (y) == SUBREG)
3495       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3496           || GET_CODE (x) == LSHIFTRT
3497           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3498     return CC_SWPmode;
3499
3500   /* Similarly for a negated operand, but we can only do this for
3501      equalities.  */
3502   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3503       && (REG_P (y) || GET_CODE (y) == SUBREG)
3504       && (code == EQ || code == NE)
3505       && GET_CODE (x) == NEG)
3506     return CC_Zmode;
3507
3508   /* A compare of a mode narrower than SI mode against zero can be done
3509      by extending the value in the comparison.  */
3510   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3511       && y == const0_rtx)
3512     /* Only use sign-extension if we really need it.  */
3513     return ((code == GT || code == GE || code == LE || code == LT)
3514             ? CC_SESWPmode : CC_ZESWPmode);
3515
3516   /* For everything else, return CCmode.  */
3517   return CCmode;
3518 }
3519
3520 int
3521 aarch64_get_condition_code (rtx x)
3522 {
3523   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3524   enum rtx_code comp_code = GET_CODE (x);
3525
3526   if (GET_MODE_CLASS (mode) != MODE_CC)
3527     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3528
3529   switch (mode)
3530     {
3531     case CCFPmode:
3532     case CCFPEmode:
3533       switch (comp_code)
3534         {
3535         case GE: return AARCH64_GE;
3536         case GT: return AARCH64_GT;
3537         case LE: return AARCH64_LS;
3538         case LT: return AARCH64_MI;
3539         case NE: return AARCH64_NE;
3540         case EQ: return AARCH64_EQ;
3541         case ORDERED: return AARCH64_VC;
3542         case UNORDERED: return AARCH64_VS;
3543         case UNLT: return AARCH64_LT;
3544         case UNLE: return AARCH64_LE;
3545         case UNGT: return AARCH64_HI;
3546         case UNGE: return AARCH64_PL;
3547         default: return -1;
3548         }
3549       break;
3550
3551     case CCmode:
3552       switch (comp_code)
3553         {
3554         case NE: return AARCH64_NE;
3555         case EQ: return AARCH64_EQ;
3556         case GE: return AARCH64_GE;
3557         case GT: return AARCH64_GT;
3558         case LE: return AARCH64_LE;
3559         case LT: return AARCH64_LT;
3560         case GEU: return AARCH64_CS;
3561         case GTU: return AARCH64_HI;
3562         case LEU: return AARCH64_LS;
3563         case LTU: return AARCH64_CC;
3564         default: return -1;
3565         }
3566       break;
3567
3568     case CC_SWPmode:
3569     case CC_ZESWPmode:
3570     case CC_SESWPmode:
3571       switch (comp_code)
3572         {
3573         case NE: return AARCH64_NE;
3574         case EQ: return AARCH64_EQ;
3575         case GE: return AARCH64_LE;
3576         case GT: return AARCH64_LT;
3577         case LE: return AARCH64_GE;
3578         case LT: return AARCH64_GT;
3579         case GEU: return AARCH64_LS;
3580         case GTU: return AARCH64_CC;
3581         case LEU: return AARCH64_CS;
3582         case LTU: return AARCH64_HI;
3583         default: return -1;
3584         }
3585       break;
3586
3587     case CC_NZmode:
3588       switch (comp_code)
3589         {
3590         case NE: return AARCH64_NE;
3591         case EQ: return AARCH64_EQ;
3592         case GE: return AARCH64_PL;
3593         case LT: return AARCH64_MI;
3594         default: return -1;
3595         }
3596       break;
3597
3598     case CC_Zmode:
3599       switch (comp_code)
3600         {
3601         case NE: return AARCH64_NE;
3602         case EQ: return AARCH64_EQ;
3603         default: return -1;
3604         }
3605       break;
3606
3607     default:
3608       return -1;
3609       break;
3610     }
3611 }
3612
3613 bool
3614 aarch64_const_vec_all_same_in_range_p (rtx x,
3615                                   HOST_WIDE_INT minval,
3616                                   HOST_WIDE_INT maxval)
3617 {
3618   HOST_WIDE_INT firstval;
3619   int count, i;
3620
3621   if (GET_CODE (x) != CONST_VECTOR
3622       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3623     return false;
3624
3625   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3626   if (firstval < minval || firstval > maxval)
3627     return false;
3628
3629   count = CONST_VECTOR_NUNITS (x);
3630   for (i = 1; i < count; i++)
3631     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3632       return false;
3633
3634   return true;
3635 }
3636
3637 bool
3638 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3639 {
3640   return aarch64_const_vec_all_same_in_range_p (x, val, val);
3641 }
3642
3643 static unsigned
3644 bit_count (unsigned HOST_WIDE_INT value)
3645 {
3646   unsigned count = 0;
3647
3648   while (value)
3649     {
3650       count++;
3651       value &= value - 1;
3652     }
3653
3654   return count;
3655 }
3656
3657 void
3658 aarch64_print_operand (FILE *f, rtx x, char code)
3659 {
3660   switch (code)
3661     {
3662     /* An integer or symbol address without a preceding # sign.  */
3663     case 'c':
3664       switch (GET_CODE (x))
3665         {
3666         case CONST_INT:
3667           fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3668           break;
3669
3670         case SYMBOL_REF:
3671           output_addr_const (f, x);
3672           break;
3673
3674         case CONST:
3675           if (GET_CODE (XEXP (x, 0)) == PLUS
3676               && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3677             {
3678               output_addr_const (f, x);
3679               break;
3680             }
3681           /* Fall through.  */
3682
3683         default:
3684           output_operand_lossage ("Unsupported operand for code '%c'", code);
3685         }
3686       break;
3687
3688     case 'e':
3689       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3690       {
3691         int n;
3692
3693         if (!CONST_INT_P (x)
3694             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3695           {
3696             output_operand_lossage ("invalid operand for '%%%c'", code);
3697             return;
3698           }
3699
3700         switch (n)
3701           {
3702           case 3:
3703             fputc ('b', f);
3704             break;
3705           case 4:
3706             fputc ('h', f);
3707             break;
3708           case 5:
3709             fputc ('w', f);
3710             break;
3711           default:
3712             output_operand_lossage ("invalid operand for '%%%c'", code);
3713             return;
3714           }
3715       }
3716       break;
3717
3718     case 'p':
3719       {
3720         int n;
3721
3722         /* Print N such that 2^N == X.  */
3723         if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
3724           {
3725             output_operand_lossage ("invalid operand for '%%%c'", code);
3726             return;
3727           }
3728
3729         asm_fprintf (f, "%d", n);
3730       }
3731       break;
3732
3733     case 'P':
3734       /* Print the number of non-zero bits in X (a const_int).  */
3735       if (!CONST_INT_P (x))
3736         {
3737           output_operand_lossage ("invalid operand for '%%%c'", code);
3738           return;
3739         }
3740
3741       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3742       break;
3743
3744     case 'H':
3745       /* Print the higher numbered register of a pair (TImode) of regs.  */
3746       if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
3747         {
3748           output_operand_lossage ("invalid operand for '%%%c'", code);
3749           return;
3750         }
3751
3752       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3753       break;
3754
3755     case 'm':
3756       {
3757         int cond_code;
3758         /* Print a condition (eq, ne, etc).  */
3759
3760         /* CONST_TRUE_RTX means always -- that's the default.  */
3761         if (x == const_true_rtx)
3762           return;
3763
3764         if (!COMPARISON_P (x))
3765           {
3766             output_operand_lossage ("invalid operand for '%%%c'", code);
3767             return;
3768           }
3769
3770         cond_code = aarch64_get_condition_code (x);
3771         gcc_assert (cond_code >= 0);
3772         fputs (aarch64_condition_codes[cond_code], f);
3773       }
3774       break;
3775
3776     case 'M':
3777       {
3778         int cond_code;
3779         /* Print the inverse of a condition (eq <-> ne, etc).  */
3780
3781         /* CONST_TRUE_RTX means never -- that's the default.  */
3782         if (x == const_true_rtx)
3783           {
3784             fputs ("nv", f);
3785             return;
3786           }
3787
3788         if (!COMPARISON_P (x))
3789           {
3790             output_operand_lossage ("invalid operand for '%%%c'", code);
3791             return;
3792           }
3793         cond_code = aarch64_get_condition_code (x);
3794         gcc_assert (cond_code >= 0);
3795         fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3796                                        (cond_code)], f);
3797       }
3798       break;
3799
3800     case 'b':
3801     case 'h':
3802     case 's':
3803     case 'd':
3804     case 'q':
3805       /* Print a scalar FP/SIMD register name.  */
3806       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3807         {
3808           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3809           return;
3810         }
3811       asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3812       break;
3813
3814     case 'S':
3815     case 'T':
3816     case 'U':
3817     case 'V':
3818       /* Print the first FP/SIMD register name in a list.  */
3819       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3820         {
3821           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3822           return;
3823         }
3824       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3825       break;
3826
3827     case 'X':
3828       /* Print bottom 16 bits of integer constant in hex.  */
3829       if (!CONST_INT_P (x))
3830         {
3831           output_operand_lossage ("invalid operand for '%%%c'", code);
3832           return;
3833         }
3834       asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3835       break;
3836
3837     case 'w':
3838     case 'x':
3839       /* Print a general register name or the zero register (32-bit or
3840          64-bit).  */
3841       if (x == const0_rtx
3842           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3843         {
3844           asm_fprintf (f, "%czr", code);
3845           break;
3846         }
3847
3848       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3849         {
3850           asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3851           break;
3852         }
3853
3854       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3855         {
3856           asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3857           break;
3858         }
3859
3860       /* Fall through */
3861
3862     case 0:
3863       /* Print a normal operand, if it's a general register, then we
3864          assume DImode.  */
3865       if (x == NULL)
3866         {
3867           output_operand_lossage ("missing operand");
3868           return;
3869         }
3870
3871       switch (GET_CODE (x))
3872         {
3873         case REG:
3874           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3875           break;
3876
3877         case MEM:
3878           aarch64_memory_reference_mode = GET_MODE (x);
3879           output_address (XEXP (x, 0));
3880           break;
3881
3882         case LABEL_REF:
3883         case SYMBOL_REF:
3884           output_addr_const (asm_out_file, x);
3885           break;
3886
3887         case CONST_INT:
3888           asm_fprintf (f, "%wd", INTVAL (x));
3889           break;
3890
3891         case CONST_VECTOR:
3892           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3893             {
3894               gcc_assert (
3895                   aarch64_const_vec_all_same_in_range_p (x,
3896                                                          HOST_WIDE_INT_MIN,
3897                                                          HOST_WIDE_INT_MAX));
3898               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3899             }
3900           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3901             {
3902               fputc ('0', f);
3903             }
3904           else
3905             gcc_unreachable ();
3906           break;
3907
3908         case CONST_DOUBLE:
3909           /* CONST_DOUBLE can represent a double-width integer.
3910              In this case, the mode of x is VOIDmode.  */
3911           if (GET_MODE (x) == VOIDmode)
3912             ; /* Do Nothing.  */
3913           else if (aarch64_float_const_zero_rtx_p (x))
3914             {
3915               fputc ('0', f);
3916               break;
3917             }
3918           else if (aarch64_float_const_representable_p (x))
3919             {
3920 #define buf_size 20
3921               char float_buf[buf_size] = {'\0'};
3922               REAL_VALUE_TYPE r;
3923               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3924               real_to_decimal_for_mode (float_buf, &r,
3925                                         buf_size, buf_size,
3926                                         1, GET_MODE (x));
3927               asm_fprintf (asm_out_file, "%s", float_buf);
3928               break;
3929 #undef buf_size
3930             }
3931           output_operand_lossage ("invalid constant");
3932           return;
3933         default:
3934           output_operand_lossage ("invalid operand");
3935           return;
3936         }
3937       break;
3938
3939     case 'A':
3940       if (GET_CODE (x) == HIGH)
3941         x = XEXP (x, 0);
3942
3943       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3944         {
3945         case SYMBOL_SMALL_GOT:
3946           asm_fprintf (asm_out_file, ":got:");
3947           break;
3948
3949         case SYMBOL_SMALL_TLSGD:
3950           asm_fprintf (asm_out_file, ":tlsgd:");
3951           break;
3952
3953         case SYMBOL_SMALL_TLSDESC:
3954           asm_fprintf (asm_out_file, ":tlsdesc:");
3955           break;
3956
3957         case SYMBOL_SMALL_GOTTPREL:
3958           asm_fprintf (asm_out_file, ":gottprel:");
3959           break;
3960
3961         case SYMBOL_SMALL_TPREL:
3962           asm_fprintf (asm_out_file, ":tprel:");
3963           break;
3964
3965         case SYMBOL_TINY_GOT:
3966           gcc_unreachable ();
3967           break;
3968
3969         default:
3970           break;
3971         }
3972       output_addr_const (asm_out_file, x);
3973       break;
3974
3975     case 'L':
3976       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3977         {
3978         case SYMBOL_SMALL_GOT:
3979           asm_fprintf (asm_out_file, ":lo12:");
3980           break;
3981
3982         case SYMBOL_SMALL_TLSGD:
3983           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3984           break;
3985
3986         case SYMBOL_SMALL_TLSDESC:
3987           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3988           break;
3989
3990         case SYMBOL_SMALL_GOTTPREL:
3991           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3992           break;
3993
3994         case SYMBOL_SMALL_TPREL:
3995           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3996           break;
3997
3998         case SYMBOL_TINY_GOT:
3999           asm_fprintf (asm_out_file, ":got:");
4000           break;
4001
4002         default:
4003           break;
4004         }
4005       output_addr_const (asm_out_file, x);
4006       break;
4007
4008     case 'G':
4009
4010       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4011         {
4012         case SYMBOL_SMALL_TPREL:
4013           asm_fprintf (asm_out_file, ":tprel_hi12:");
4014           break;
4015         default:
4016           break;
4017         }
4018       output_addr_const (asm_out_file, x);
4019       break;
4020
4021     default:
4022       output_operand_lossage ("invalid operand prefix '%%%c'", code);
4023       return;
4024     }
4025 }
4026
4027 void
4028 aarch64_print_operand_address (FILE *f, rtx x)
4029 {
4030   struct aarch64_address_info addr;
4031
4032   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4033                              MEM, true))
4034     switch (addr.type)
4035       {
4036       case ADDRESS_REG_IMM:
4037         if (addr.offset == const0_rtx)
4038           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4039         else
4040           asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4041                        INTVAL (addr.offset));
4042         return;
4043
4044       case ADDRESS_REG_REG:
4045         if (addr.shift == 0)
4046           asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4047                        reg_names [REGNO (addr.offset)]);
4048         else
4049           asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4050                        reg_names [REGNO (addr.offset)], addr.shift);
4051         return;
4052
4053       case ADDRESS_REG_UXTW:
4054         if (addr.shift == 0)
4055           asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4056                        REGNO (addr.offset) - R0_REGNUM);
4057         else
4058           asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4059                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
4060         return;
4061
4062       case ADDRESS_REG_SXTW:
4063         if (addr.shift == 0)
4064           asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4065                        REGNO (addr.offset) - R0_REGNUM);
4066         else
4067           asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4068                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
4069         return;
4070
4071       case ADDRESS_REG_WB:
4072         switch (GET_CODE (x))
4073           {
4074           case PRE_INC:
4075             asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4076                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4077             return;
4078           case POST_INC:
4079             asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4080                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4081             return;
4082           case PRE_DEC:
4083             asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4084                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4085             return;
4086           case POST_DEC:
4087             asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4088                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4089             return;
4090           case PRE_MODIFY:
4091             asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4092                          INTVAL (addr.offset));
4093             return;
4094           case POST_MODIFY:
4095             asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4096                          INTVAL (addr.offset));
4097             return;
4098           default:
4099             break;
4100           }
4101         break;
4102
4103       case ADDRESS_LO_SUM:
4104         asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4105         output_addr_const (f, addr.offset);
4106         asm_fprintf (f, "]");
4107         return;
4108
4109       case ADDRESS_SYMBOLIC:
4110         break;
4111       }
4112
4113   output_addr_const (f, x);
4114 }
4115
4116 bool
4117 aarch64_label_mentioned_p (rtx x)
4118 {
4119   const char *fmt;
4120   int i;
4121
4122   if (GET_CODE (x) == LABEL_REF)
4123     return true;
4124
4125   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4126      referencing instruction, but they are constant offsets, not
4127      symbols.  */
4128   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4129     return false;
4130
4131   fmt = GET_RTX_FORMAT (GET_CODE (x));
4132   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4133     {
4134       if (fmt[i] == 'E')
4135         {
4136           int j;
4137
4138           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4139             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4140               return 1;
4141         }
4142       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4143         return 1;
4144     }
4145
4146   return 0;
4147 }
4148
4149 /* Implement REGNO_REG_CLASS.  */
4150
4151 enum reg_class
4152 aarch64_regno_regclass (unsigned regno)
4153 {
4154   if (GP_REGNUM_P (regno))
4155     return GENERAL_REGS;
4156
4157   if (regno == SP_REGNUM)
4158     return STACK_REG;
4159
4160   if (regno == FRAME_POINTER_REGNUM
4161       || regno == ARG_POINTER_REGNUM)
4162     return POINTER_REGS;
4163
4164   if (FP_REGNUM_P (regno))
4165     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
4166
4167   return NO_REGS;
4168 }
4169
4170 static rtx
4171 aarch64_legitimize_address (rtx x, rtx /* orig_x  */, enum machine_mode mode)
4172 {
4173   /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4174      where mask is selected by alignment and size of the offset.
4175      We try to pick as large a range for the offset as possible to
4176      maximize the chance of a CSE.  However, for aligned addresses
4177      we limit the range to 4k so that structures with different sized
4178      elements are likely to use the same base.  */
4179
4180   if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4181     {
4182       HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4183       HOST_WIDE_INT base_offset;
4184
4185       /* Does it look like we'll need a load/store-pair operation?  */
4186       if (GET_MODE_SIZE (mode) > 16
4187           || mode == TImode)
4188         base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4189                        & ~((128 * GET_MODE_SIZE (mode)) - 1));
4190       /* For offsets aren't a multiple of the access size, the limit is
4191          -256...255.  */
4192       else if (offset & (GET_MODE_SIZE (mode) - 1))
4193         base_offset = (offset + 0x100) & ~0x1ff;
4194       else
4195         base_offset = offset & ~0xfff;
4196
4197       if (base_offset == 0)
4198         return x;
4199
4200       offset -= base_offset;
4201       rtx base_reg = gen_reg_rtx (Pmode);
4202       rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4203                            NULL_RTX);
4204       emit_move_insn (base_reg, val);
4205       x = plus_constant (Pmode, base_reg, offset);
4206     }
4207
4208   return x;
4209 }
4210
4211 /* Try a machine-dependent way of reloading an illegitimate address
4212    operand.  If we find one, push the reload and return the new rtx.  */
4213
4214 rtx
4215 aarch64_legitimize_reload_address (rtx *x_p,
4216                                    enum machine_mode mode,
4217                                    int opnum, int type,
4218                                    int ind_levels ATTRIBUTE_UNUSED)
4219 {
4220   rtx x = *x_p;
4221
4222   /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
4223   if (aarch64_vect_struct_mode_p (mode)
4224       && GET_CODE (x) == PLUS
4225       && REG_P (XEXP (x, 0))
4226       && CONST_INT_P (XEXP (x, 1)))
4227     {
4228       rtx orig_rtx = x;
4229       x = copy_rtx (x);
4230       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4231                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4232                    opnum, (enum reload_type) type);
4233       return x;
4234     }
4235
4236   /* We must recognize output that we have already generated ourselves.  */
4237   if (GET_CODE (x) == PLUS
4238       && GET_CODE (XEXP (x, 0)) == PLUS
4239       && REG_P (XEXP (XEXP (x, 0), 0))
4240       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4241       && CONST_INT_P (XEXP (x, 1)))
4242     {
4243       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4244                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4245                    opnum, (enum reload_type) type);
4246       return x;
4247     }
4248
4249   /* We wish to handle large displacements off a base register by splitting
4250      the addend across an add and the mem insn.  This can cut the number of
4251      extra insns needed from 3 to 1.  It is only useful for load/store of a
4252      single register with 12 bit offset field.  */
4253   if (GET_CODE (x) == PLUS
4254       && REG_P (XEXP (x, 0))
4255       && CONST_INT_P (XEXP (x, 1))
4256       && HARD_REGISTER_P (XEXP (x, 0))
4257       && mode != TImode
4258       && mode != TFmode
4259       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4260     {
4261       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4262       HOST_WIDE_INT low = val & 0xfff;
4263       HOST_WIDE_INT high = val - low;
4264       HOST_WIDE_INT offs;
4265       rtx cst;
4266       enum machine_mode xmode = GET_MODE (x);
4267
4268       /* In ILP32, xmode can be either DImode or SImode.  */
4269       gcc_assert (xmode == DImode || xmode == SImode);
4270
4271       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
4272          BLKmode alignment.  */
4273       if (GET_MODE_SIZE (mode) == 0)
4274         return NULL_RTX;
4275
4276       offs = low % GET_MODE_SIZE (mode);
4277
4278       /* Align misaligned offset by adjusting high part to compensate.  */
4279       if (offs != 0)
4280         {
4281           if (aarch64_uimm12_shift (high + offs))
4282             {
4283               /* Align down.  */
4284               low = low - offs;
4285               high = high + offs;
4286             }
4287           else
4288             {
4289               /* Align up.  */
4290               offs = GET_MODE_SIZE (mode) - offs;
4291               low = low + offs;
4292               high = high + (low & 0x1000) - offs;
4293               low &= 0xfff;
4294             }
4295         }
4296
4297       /* Check for overflow.  */
4298       if (high + low != val)
4299         return NULL_RTX;
4300
4301       cst = GEN_INT (high);
4302       if (!aarch64_uimm12_shift (high))
4303         cst = force_const_mem (xmode, cst);
4304
4305       /* Reload high part into base reg, leaving the low part
4306          in the mem instruction.
4307          Note that replacing this gen_rtx_PLUS with plus_constant is
4308          wrong in this case because we rely on the
4309          (plus (plus reg c1) c2) structure being preserved so that
4310          XEXP (*p, 0) in push_reload below uses the correct term.  */
4311       x = gen_rtx_PLUS (xmode,
4312                         gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4313                         GEN_INT (low));
4314
4315       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4316                    BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4317                    opnum, (enum reload_type) type);
4318       return x;
4319     }
4320
4321   return NULL_RTX;
4322 }
4323
4324
4325 static reg_class_t
4326 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4327                           reg_class_t rclass,
4328                           enum machine_mode mode,
4329                           secondary_reload_info *sri)
4330 {
4331   /* Without the TARGET_SIMD instructions we cannot move a Q register
4332      to a Q register directly.  We need a scratch.  */
4333   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4334       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4335       && reg_class_subset_p (rclass, FP_REGS))
4336     {
4337       if (mode == TFmode)
4338         sri->icode = CODE_FOR_aarch64_reload_movtf;
4339       else if (mode == TImode)
4340         sri->icode = CODE_FOR_aarch64_reload_movti;
4341       return NO_REGS;
4342     }
4343
4344   /* A TFmode or TImode memory access should be handled via an FP_REGS
4345      because AArch64 has richer addressing modes for LDR/STR instructions
4346      than LDP/STP instructions.  */
4347   if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4348       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4349     return FP_REGS;
4350
4351   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4352       return GENERAL_REGS;
4353
4354   return NO_REGS;
4355 }
4356
4357 static bool
4358 aarch64_can_eliminate (const int from, const int to)
4359 {
4360   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4361      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4362
4363   if (frame_pointer_needed)
4364     {
4365       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4366         return true;
4367       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4368         return false;
4369       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4370           && !cfun->calls_alloca)
4371         return true;
4372       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4373         return true;
4374
4375       return false;
4376     }
4377
4378   return true;
4379 }
4380
4381 HOST_WIDE_INT
4382 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4383 {
4384   aarch64_layout_frame ();
4385
4386   if (to == HARD_FRAME_POINTER_REGNUM)
4387     {
4388       if (from == ARG_POINTER_REGNUM)
4389         return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4390
4391       if (from == FRAME_POINTER_REGNUM)
4392         return (cfun->machine->frame.hard_fp_offset
4393                 - cfun->machine->frame.saved_varargs_size);
4394     }
4395
4396   if (to == STACK_POINTER_REGNUM)
4397     {
4398       if (from == FRAME_POINTER_REGNUM)
4399           return (cfun->machine->frame.frame_size
4400                   - cfun->machine->frame.saved_varargs_size);
4401     }
4402
4403   return cfun->machine->frame.frame_size;
4404 }
4405
4406 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4407    previous frame.  */
4408
4409 rtx
4410 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4411 {
4412   if (count != 0)
4413     return const0_rtx;
4414   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4415 }
4416
4417
4418 static void
4419 aarch64_asm_trampoline_template (FILE *f)
4420 {
4421   if (TARGET_ILP32)
4422     {
4423       asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4424       asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4425     }
4426   else
4427     {
4428       asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4429       asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4430     }
4431   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4432   assemble_aligned_integer (4, const0_rtx);
4433   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4434   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4435 }
4436
4437 static void
4438 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4439 {
4440   rtx fnaddr, mem, a_tramp;
4441   const int tramp_code_sz = 16;
4442
4443   /* Don't need to copy the trailing D-words, we fill those in below.  */
4444   emit_block_move (m_tramp, assemble_trampoline_template (),
4445                    GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4446   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4447   fnaddr = XEXP (DECL_RTL (fndecl), 0);
4448   if (GET_MODE (fnaddr) != ptr_mode)
4449     fnaddr = convert_memory_address (ptr_mode, fnaddr);
4450   emit_move_insn (mem, fnaddr);
4451
4452   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4453   emit_move_insn (mem, chain_value);
4454
4455   /* XXX We should really define a "clear_cache" pattern and use
4456      gen_clear_cache().  */
4457   a_tramp = XEXP (m_tramp, 0);
4458   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4459                      LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4460                      plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4461                      ptr_mode);
4462 }
4463
4464 static unsigned char
4465 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4466 {
4467   switch (regclass)
4468     {
4469     case CALLER_SAVE_REGS:
4470     case POINTER_REGS:
4471     case GENERAL_REGS:
4472     case ALL_REGS:
4473     case FP_REGS:
4474     case FP_LO_REGS:
4475       return
4476         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4477                                        (GET_MODE_SIZE (mode) + 7) / 8;
4478     case STACK_REG:
4479       return 1;
4480
4481     case NO_REGS:
4482       return 0;
4483
4484     default:
4485       break;
4486     }
4487   gcc_unreachable ();
4488 }
4489
4490 static reg_class_t
4491 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4492 {
4493   if (regclass == POINTER_REGS)
4494     return GENERAL_REGS;
4495
4496   if (regclass == STACK_REG)
4497     {
4498       if (REG_P(x)
4499           && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4500           return regclass;
4501
4502       return NO_REGS;
4503     }
4504
4505   /* If it's an integer immediate that MOVI can't handle, then
4506      FP_REGS is not an option, so we return NO_REGS instead.  */
4507   if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4508       && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4509     return NO_REGS;
4510
4511   /* Register eliminiation can result in a request for
4512      SP+constant->FP_REGS.  We cannot support such operations which
4513      use SP as source and an FP_REG as destination, so reject out
4514      right now.  */
4515   if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4516     {
4517       rtx lhs = XEXP (x, 0);
4518
4519       /* Look through a possible SUBREG introduced by ILP32.  */
4520       if (GET_CODE (lhs) == SUBREG)
4521         lhs = SUBREG_REG (lhs);
4522
4523       gcc_assert (REG_P (lhs));
4524       gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4525                                       POINTER_REGS));
4526       return NO_REGS;
4527     }
4528
4529   return regclass;
4530 }
4531
4532 void
4533 aarch64_asm_output_labelref (FILE* f, const char *name)
4534 {
4535   asm_fprintf (f, "%U%s", name);
4536 }
4537
4538 static void
4539 aarch64_elf_asm_constructor (rtx symbol, int priority)
4540 {
4541   if (priority == DEFAULT_INIT_PRIORITY)
4542     default_ctor_section_asm_out_constructor (symbol, priority);
4543   else
4544     {
4545       section *s;
4546       char buf[18];
4547       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4548       s = get_section (buf, SECTION_WRITE, NULL);
4549       switch_to_section (s);
4550       assemble_align (POINTER_SIZE);
4551       assemble_aligned_integer (POINTER_BYTES, symbol);
4552     }
4553 }
4554
4555 static void
4556 aarch64_elf_asm_destructor (rtx symbol, int priority)
4557 {
4558   if (priority == DEFAULT_INIT_PRIORITY)
4559     default_dtor_section_asm_out_destructor (symbol, priority);
4560   else
4561     {
4562       section *s;
4563       char buf[18];
4564       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4565       s = get_section (buf, SECTION_WRITE, NULL);
4566       switch_to_section (s);
4567       assemble_align (POINTER_SIZE);
4568       assemble_aligned_integer (POINTER_BYTES, symbol);
4569     }
4570 }
4571
4572 const char*
4573 aarch64_output_casesi (rtx *operands)
4574 {
4575   char buf[100];
4576   char label[100];
4577   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
4578   int index;
4579   static const char *const patterns[4][2] =
4580   {
4581     {
4582       "ldrb\t%w3, [%0,%w1,uxtw]",
4583       "add\t%3, %4, %w3, sxtb #2"
4584     },
4585     {
4586       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4587       "add\t%3, %4, %w3, sxth #2"
4588     },
4589     {
4590       "ldr\t%w3, [%0,%w1,uxtw #2]",
4591       "add\t%3, %4, %w3, sxtw #2"
4592     },
4593     /* We assume that DImode is only generated when not optimizing and
4594        that we don't really need 64-bit address offsets.  That would
4595        imply an object file with 8GB of code in a single function!  */
4596     {
4597       "ldr\t%w3, [%0,%w1,uxtw #2]",
4598       "add\t%3, %4, %w3, sxtw #2"
4599     }
4600   };
4601
4602   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4603
4604   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4605
4606   gcc_assert (index >= 0 && index <= 3);
4607
4608   /* Need to implement table size reduction, by chaning the code below.  */
4609   output_asm_insn (patterns[index][0], operands);
4610   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4611   snprintf (buf, sizeof (buf),
4612             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4613   output_asm_insn (buf, operands);
4614   output_asm_insn (patterns[index][1], operands);
4615   output_asm_insn ("br\t%3", operands);
4616   assemble_label (asm_out_file, label);
4617   return "";
4618 }
4619
4620
4621 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4622    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4623    operator.  */
4624
4625 int
4626 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4627 {
4628   if (shift >= 0 && shift <= 3)
4629     {
4630       int size;
4631       for (size = 8; size <= 32; size *= 2)
4632         {
4633           HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4634           if (mask == bits << shift)
4635             return size;
4636         }
4637     }
4638   return 0;
4639 }
4640
4641 static bool
4642 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4643                                    const_rtx x ATTRIBUTE_UNUSED)
4644 {
4645   /* We can't use blocks for constants when we're using a per-function
4646      constant pool.  */
4647   return false;
4648 }
4649
4650 static section *
4651 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4652                             rtx x ATTRIBUTE_UNUSED,
4653                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4654 {
4655   /* Force all constant pool entries into the current function section.  */
4656   return function_section (current_function_decl);
4657 }
4658
4659
4660 /* Costs.  */
4661
4662 /* Helper function for rtx cost calculation.  Strip a shift expression
4663    from X.  Returns the inner operand if successful, or the original
4664    expression on failure.  */
4665 static rtx
4666 aarch64_strip_shift (rtx x)
4667 {
4668   rtx op = x;
4669
4670   /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4671      we can convert both to ROR during final output.  */
4672   if ((GET_CODE (op) == ASHIFT
4673        || GET_CODE (op) == ASHIFTRT
4674        || GET_CODE (op) == LSHIFTRT
4675        || GET_CODE (op) == ROTATERT
4676        || GET_CODE (op) == ROTATE)
4677       && CONST_INT_P (XEXP (op, 1)))
4678     return XEXP (op, 0);
4679
4680   if (GET_CODE (op) == MULT
4681       && CONST_INT_P (XEXP (op, 1))
4682       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4683     return XEXP (op, 0);
4684
4685   return x;
4686 }
4687
4688 /* Helper function for rtx cost calculation.  Strip an extend
4689    expression from X.  Returns the inner operand if successful, or the
4690    original expression on failure.  We deal with a number of possible
4691    canonicalization variations here.  */
4692 static rtx
4693 aarch64_strip_extend (rtx x)
4694 {
4695   rtx op = x;
4696
4697   /* Zero and sign extraction of a widened value.  */
4698   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4699       && XEXP (op, 2) == const0_rtx
4700       && GET_CODE (XEXP (op, 0)) == MULT
4701       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4702                                          XEXP (op, 1)))
4703     return XEXP (XEXP (op, 0), 0);
4704
4705   /* It can also be represented (for zero-extend) as an AND with an
4706      immediate.  */
4707   if (GET_CODE (op) == AND
4708       && GET_CODE (XEXP (op, 0)) == MULT
4709       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4710       && CONST_INT_P (XEXP (op, 1))
4711       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4712                            INTVAL (XEXP (op, 1))) != 0)
4713     return XEXP (XEXP (op, 0), 0);
4714
4715   /* Now handle extended register, as this may also have an optional
4716      left shift by 1..4.  */
4717   if (GET_CODE (op) == ASHIFT
4718       && CONST_INT_P (XEXP (op, 1))
4719       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4720     op = XEXP (op, 0);
4721
4722   if (GET_CODE (op) == ZERO_EXTEND
4723       || GET_CODE (op) == SIGN_EXTEND)
4724     op = XEXP (op, 0);
4725
4726   if (op != x)
4727     return op;
4728
4729   return x;
4730 }
4731
4732 /* Helper function for rtx cost calculation.  Calculate the cost of
4733    a MULT, which may be part of a multiply-accumulate rtx.  Return
4734    the calculated cost of the expression, recursing manually in to
4735    operands where needed.  */
4736
4737 static int
4738 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4739 {
4740   rtx op0, op1;
4741   const struct cpu_cost_table *extra_cost
4742     = aarch64_tune_params->insn_extra_cost;
4743   int cost = 0;
4744   bool maybe_fma = (outer == PLUS || outer == MINUS);
4745   enum machine_mode mode = GET_MODE (x);
4746
4747   gcc_checking_assert (code == MULT);
4748
4749   op0 = XEXP (x, 0);
4750   op1 = XEXP (x, 1);
4751
4752   if (VECTOR_MODE_P (mode))
4753     mode = GET_MODE_INNER (mode);
4754
4755   /* Integer multiply/fma.  */
4756   if (GET_MODE_CLASS (mode) == MODE_INT)
4757     {
4758       /* The multiply will be canonicalized as a shift, cost it as such.  */
4759       if (CONST_INT_P (op1)
4760           && exact_log2 (INTVAL (op1)) > 0)
4761         {
4762           if (speed)
4763             {
4764               if (maybe_fma)
4765                 /* ADD (shifted register).  */
4766                 cost += extra_cost->alu.arith_shift;
4767               else
4768                 /* LSL (immediate).  */
4769                 cost += extra_cost->alu.shift;
4770             }
4771
4772           cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4773
4774           return cost;
4775         }
4776
4777       /* Integer multiplies or FMAs have zero/sign extending variants.  */
4778       if ((GET_CODE (op0) == ZERO_EXTEND
4779            && GET_CODE (op1) == ZERO_EXTEND)
4780           || (GET_CODE (op0) == SIGN_EXTEND
4781               && GET_CODE (op1) == SIGN_EXTEND))
4782         {
4783           cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4784                   + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4785
4786           if (speed)
4787             {
4788               if (maybe_fma)
4789                 /* MADD/SMADDL/UMADDL.  */
4790                 cost += extra_cost->mult[0].extend_add;
4791               else
4792                 /* MUL/SMULL/UMULL.  */
4793                 cost += extra_cost->mult[0].extend;
4794             }
4795
4796           return cost;
4797         }
4798
4799       /* This is either an integer multiply or an FMA.  In both cases
4800          we want to recurse and cost the operands.  */
4801       cost += rtx_cost (op0, MULT, 0, speed)
4802               + rtx_cost (op1, MULT, 1, speed);
4803
4804       if (speed)
4805         {
4806           if (maybe_fma)
4807             /* MADD.  */
4808             cost += extra_cost->mult[mode == DImode].add;
4809           else
4810             /* MUL.  */
4811             cost += extra_cost->mult[mode == DImode].simple;
4812         }
4813
4814       return cost;
4815     }
4816   else
4817     {
4818       if (speed)
4819         {
4820           /* Floating-point FMA/FMUL can also support negations of the
4821              operands.  */
4822           if (GET_CODE (op0) == NEG)
4823             op0 = XEXP (op0, 0);
4824           if (GET_CODE (op1) == NEG)
4825             op1 = XEXP (op1, 0);
4826
4827           if (maybe_fma)
4828             /* FMADD/FNMADD/FNMSUB/FMSUB.  */
4829             cost += extra_cost->fp[mode == DFmode].fma;
4830           else
4831             /* FMUL/FNMUL.  */
4832             cost += extra_cost->fp[mode == DFmode].mult;
4833         }
4834
4835       cost += rtx_cost (op0, MULT, 0, speed)
4836               + rtx_cost (op1, MULT, 1, speed);
4837       return cost;
4838     }
4839 }
4840
4841 static int
4842 aarch64_address_cost (rtx x,
4843                       enum machine_mode mode,
4844                       addr_space_t as ATTRIBUTE_UNUSED,
4845                       bool speed)
4846 {
4847   enum rtx_code c = GET_CODE (x);
4848   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4849   struct aarch64_address_info info;
4850   int cost = 0;
4851   info.shift = 0;
4852
4853   if (!aarch64_classify_address (&info, x, mode, c, false))
4854     {
4855       if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4856         {
4857           /* This is a CONST or SYMBOL ref which will be split
4858              in a different way depending on the code model in use.
4859              Cost it through the generic infrastructure.  */
4860           int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4861           /* Divide through by the cost of one instruction to
4862              bring it to the same units as the address costs.  */
4863           cost_symbol_ref /= COSTS_N_INSNS (1);
4864           /* The cost is then the cost of preparing the address,
4865              followed by an immediate (possibly 0) offset.  */
4866           return cost_symbol_ref + addr_cost->imm_offset;
4867         }
4868       else
4869         {
4870           /* This is most likely a jump table from a case
4871              statement.  */
4872           return addr_cost->register_offset;
4873         }
4874     }
4875
4876   switch (info.type)
4877     {
4878       case ADDRESS_LO_SUM:
4879       case ADDRESS_SYMBOLIC:
4880       case ADDRESS_REG_IMM:
4881         cost += addr_cost->imm_offset;
4882         break;
4883
4884       case ADDRESS_REG_WB:
4885         if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4886           cost += addr_cost->pre_modify;
4887         else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4888           cost += addr_cost->post_modify;
4889         else
4890           gcc_unreachable ();
4891
4892         break;
4893
4894       case ADDRESS_REG_REG:
4895         cost += addr_cost->register_offset;
4896         break;
4897
4898       case ADDRESS_REG_UXTW:
4899       case ADDRESS_REG_SXTW:
4900         cost += addr_cost->register_extend;
4901         break;
4902
4903       default:
4904         gcc_unreachable ();
4905     }
4906
4907
4908   if (info.shift > 0)
4909     {
4910       /* For the sake of calculating the cost of the shifted register
4911          component, we can treat same sized modes in the same way.  */
4912       switch (GET_MODE_BITSIZE (mode))
4913         {
4914           case 16:
4915             cost += addr_cost->addr_scale_costs.hi;
4916             break;
4917
4918           case 32:
4919             cost += addr_cost->addr_scale_costs.si;
4920             break;
4921
4922           case 64:
4923             cost += addr_cost->addr_scale_costs.di;
4924             break;
4925
4926           /* We can't tell, or this is a 128-bit vector.  */
4927           default:
4928             cost += addr_cost->addr_scale_costs.ti;
4929             break;
4930         }
4931     }
4932
4933   return cost;
4934 }
4935
4936 /* Return true if the RTX X in mode MODE is a zero or sign extract
4937    usable in an ADD or SUB (extended register) instruction.  */
4938 static bool
4939 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4940 {
4941   /* Catch add with a sign extract.
4942      This is add_<optab><mode>_multp2.  */
4943   if (GET_CODE (x) == SIGN_EXTRACT
4944       || GET_CODE (x) == ZERO_EXTRACT)
4945     {
4946       rtx op0 = XEXP (x, 0);
4947       rtx op1 = XEXP (x, 1);
4948       rtx op2 = XEXP (x, 2);
4949
4950       if (GET_CODE (op0) == MULT
4951           && CONST_INT_P (op1)
4952           && op2 == const0_rtx
4953           && CONST_INT_P (XEXP (op0, 1))
4954           && aarch64_is_extend_from_extract (mode,
4955                                              XEXP (op0, 1),
4956                                              op1))
4957         {
4958           return true;
4959         }
4960     }
4961
4962   return false;
4963 }
4964
4965 static bool
4966 aarch64_frint_unspec_p (unsigned int u)
4967 {
4968   switch (u)
4969     {
4970       case UNSPEC_FRINTZ:
4971       case UNSPEC_FRINTP:
4972       case UNSPEC_FRINTM:
4973       case UNSPEC_FRINTA:
4974       case UNSPEC_FRINTN:
4975       case UNSPEC_FRINTX:
4976       case UNSPEC_FRINTI:
4977         return true;
4978
4979       default:
4980         return false;
4981     }
4982 }
4983
4984 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4985    storing it in *COST.  Result is true if the total cost of the operation
4986    has now been calculated.  */
4987 static bool
4988 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4989 {
4990   rtx inner;
4991   rtx comparator;
4992   enum rtx_code cmpcode;
4993
4994   if (COMPARISON_P (op0))
4995     {
4996       inner = XEXP (op0, 0);
4997       comparator = XEXP (op0, 1);
4998       cmpcode = GET_CODE (op0);
4999     }
5000   else
5001     {
5002       inner = op0;
5003       comparator = const0_rtx;
5004       cmpcode = NE;
5005     }
5006
5007   if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5008     {
5009       /* Conditional branch.  */
5010       if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5011         return true;
5012       else
5013         {
5014           if (cmpcode == NE || cmpcode == EQ)
5015             {
5016               if (comparator == const0_rtx)
5017                 {
5018                   /* TBZ/TBNZ/CBZ/CBNZ.  */
5019                   if (GET_CODE (inner) == ZERO_EXTRACT)
5020                     /* TBZ/TBNZ.  */
5021                     *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5022                                        0, speed);
5023                 else
5024                   /* CBZ/CBNZ.  */
5025                   *cost += rtx_cost (inner, cmpcode, 0, speed);
5026
5027                 return true;
5028               }
5029             }
5030           else if (cmpcode == LT || cmpcode == GE)
5031             {
5032               /* TBZ/TBNZ.  */
5033               if (comparator == const0_rtx)
5034                 return true;
5035             }
5036         }
5037     }
5038   else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5039     {
5040       /* It's a conditional operation based on the status flags,
5041          so it must be some flavor of CSEL.  */
5042
5043       /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
5044       if (GET_CODE (op1) == NEG
5045           || GET_CODE (op1) == NOT
5046           || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5047         op1 = XEXP (op1, 0);
5048
5049       *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5050       *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5051       return true;
5052     }
5053
5054   /* We don't know what this is, cost all operands.  */
5055   return false;
5056 }
5057
5058 /* Calculate the cost of calculating X, storing it in *COST.  Result
5059    is true if the total cost of the operation has now been calculated.  */
5060 static bool
5061 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5062                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5063 {
5064   rtx op0, op1, op2;
5065   const struct cpu_cost_table *extra_cost
5066     = aarch64_tune_params->insn_extra_cost;
5067   enum machine_mode mode = GET_MODE (x);
5068
5069   /* By default, assume that everything has equivalent cost to the
5070      cheapest instruction.  Any additional costs are applied as a delta
5071      above this default.  */
5072   *cost = COSTS_N_INSNS (1);
5073
5074   /* TODO: The cost infrastructure currently does not handle
5075      vector operations.  Assume that all vector operations
5076      are equally expensive.  */
5077   if (VECTOR_MODE_P (mode))
5078     {
5079       if (speed)
5080         *cost += extra_cost->vect.alu;
5081       return true;
5082     }
5083
5084   switch (code)
5085     {
5086     case SET:
5087       /* The cost depends entirely on the operands to SET.  */
5088       *cost = 0;
5089       op0 = SET_DEST (x);
5090       op1 = SET_SRC (x);
5091
5092       switch (GET_CODE (op0))
5093         {
5094         case MEM:
5095           if (speed)
5096             {
5097               rtx address = XEXP (op0, 0);
5098               if (GET_MODE_CLASS (mode) == MODE_INT)
5099                 *cost += extra_cost->ldst.store;
5100               else if (mode == SFmode)
5101                 *cost += extra_cost->ldst.storef;
5102               else if (mode == DFmode)
5103                 *cost += extra_cost->ldst.stored;
5104
5105               *cost +=
5106                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5107                                                      0, speed));
5108             }
5109
5110           *cost += rtx_cost (op1, SET, 1, speed);
5111           return true;
5112
5113         case SUBREG:
5114           if (! REG_P (SUBREG_REG (op0)))
5115             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5116
5117           /* Fall through.  */
5118         case REG:
5119           /* const0_rtx is in general free, but we will use an
5120              instruction to set a register to 0.  */
5121           if (REG_P (op1) || op1 == const0_rtx)
5122             {
5123               /* The cost is 1 per register copied.  */
5124               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5125                               / UNITS_PER_WORD;
5126               *cost = COSTS_N_INSNS (n_minus_1 + 1);
5127             }
5128           else
5129             /* Cost is just the cost of the RHS of the set.  */
5130             *cost += rtx_cost (op1, SET, 1, speed);
5131           return true;
5132
5133         case ZERO_EXTRACT:
5134         case SIGN_EXTRACT:
5135           /* Bit-field insertion.  Strip any redundant widening of
5136              the RHS to meet the width of the target.  */
5137           if (GET_CODE (op1) == SUBREG)
5138             op1 = SUBREG_REG (op1);
5139           if ((GET_CODE (op1) == ZERO_EXTEND
5140                || GET_CODE (op1) == SIGN_EXTEND)
5141               && CONST_INT_P (XEXP (op0, 1))
5142               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5143                   >= INTVAL (XEXP (op0, 1))))
5144             op1 = XEXP (op1, 0);
5145
5146           if (CONST_INT_P (op1))
5147             {
5148               /* MOV immediate is assumed to always be cheap.  */
5149               *cost = COSTS_N_INSNS (1);
5150             }
5151           else
5152             {
5153               /* BFM.  */
5154               if (speed)
5155                 *cost += extra_cost->alu.bfi;
5156               *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5157             }
5158
5159           return true;
5160
5161         default:
5162           /* We can't make sense of this, assume default cost.  */
5163           *cost = COSTS_N_INSNS (1);
5164           return false;
5165         }
5166       return false;
5167
5168     case CONST_INT:
5169       /* If an instruction can incorporate a constant within the
5170          instruction, the instruction's expression avoids calling
5171          rtx_cost() on the constant.  If rtx_cost() is called on a
5172          constant, then it is usually because the constant must be
5173          moved into a register by one or more instructions.
5174
5175          The exception is constant 0, which can be expressed
5176          as XZR/WZR and is therefore free.  The exception to this is
5177          if we have (set (reg) (const0_rtx)) in which case we must cost
5178          the move.  However, we can catch that when we cost the SET, so
5179          we don't need to consider that here.  */
5180       if (x == const0_rtx)
5181         *cost = 0;
5182       else
5183         {
5184           /* To an approximation, building any other constant is
5185              proportionally expensive to the number of instructions
5186              required to build that constant.  This is true whether we
5187              are compiling for SPEED or otherwise.  */
5188           *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5189                                                          INTVAL (x),
5190                                                          false));
5191         }
5192       return true;
5193
5194     case CONST_DOUBLE:
5195       if (speed)
5196         {
5197           /* mov[df,sf]_aarch64.  */
5198           if (aarch64_float_const_representable_p (x))
5199             /* FMOV (scalar immediate).  */
5200             *cost += extra_cost->fp[mode == DFmode].fpconst;
5201           else if (!aarch64_float_const_zero_rtx_p (x))
5202             {
5203               /* This will be a load from memory.  */
5204               if (mode == DFmode)
5205                 *cost += extra_cost->ldst.loadd;
5206               else
5207                 *cost += extra_cost->ldst.loadf;
5208             }
5209           else
5210             /* Otherwise this is +0.0.  We get this using MOVI d0, #0
5211                or MOV v0.s[0], wzr - neither of which are modeled by the
5212                cost tables.  Just use the default cost.  */
5213             {
5214             }
5215         }
5216
5217       return true;
5218
5219     case MEM:
5220       if (speed)
5221         {
5222           /* For loads we want the base cost of a load, plus an
5223              approximation for the additional cost of the addressing
5224              mode.  */
5225           rtx address = XEXP (x, 0);
5226           if (GET_MODE_CLASS (mode) == MODE_INT)
5227             *cost += extra_cost->ldst.load;
5228           else if (mode == SFmode)
5229             *cost += extra_cost->ldst.loadf;
5230           else if (mode == DFmode)
5231             *cost += extra_cost->ldst.loadd;
5232
5233           *cost +=
5234                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5235                                                      0, speed));
5236         }
5237
5238       return true;
5239
5240     case NEG:
5241       op0 = XEXP (x, 0);
5242
5243       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5244        {
5245           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5246               || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5247             {
5248               /* CSETM.  */
5249               *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5250               return true;
5251             }
5252
5253           /* Cost this as SUB wzr, X.  */
5254           op0 = CONST0_RTX (GET_MODE (x));
5255           op1 = XEXP (x, 0);
5256           goto cost_minus;
5257         }
5258
5259       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5260         {
5261           /* Support (neg(fma...)) as a single instruction only if
5262              sign of zeros is unimportant.  This matches the decision
5263              making in aarch64.md.  */
5264           if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5265             {
5266               /* FNMADD.  */
5267               *cost = rtx_cost (op0, NEG, 0, speed);
5268               return true;
5269             }
5270           if (speed)
5271             /* FNEG.  */
5272             *cost += extra_cost->fp[mode == DFmode].neg;
5273           return false;
5274         }
5275
5276       return false;
5277
5278     case CLRSB:
5279     case CLZ:
5280       if (speed)
5281         *cost += extra_cost->alu.clz;
5282
5283       return false;
5284
5285     case COMPARE:
5286       op0 = XEXP (x, 0);
5287       op1 = XEXP (x, 1);
5288
5289       if (op1 == const0_rtx
5290           && GET_CODE (op0) == AND)
5291         {
5292           x = op0;
5293           goto cost_logic;
5294         }
5295
5296       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5297         {
5298           /* TODO: A write to the CC flags possibly costs extra, this
5299              needs encoding in the cost tables.  */
5300
5301           /* CC_ZESWPmode supports zero extend for free.  */
5302           if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5303             op0 = XEXP (op0, 0);
5304
5305           /* ANDS.  */
5306           if (GET_CODE (op0) == AND)
5307             {
5308               x = op0;
5309               goto cost_logic;
5310             }
5311
5312           if (GET_CODE (op0) == PLUS)
5313             {
5314               /* ADDS (and CMN alias).  */
5315               x = op0;
5316               goto cost_plus;
5317             }
5318
5319           if (GET_CODE (op0) == MINUS)
5320             {
5321               /* SUBS.  */
5322               x = op0;
5323               goto cost_minus;
5324             }
5325
5326           if (GET_CODE (op1) == NEG)
5327             {
5328               /* CMN.  */
5329               if (speed)
5330                 *cost += extra_cost->alu.arith;
5331
5332               *cost += rtx_cost (op0, COMPARE, 0, speed);
5333               *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5334               return true;
5335             }
5336
5337           /* CMP.
5338
5339              Compare can freely swap the order of operands, and
5340              canonicalization puts the more complex operation first.
5341              But the integer MINUS logic expects the shift/extend
5342              operation in op1.  */
5343           if (! (REG_P (op0)
5344                  || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5345           {
5346             op0 = XEXP (x, 1);
5347             op1 = XEXP (x, 0);
5348           }
5349           goto cost_minus;
5350         }
5351
5352       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5353         {
5354           /* FCMP.  */
5355           if (speed)
5356             *cost += extra_cost->fp[mode == DFmode].compare;
5357
5358           if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5359             {
5360               /* FCMP supports constant 0.0 for no extra cost. */
5361               return true;
5362             }
5363           return false;
5364         }
5365
5366       return false;
5367
5368     case MINUS:
5369       {
5370         op0 = XEXP (x, 0);
5371         op1 = XEXP (x, 1);
5372
5373 cost_minus:
5374         /* Detect valid immediates.  */
5375         if ((GET_MODE_CLASS (mode) == MODE_INT
5376              || (GET_MODE_CLASS (mode) == MODE_CC
5377                  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5378             && CONST_INT_P (op1)
5379             && aarch64_uimm12_shift (INTVAL (op1)))
5380           {
5381             *cost += rtx_cost (op0, MINUS, 0, speed);
5382
5383             if (speed)
5384               /* SUB(S) (immediate).  */
5385               *cost += extra_cost->alu.arith;
5386             return true;
5387
5388           }
5389
5390         /* Look for SUB (extended register).  */
5391         if (aarch64_rtx_arith_op_extract_p (op1, mode))
5392           {
5393             if (speed)
5394               *cost += extra_cost->alu.arith_shift;
5395
5396             *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5397                                (enum rtx_code) GET_CODE (op1),
5398                                0, speed);
5399             return true;
5400           }
5401
5402         rtx new_op1 = aarch64_strip_extend (op1);
5403
5404         /* Cost this as an FMA-alike operation.  */
5405         if ((GET_CODE (new_op1) == MULT
5406              || GET_CODE (new_op1) == ASHIFT)
5407             && code != COMPARE)
5408           {
5409             *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5410                                             (enum rtx_code) code,
5411                                             speed);
5412             *cost += rtx_cost (op0, MINUS, 0, speed);
5413             return true;
5414           }
5415
5416         *cost += rtx_cost (new_op1, MINUS, 1, speed);
5417
5418         if (speed)
5419           {
5420             if (GET_MODE_CLASS (mode) == MODE_INT)
5421               /* SUB(S).  */
5422               *cost += extra_cost->alu.arith;
5423             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5424               /* FSUB.  */
5425               *cost += extra_cost->fp[mode == DFmode].addsub;
5426           }
5427         return true;
5428       }
5429
5430     case PLUS:
5431       {
5432         rtx new_op0;
5433
5434         op0 = XEXP (x, 0);
5435         op1 = XEXP (x, 1);
5436
5437 cost_plus:
5438         if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5439             || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5440           {
5441             /* CSINC.  */
5442             *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5443             *cost += rtx_cost (op1, PLUS, 1, speed);
5444             return true;
5445           }
5446
5447         if (GET_MODE_CLASS (mode) == MODE_INT
5448             && CONST_INT_P (op1)
5449             && aarch64_uimm12_shift (INTVAL (op1)))
5450           {
5451             *cost += rtx_cost (op0, PLUS, 0, speed);
5452
5453             if (speed)
5454               /* ADD (immediate).  */
5455               *cost += extra_cost->alu.arith;
5456             return true;
5457           }
5458
5459         /* Look for ADD (extended register).  */
5460         if (aarch64_rtx_arith_op_extract_p (op0, mode))
5461           {
5462             if (speed)
5463               *cost += extra_cost->alu.arith_shift;
5464
5465             *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5466                                (enum rtx_code) GET_CODE (op0),
5467                                0, speed);
5468             return true;
5469           }
5470
5471         /* Strip any extend, leave shifts behind as we will
5472            cost them through mult_cost.  */
5473         new_op0 = aarch64_strip_extend (op0);
5474
5475         if (GET_CODE (new_op0) == MULT
5476             || GET_CODE (new_op0) == ASHIFT)
5477           {
5478             *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5479                                             speed);
5480             *cost += rtx_cost (op1, PLUS, 1, speed);
5481             return true;
5482           }
5483
5484         *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5485                   + rtx_cost (op1, PLUS, 1, speed));
5486
5487         if (speed)
5488           {
5489             if (GET_MODE_CLASS (mode) == MODE_INT)
5490               /* ADD.  */
5491               *cost += extra_cost->alu.arith;
5492             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5493               /* FADD.  */
5494               *cost += extra_cost->fp[mode == DFmode].addsub;
5495           }
5496         return true;
5497       }
5498
5499     case BSWAP:
5500       *cost = COSTS_N_INSNS (1);
5501
5502       if (speed)
5503         *cost += extra_cost->alu.rev;
5504
5505       return false;
5506
5507     case IOR:
5508       if (aarch_rev16_p (x))
5509         {
5510           *cost = COSTS_N_INSNS (1);
5511
5512           if (speed)
5513             *cost += extra_cost->alu.rev;
5514
5515           return true;
5516         }
5517     /* Fall through.  */
5518     case XOR:
5519     case AND:
5520     cost_logic:
5521       op0 = XEXP (x, 0);
5522       op1 = XEXP (x, 1);
5523
5524       if (code == AND
5525           && GET_CODE (op0) == MULT
5526           && CONST_INT_P (XEXP (op0, 1))
5527           && CONST_INT_P (op1)
5528           && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5529                                INTVAL (op1)) != 0)
5530         {
5531           /* This is a UBFM/SBFM.  */
5532           *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5533           if (speed)
5534             *cost += extra_cost->alu.bfx;
5535           return true;
5536         }
5537
5538       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5539         {
5540           /* We possibly get the immediate for free, this is not
5541              modelled.  */
5542           if (CONST_INT_P (op1)
5543               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5544             {
5545               *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5546
5547               if (speed)
5548                 *cost += extra_cost->alu.logical;
5549
5550               return true;
5551             }
5552           else
5553             {
5554               rtx new_op0 = op0;
5555
5556               /* Handle ORN, EON, or BIC.  */
5557               if (GET_CODE (op0) == NOT)
5558                 op0 = XEXP (op0, 0);
5559
5560               new_op0 = aarch64_strip_shift (op0);
5561
5562               /* If we had a shift on op0 then this is a logical-shift-
5563                  by-register/immediate operation.  Otherwise, this is just
5564                  a logical operation.  */
5565               if (speed)
5566                 {
5567                   if (new_op0 != op0)
5568                     {
5569                       /* Shift by immediate.  */
5570                       if (CONST_INT_P (XEXP (op0, 1)))
5571                         *cost += extra_cost->alu.log_shift;
5572                       else
5573                         *cost += extra_cost->alu.log_shift_reg;
5574                     }
5575                   else
5576                     *cost += extra_cost->alu.logical;
5577                 }
5578
5579               /* In both cases we want to cost both operands.  */
5580               *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5581                        + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5582
5583               return true;
5584             }
5585         }
5586       return false;
5587
5588     case NOT:
5589       /* MVN.  */
5590       if (speed)
5591         *cost += extra_cost->alu.logical;
5592
5593       /* The logical instruction could have the shifted register form,
5594          but the cost is the same if the shift is processed as a separate
5595          instruction, so we don't bother with it here.  */
5596       return false;
5597
5598     case ZERO_EXTEND:
5599
5600       op0 = XEXP (x, 0);
5601       /* If a value is written in SI mode, then zero extended to DI
5602          mode, the operation will in general be free as a write to
5603          a 'w' register implicitly zeroes the upper bits of an 'x'
5604          register.  However, if this is
5605
5606            (set (reg) (zero_extend (reg)))
5607
5608          we must cost the explicit register move.  */
5609       if (mode == DImode
5610           && GET_MODE (op0) == SImode
5611           && outer == SET)
5612         {
5613           int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5614
5615           if (!op_cost && speed)
5616             /* MOV.  */
5617             *cost += extra_cost->alu.extend;
5618           else
5619             /* Free, the cost is that of the SI mode operation.  */
5620             *cost = op_cost;
5621
5622           return true;
5623         }
5624       else if (MEM_P (XEXP (x, 0)))
5625         {
5626           /* All loads can zero extend to any size for free.  */
5627           *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5628           return true;
5629         }
5630
5631       /* UXTB/UXTH.  */
5632       if (speed)
5633         *cost += extra_cost->alu.extend;
5634
5635       return false;
5636
5637     case SIGN_EXTEND:
5638       if (MEM_P (XEXP (x, 0)))
5639         {
5640           /* LDRSH.  */
5641           if (speed)
5642             {
5643               rtx address = XEXP (XEXP (x, 0), 0);
5644               *cost += extra_cost->ldst.load_sign_extend;
5645
5646               *cost +=
5647                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5648                                                      0, speed));
5649             }
5650           return true;
5651         }
5652
5653       if (speed)
5654         *cost += extra_cost->alu.extend;
5655       return false;
5656
5657     case ASHIFT:
5658       op0 = XEXP (x, 0);
5659       op1 = XEXP (x, 1);
5660
5661       if (CONST_INT_P (op1))
5662         {
5663           /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
5664              aliases.  */
5665           if (speed)
5666             *cost += extra_cost->alu.shift;
5667
5668           /* We can incorporate zero/sign extend for free.  */
5669           if (GET_CODE (op0) == ZERO_EXTEND
5670               || GET_CODE (op0) == SIGN_EXTEND)
5671             op0 = XEXP (op0, 0);
5672
5673           *cost += rtx_cost (op0, ASHIFT, 0, speed);
5674           return true;
5675         }
5676       else
5677         {
5678           /* LSLV.  */
5679           if (speed)
5680             *cost += extra_cost->alu.shift_reg;
5681
5682           return false;  /* All arguments need to be in registers.  */
5683         }
5684
5685     case ROTATE:
5686     case ROTATERT:
5687     case LSHIFTRT:
5688     case ASHIFTRT:
5689       op0 = XEXP (x, 0);
5690       op1 = XEXP (x, 1);
5691
5692       if (CONST_INT_P (op1))
5693         {
5694           /* ASR (immediate) and friends.  */
5695           if (speed)
5696             *cost += extra_cost->alu.shift;
5697
5698           *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5699           return true;
5700         }
5701       else
5702         {
5703
5704           /* ASR (register) and friends.  */
5705           if (speed)
5706             *cost += extra_cost->alu.shift_reg;
5707
5708           return false;  /* All arguments need to be in registers.  */
5709         }
5710
5711     case SYMBOL_REF:
5712
5713       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5714         {
5715           /* LDR.  */
5716           if (speed)
5717             *cost += extra_cost->ldst.load;
5718         }
5719       else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5720                || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5721         {
5722           /* ADRP, followed by ADD.  */
5723           *cost += COSTS_N_INSNS (1);
5724           if (speed)
5725             *cost += 2 * extra_cost->alu.arith;
5726         }
5727       else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5728                || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5729         {
5730           /* ADR.  */
5731           if (speed)
5732             *cost += extra_cost->alu.arith;
5733         }
5734
5735       if (flag_pic)
5736         {
5737           /* One extra load instruction, after accessing the GOT.  */
5738           *cost += COSTS_N_INSNS (1);
5739           if (speed)
5740             *cost += extra_cost->ldst.load;
5741         }
5742       return true;
5743
5744     case HIGH:
5745     case LO_SUM:
5746       /* ADRP/ADD (immediate).  */
5747       if (speed)
5748         *cost += extra_cost->alu.arith;
5749       return true;
5750
5751     case ZERO_EXTRACT:
5752     case SIGN_EXTRACT:
5753       /* UBFX/SBFX.  */
5754       if (speed)
5755         *cost += extra_cost->alu.bfx;
5756
5757       /* We can trust that the immediates used will be correct (there
5758          are no by-register forms), so we need only cost op0.  */
5759       *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5760       return true;
5761
5762     case MULT:
5763       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5764       /* aarch64_rtx_mult_cost always handles recursion to its
5765          operands.  */
5766       return true;
5767
5768     case MOD:
5769     case UMOD:
5770       if (speed)
5771         {
5772           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5773             *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5774                       + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5775           else if (GET_MODE (x) == DFmode)
5776             *cost += (extra_cost->fp[1].mult
5777                       + extra_cost->fp[1].div);
5778           else if (GET_MODE (x) == SFmode)
5779             *cost += (extra_cost->fp[0].mult
5780                       + extra_cost->fp[0].div);
5781         }
5782       return false;  /* All arguments need to be in registers.  */
5783
5784     case DIV:
5785     case UDIV:
5786     case SQRT:
5787       if (speed)
5788         {
5789           if (GET_MODE_CLASS (mode) == MODE_INT)
5790             /* There is no integer SQRT, so only DIV and UDIV can get
5791                here.  */
5792             *cost += extra_cost->mult[mode == DImode].idiv;
5793           else
5794             *cost += extra_cost->fp[mode == DFmode].div;
5795         }
5796       return false;  /* All arguments need to be in registers.  */
5797
5798     case IF_THEN_ELSE:
5799       return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5800                                          XEXP (x, 2), cost, speed);
5801
5802     case EQ:
5803     case NE:
5804     case GT:
5805     case GTU:
5806     case LT:
5807     case LTU:
5808     case GE:
5809     case GEU:
5810     case LE:
5811     case LEU:
5812
5813       return false; /* All arguments must be in registers.  */
5814
5815     case FMA:
5816       op0 = XEXP (x, 0);
5817       op1 = XEXP (x, 1);
5818       op2 = XEXP (x, 2);
5819
5820       if (speed)
5821         *cost += extra_cost->fp[mode == DFmode].fma;
5822
5823       /* FMSUB, FNMADD, and FNMSUB are free.  */
5824       if (GET_CODE (op0) == NEG)
5825         op0 = XEXP (op0, 0);
5826
5827       if (GET_CODE (op2) == NEG)
5828         op2 = XEXP (op2, 0);
5829
5830       /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5831          and the by-element operand as operand 0.  */
5832       if (GET_CODE (op1) == NEG)
5833         op1 = XEXP (op1, 0);
5834
5835       /* Catch vector-by-element operations.  The by-element operand can
5836          either be (vec_duplicate (vec_select (x))) or just
5837          (vec_select (x)), depending on whether we are multiplying by
5838          a vector or a scalar.
5839
5840          Canonicalization is not very good in these cases, FMA4 will put the
5841          by-element operand as operand 0, FNMA4 will have it as operand 1.  */
5842       if (GET_CODE (op0) == VEC_DUPLICATE)
5843         op0 = XEXP (op0, 0);
5844       else if (GET_CODE (op1) == VEC_DUPLICATE)
5845         op1 = XEXP (op1, 0);
5846
5847       if (GET_CODE (op0) == VEC_SELECT)
5848         op0 = XEXP (op0, 0);
5849       else if (GET_CODE (op1) == VEC_SELECT)
5850         op1 = XEXP (op1, 0);
5851
5852       /* If the remaining parameters are not registers,
5853          get the cost to put them into registers.  */
5854       *cost += rtx_cost (op0, FMA, 0, speed);
5855       *cost += rtx_cost (op1, FMA, 1, speed);
5856       *cost += rtx_cost (op2, FMA, 2, speed);
5857       return true;
5858
5859     case FLOAT_EXTEND:
5860       if (speed)
5861         *cost += extra_cost->fp[mode == DFmode].widen;
5862       return false;
5863
5864     case FLOAT_TRUNCATE:
5865       if (speed)
5866         *cost += extra_cost->fp[mode == DFmode].narrow;
5867       return false;
5868
5869     case FIX:
5870     case UNSIGNED_FIX:
5871       x = XEXP (x, 0);
5872       /* Strip the rounding part.  They will all be implemented
5873          by the fcvt* family of instructions anyway.  */
5874       if (GET_CODE (x) == UNSPEC)
5875         {
5876           unsigned int uns_code = XINT (x, 1);
5877
5878           if (uns_code == UNSPEC_FRINTA
5879               || uns_code == UNSPEC_FRINTM
5880               || uns_code == UNSPEC_FRINTN
5881               || uns_code == UNSPEC_FRINTP
5882               || uns_code == UNSPEC_FRINTZ)
5883             x = XVECEXP (x, 0, 0);
5884         }
5885
5886       if (speed)
5887         *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5888
5889       *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5890       return true;
5891
5892     case ABS:
5893       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5894         {
5895           /* FABS and FNEG are analogous.  */
5896           if (speed)
5897             *cost += extra_cost->fp[mode == DFmode].neg;
5898         }
5899       else
5900         {
5901           /* Integer ABS will either be split to
5902              two arithmetic instructions, or will be an ABS
5903              (scalar), which we don't model.  */
5904           *cost = COSTS_N_INSNS (2);
5905           if (speed)
5906             *cost += 2 * extra_cost->alu.arith;
5907         }
5908       return false;
5909
5910     case SMAX:
5911     case SMIN:
5912       if (speed)
5913         {
5914           /* FMAXNM/FMINNM/FMAX/FMIN.
5915              TODO: This may not be accurate for all implementations, but
5916              we do not model this in the cost tables.  */
5917           *cost += extra_cost->fp[mode == DFmode].addsub;
5918         }
5919       return false;
5920
5921     case UNSPEC:
5922       /* The floating point round to integer frint* instructions.  */
5923       if (aarch64_frint_unspec_p (XINT (x, 1)))
5924         {
5925           if (speed)
5926             *cost += extra_cost->fp[mode == DFmode].roundint;
5927
5928           return false;
5929         }
5930
5931       if (XINT (x, 1) == UNSPEC_RBIT)
5932         {
5933           if (speed)
5934             *cost += extra_cost->alu.rev;
5935
5936           return false;
5937         }
5938       break;
5939
5940     case TRUNCATE:
5941
5942       /* Decompose <su>muldi3_highpart.  */
5943       if (/* (truncate:DI  */
5944           mode == DImode
5945           /*   (lshiftrt:TI  */
5946           && GET_MODE (XEXP (x, 0)) == TImode
5947           && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5948           /*      (mult:TI  */
5949           && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5950           /*        (ANY_EXTEND:TI (reg:DI))
5951                     (ANY_EXTEND:TI (reg:DI)))  */
5952           && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5953                && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5954               || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5955                   && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5956           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5957           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5958           /*     (const_int 64)  */
5959           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5960           && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5961         {
5962           /* UMULH/SMULH.  */
5963           if (speed)
5964             *cost += extra_cost->mult[mode == DImode].extend;
5965           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5966                              MULT, 0, speed);
5967           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5968                              MULT, 1, speed);
5969           return true;
5970         }
5971
5972       /* Fall through.  */
5973     default:
5974       break;
5975     }
5976
5977   if (dump_file && (dump_flags & TDF_DETAILS))
5978     fprintf (dump_file,
5979       "\nFailed to cost RTX.  Assuming default cost.\n");
5980
5981   return true;
5982 }
5983
5984 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5985    calculated for X.  This cost is stored in *COST.  Returns true
5986    if the total cost of X was calculated.  */
5987 static bool
5988 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5989                    int param, int *cost, bool speed)
5990 {
5991   bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5992
5993   if (dump_file && (dump_flags & TDF_DETAILS))
5994     {
5995       print_rtl_single (dump_file, x);
5996       fprintf (dump_file, "\n%s cost: %d (%s)\n",
5997                speed ? "Hot" : "Cold",
5998                *cost, result ? "final" : "partial");
5999     }
6000
6001   return result;
6002 }
6003
6004 static int
6005 aarch64_register_move_cost (enum machine_mode mode,
6006                             reg_class_t from_i, reg_class_t to_i)
6007 {
6008   enum reg_class from = (enum reg_class) from_i;
6009   enum reg_class to = (enum reg_class) to_i;
6010   const struct cpu_regmove_cost *regmove_cost
6011     = aarch64_tune_params->regmove_cost;
6012
6013   /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
6014   if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6015     to = GENERAL_REGS;
6016
6017   if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6018     from = GENERAL_REGS;
6019
6020   /* Moving between GPR and stack cost is the same as GP2GP.  */
6021   if ((from == GENERAL_REGS && to == STACK_REG)
6022       || (to == GENERAL_REGS && from == STACK_REG))
6023     return regmove_cost->GP2GP;
6024
6025   /* To/From the stack register, we move via the gprs.  */
6026   if (to == STACK_REG || from == STACK_REG)
6027     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6028             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6029
6030   if (GET_MODE_SIZE (mode) == 16)
6031     {
6032       /* 128-bit operations on general registers require 2 instructions.  */
6033       if (from == GENERAL_REGS && to == GENERAL_REGS)
6034         return regmove_cost->GP2GP * 2;
6035       else if (from == GENERAL_REGS)
6036         return regmove_cost->GP2FP * 2;
6037       else if (to == GENERAL_REGS)
6038         return regmove_cost->FP2GP * 2;
6039
6040       /* When AdvSIMD instructions are disabled it is not possible to move
6041          a 128-bit value directly between Q registers.  This is handled in
6042          secondary reload.  A general register is used as a scratch to move
6043          the upper DI value and the lower DI value is moved directly,
6044          hence the cost is the sum of three moves. */
6045       if (! TARGET_SIMD)
6046         return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6047
6048       return regmove_cost->FP2FP;
6049     }
6050
6051   if (from == GENERAL_REGS && to == GENERAL_REGS)
6052     return regmove_cost->GP2GP;
6053   else if (from == GENERAL_REGS)
6054     return regmove_cost->GP2FP;
6055   else if (to == GENERAL_REGS)
6056     return regmove_cost->FP2GP;
6057
6058   return regmove_cost->FP2FP;
6059 }
6060
6061 static int
6062 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
6063                           reg_class_t rclass ATTRIBUTE_UNUSED,
6064                           bool in ATTRIBUTE_UNUSED)
6065 {
6066   return aarch64_tune_params->memmov_cost;
6067 }
6068
6069 /* Return the number of instructions that can be issued per cycle.  */
6070 static int
6071 aarch64_sched_issue_rate (void)
6072 {
6073   return aarch64_tune_params->issue_rate;
6074 }
6075
6076 /* Vectorizer cost model target hooks.  */
6077
6078 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
6079 static int
6080 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6081                                     tree vectype,
6082                                     int misalign ATTRIBUTE_UNUSED)
6083 {
6084   unsigned elements;
6085
6086   switch (type_of_cost)
6087     {
6088       case scalar_stmt:
6089         return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6090
6091       case scalar_load:
6092         return aarch64_tune_params->vec_costs->scalar_load_cost;
6093
6094       case scalar_store:
6095         return aarch64_tune_params->vec_costs->scalar_store_cost;
6096
6097       case vector_stmt:
6098         return aarch64_tune_params->vec_costs->vec_stmt_cost;
6099
6100       case vector_load:
6101         return aarch64_tune_params->vec_costs->vec_align_load_cost;
6102
6103       case vector_store:
6104         return aarch64_tune_params->vec_costs->vec_store_cost;
6105
6106       case vec_to_scalar:
6107         return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6108
6109       case scalar_to_vec:
6110         return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6111
6112       case unaligned_load:
6113         return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6114
6115       case unaligned_store:
6116         return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6117
6118       case cond_branch_taken:
6119         return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6120
6121       case cond_branch_not_taken:
6122         return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6123
6124       case vec_perm:
6125       case vec_promote_demote:
6126         return aarch64_tune_params->vec_costs->vec_stmt_cost;
6127
6128       case vec_construct:
6129         elements = TYPE_VECTOR_SUBPARTS (vectype);
6130         return elements / 2 + 1;
6131
6132       default:
6133         gcc_unreachable ();
6134     }
6135 }
6136
6137 /* Implement targetm.vectorize.add_stmt_cost.  */
6138 static unsigned
6139 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6140                        struct _stmt_vec_info *stmt_info, int misalign,
6141                        enum vect_cost_model_location where)
6142 {
6143   unsigned *cost = (unsigned *) data;
6144   unsigned retval = 0;
6145
6146   if (flag_vect_cost_model)
6147     {
6148       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6149       int stmt_cost =
6150             aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6151
6152       /* Statements in an inner loop relative to the loop being
6153          vectorized are weighted more heavily.  The value here is
6154          a function (linear for now) of the loop nest level.  */
6155       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6156         {
6157           loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6158           struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
6159           unsigned nest_level = loop_depth (loop);
6160
6161           count *= nest_level;
6162         }
6163
6164       retval = (unsigned) (count * stmt_cost);
6165       cost[where] += retval;
6166     }
6167
6168   return retval;
6169 }
6170
6171 static void initialize_aarch64_code_model (void);
6172
6173 /* Parse the architecture extension string.  */
6174
6175 static void
6176 aarch64_parse_extension (char *str)
6177 {
6178   /* The extension string is parsed left to right.  */
6179   const struct aarch64_option_extension *opt = NULL;
6180
6181   /* Flag to say whether we are adding or removing an extension.  */
6182   int adding_ext = -1;
6183
6184   while (str != NULL && *str != 0)
6185     {
6186       char *ext;
6187       size_t len;
6188
6189       str++;
6190       ext = strchr (str, '+');
6191
6192       if (ext != NULL)
6193         len = ext - str;
6194       else
6195         len = strlen (str);
6196
6197       if (len >= 2 && strncmp (str, "no", 2) == 0)
6198         {
6199           adding_ext = 0;
6200           len -= 2;
6201           str += 2;
6202         }
6203       else if (len > 0)
6204         adding_ext = 1;
6205
6206       if (len == 0)
6207         {
6208           error ("missing feature modifier after %qs", "+no");
6209           return;
6210         }
6211
6212       /* Scan over the extensions table trying to find an exact match.  */
6213       for (opt = all_extensions; opt->name != NULL; opt++)
6214         {
6215           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6216             {
6217               /* Add or remove the extension.  */
6218               if (adding_ext)
6219                 aarch64_isa_flags |= opt->flags_on;
6220               else
6221                 aarch64_isa_flags &= ~(opt->flags_off);
6222               break;
6223             }
6224         }
6225
6226       if (opt->name == NULL)
6227         {
6228           /* Extension not found in list.  */
6229           error ("unknown feature modifier %qs", str);
6230           return;
6231         }
6232
6233       str = ext;
6234     };
6235
6236   return;
6237 }
6238
6239 /* Parse the ARCH string.  */
6240
6241 static void
6242 aarch64_parse_arch (void)
6243 {
6244   char *ext;
6245   const struct processor *arch;
6246   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6247   size_t len;
6248
6249   strcpy (str, aarch64_arch_string);
6250
6251   ext = strchr (str, '+');
6252
6253   if (ext != NULL)
6254     len = ext - str;
6255   else
6256     len = strlen (str);
6257
6258   if (len == 0)
6259     {
6260       error ("missing arch name in -march=%qs", str);
6261       return;
6262     }
6263
6264   /* Loop through the list of supported ARCHs to find a match.  */
6265   for (arch = all_architectures; arch->name != NULL; arch++)
6266     {
6267       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6268         {
6269           selected_arch = arch;
6270           aarch64_isa_flags = selected_arch->flags;
6271
6272           if (!selected_cpu)
6273             selected_cpu = &all_cores[selected_arch->core];
6274
6275           if (ext != NULL)
6276             {
6277               /* ARCH string contains at least one extension.  */
6278               aarch64_parse_extension (ext);
6279             }
6280
6281           if (strcmp (selected_arch->arch, selected_cpu->arch))
6282             {
6283               warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6284                        selected_cpu->name, selected_arch->name);
6285             }
6286
6287           return;
6288         }
6289     }
6290
6291   /* ARCH name not found in list.  */
6292   error ("unknown value %qs for -march", str);
6293   return;
6294 }
6295
6296 /* Parse the CPU string.  */
6297
6298 static void
6299 aarch64_parse_cpu (void)
6300 {
6301   char *ext;
6302   const struct processor *cpu;
6303   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6304   size_t len;
6305
6306   strcpy (str, aarch64_cpu_string);
6307
6308   ext = strchr (str, '+');
6309
6310   if (ext != NULL)
6311     len = ext - str;
6312   else
6313     len = strlen (str);
6314
6315   if (len == 0)
6316     {
6317       error ("missing cpu name in -mcpu=%qs", str);
6318       return;
6319     }
6320
6321   /* Loop through the list of supported CPUs to find a match.  */
6322   for (cpu = all_cores; cpu->name != NULL; cpu++)
6323     {
6324       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6325         {
6326           selected_cpu = cpu;
6327           selected_tune = cpu;
6328           aarch64_isa_flags = selected_cpu->flags;
6329
6330           if (ext != NULL)
6331             {
6332               /* CPU string contains at least one extension.  */
6333               aarch64_parse_extension (ext);
6334             }
6335
6336           return;
6337         }
6338     }
6339
6340   /* CPU name not found in list.  */
6341   error ("unknown value %qs for -mcpu", str);
6342   return;
6343 }
6344
6345 /* Parse the TUNE string.  */
6346
6347 static void
6348 aarch64_parse_tune (void)
6349 {
6350   const struct processor *cpu;
6351   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6352   strcpy (str, aarch64_tune_string);
6353
6354   /* Loop through the list of supported CPUs to find a match.  */
6355   for (cpu = all_cores; cpu->name != NULL; cpu++)
6356     {
6357       if (strcmp (cpu->name, str) == 0)
6358         {
6359           selected_tune = cpu;
6360           return;
6361         }
6362     }
6363
6364   /* CPU name not found in list.  */
6365   error ("unknown value %qs for -mtune", str);
6366   return;
6367 }
6368
6369
6370 /* Implement TARGET_OPTION_OVERRIDE.  */
6371
6372 static void
6373 aarch64_override_options (void)
6374 {
6375   /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6376      If either of -march or -mtune is given, they override their
6377      respective component of -mcpu.
6378
6379      So, first parse AARCH64_CPU_STRING, then the others, be careful
6380      with -march as, if -mcpu is not present on the command line, march
6381      must set a sensible default CPU.  */
6382   if (aarch64_cpu_string)
6383     {
6384       aarch64_parse_cpu ();
6385     }
6386
6387   if (aarch64_arch_string)
6388     {
6389       aarch64_parse_arch ();
6390     }
6391
6392   if (aarch64_tune_string)
6393     {
6394       aarch64_parse_tune ();
6395     }
6396
6397 #ifndef HAVE_AS_MABI_OPTION
6398   /* The compiler may have been configured with 2.23.* binutils, which does
6399      not have support for ILP32.  */
6400   if (TARGET_ILP32)
6401     error ("Assembler does not support -mabi=ilp32");
6402 #endif
6403
6404   initialize_aarch64_code_model ();
6405
6406   aarch64_build_bitmask_table ();
6407
6408   /* This target defaults to strict volatile bitfields.  */
6409   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6410     flag_strict_volatile_bitfields = 1;
6411
6412   /* If the user did not specify a processor, choose the default
6413      one for them.  This will be the CPU set during configuration using
6414      --with-cpu, otherwise it is "generic".  */
6415   if (!selected_cpu)
6416     {
6417       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6418       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6419     }
6420
6421   gcc_assert (selected_cpu);
6422
6423   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
6424   if (!selected_tune)
6425     selected_tune = &all_cores[selected_cpu->core];
6426
6427   aarch64_tune_flags = selected_tune->flags;
6428   aarch64_tune = selected_tune->core;
6429   aarch64_tune_params = selected_tune->tune;
6430
6431   if (aarch64_fix_a53_err835769 == 2)
6432     {
6433 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6434       aarch64_fix_a53_err835769 = 1;
6435 #else
6436       aarch64_fix_a53_err835769 = 0;
6437 #endif
6438     }
6439
6440   aarch64_override_options_after_change ();
6441 }
6442
6443 /* Implement targetm.override_options_after_change.  */
6444
6445 static void
6446 aarch64_override_options_after_change (void)
6447 {
6448   if (flag_omit_frame_pointer)
6449     flag_omit_leaf_frame_pointer = false;
6450   else if (flag_omit_leaf_frame_pointer)
6451     flag_omit_frame_pointer = true;
6452 }
6453
6454 static struct machine_function *
6455 aarch64_init_machine_status (void)
6456 {
6457   struct machine_function *machine;
6458   machine = ggc_cleared_alloc<machine_function> ();
6459   return machine;
6460 }
6461
6462 void
6463 aarch64_init_expanders (void)
6464 {
6465   init_machine_status = aarch64_init_machine_status;
6466 }
6467
6468 /* A checking mechanism for the implementation of the various code models.  */
6469 static void
6470 initialize_aarch64_code_model (void)
6471 {
6472    if (flag_pic)
6473      {
6474        switch (aarch64_cmodel_var)
6475          {
6476          case AARCH64_CMODEL_TINY:
6477            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6478            break;
6479          case AARCH64_CMODEL_SMALL:
6480            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6481            break;
6482          case AARCH64_CMODEL_LARGE:
6483            sorry ("code model %qs with -f%s", "large",
6484                   flag_pic > 1 ? "PIC" : "pic");
6485          default:
6486            gcc_unreachable ();
6487          }
6488      }
6489    else
6490      aarch64_cmodel = aarch64_cmodel_var;
6491 }
6492
6493 /* Return true if SYMBOL_REF X binds locally.  */
6494
6495 static bool
6496 aarch64_symbol_binds_local_p (const_rtx x)
6497 {
6498   return (SYMBOL_REF_DECL (x)
6499           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6500           : SYMBOL_REF_LOCAL_P (x));
6501 }
6502
6503 /* Return true if SYMBOL_REF X is thread local */
6504 static bool
6505 aarch64_tls_symbol_p (rtx x)
6506 {
6507   if (! TARGET_HAVE_TLS)
6508     return false;
6509
6510   if (GET_CODE (x) != SYMBOL_REF)
6511     return false;
6512
6513   return SYMBOL_REF_TLS_MODEL (x) != 0;
6514 }
6515
6516 /* Classify a TLS symbol into one of the TLS kinds.  */
6517 enum aarch64_symbol_type
6518 aarch64_classify_tls_symbol (rtx x)
6519 {
6520   enum tls_model tls_kind = tls_symbolic_operand_type (x);
6521
6522   switch (tls_kind)
6523     {
6524     case TLS_MODEL_GLOBAL_DYNAMIC:
6525     case TLS_MODEL_LOCAL_DYNAMIC:
6526       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6527
6528     case TLS_MODEL_INITIAL_EXEC:
6529       return SYMBOL_SMALL_GOTTPREL;
6530
6531     case TLS_MODEL_LOCAL_EXEC:
6532       return SYMBOL_SMALL_TPREL;
6533
6534     case TLS_MODEL_EMULATED:
6535     case TLS_MODEL_NONE:
6536       return SYMBOL_FORCE_TO_MEM;
6537
6538     default:
6539       gcc_unreachable ();
6540     }
6541 }
6542
6543 /* Return the method that should be used to access SYMBOL_REF or
6544    LABEL_REF X in context CONTEXT.  */
6545
6546 enum aarch64_symbol_type
6547 aarch64_classify_symbol (rtx x,
6548                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6549 {
6550   if (GET_CODE (x) == LABEL_REF)
6551     {
6552       switch (aarch64_cmodel)
6553         {
6554         case AARCH64_CMODEL_LARGE:
6555           return SYMBOL_FORCE_TO_MEM;
6556
6557         case AARCH64_CMODEL_TINY_PIC:
6558         case AARCH64_CMODEL_TINY:
6559           return SYMBOL_TINY_ABSOLUTE;
6560
6561         case AARCH64_CMODEL_SMALL_PIC:
6562         case AARCH64_CMODEL_SMALL:
6563           return SYMBOL_SMALL_ABSOLUTE;
6564
6565         default:
6566           gcc_unreachable ();
6567         }
6568     }
6569
6570   if (GET_CODE (x) == SYMBOL_REF)
6571     {
6572       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6573           return SYMBOL_FORCE_TO_MEM;
6574
6575       if (aarch64_tls_symbol_p (x))
6576         return aarch64_classify_tls_symbol (x);
6577
6578       switch (aarch64_cmodel)
6579         {
6580         case AARCH64_CMODEL_TINY:
6581           if (SYMBOL_REF_WEAK (x))
6582             return SYMBOL_FORCE_TO_MEM;
6583           return SYMBOL_TINY_ABSOLUTE;
6584
6585         case AARCH64_CMODEL_SMALL:
6586           if (SYMBOL_REF_WEAK (x))
6587             return SYMBOL_FORCE_TO_MEM;
6588           return SYMBOL_SMALL_ABSOLUTE;
6589
6590         case AARCH64_CMODEL_TINY_PIC:
6591           if (!aarch64_symbol_binds_local_p (x))
6592             return SYMBOL_TINY_GOT;
6593           return SYMBOL_TINY_ABSOLUTE;
6594
6595         case AARCH64_CMODEL_SMALL_PIC:
6596           if (!aarch64_symbol_binds_local_p (x))
6597             return SYMBOL_SMALL_GOT;
6598           return SYMBOL_SMALL_ABSOLUTE;
6599
6600         default:
6601           gcc_unreachable ();
6602         }
6603     }
6604
6605   /* By default push everything into the constant pool.  */
6606   return SYMBOL_FORCE_TO_MEM;
6607 }
6608
6609 bool
6610 aarch64_constant_address_p (rtx x)
6611 {
6612   return (CONSTANT_P (x) && memory_address_p (DImode, x));
6613 }
6614
6615 bool
6616 aarch64_legitimate_pic_operand_p (rtx x)
6617 {
6618   if (GET_CODE (x) == SYMBOL_REF
6619       || (GET_CODE (x) == CONST
6620           && GET_CODE (XEXP (x, 0)) == PLUS
6621           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6622      return false;
6623
6624   return true;
6625 }
6626
6627 /* Return true if X holds either a quarter-precision or
6628      floating-point +0.0 constant.  */
6629 static bool
6630 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6631 {
6632   if (!CONST_DOUBLE_P (x))
6633     return false;
6634
6635   /* TODO: We could handle moving 0.0 to a TFmode register,
6636      but first we would like to refactor the movtf_aarch64
6637      to be more amicable to split moves properly and
6638      correctly gate on TARGET_SIMD.  For now - reject all
6639      constants which are not to SFmode or DFmode registers.  */
6640   if (!(mode == SFmode || mode == DFmode))
6641     return false;
6642
6643   if (aarch64_float_const_zero_rtx_p (x))
6644     return true;
6645   return aarch64_float_const_representable_p (x);
6646 }
6647
6648 static bool
6649 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6650 {
6651   /* Do not allow vector struct mode constants.  We could support
6652      0 and -1 easily, but they need support in aarch64-simd.md.  */
6653   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6654     return false;
6655
6656   /* This could probably go away because
6657      we now decompose CONST_INTs according to expand_mov_immediate.  */
6658   if ((GET_CODE (x) == CONST_VECTOR
6659        && aarch64_simd_valid_immediate (x, mode, false, NULL))
6660       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6661         return !targetm.cannot_force_const_mem (mode, x);
6662
6663   if (GET_CODE (x) == HIGH
6664       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6665     return true;
6666
6667   return aarch64_constant_address_p (x);
6668 }
6669
6670 rtx
6671 aarch64_load_tp (rtx target)
6672 {
6673   if (!target
6674       || GET_MODE (target) != Pmode
6675       || !register_operand (target, Pmode))
6676     target = gen_reg_rtx (Pmode);
6677
6678   /* Can return in any reg.  */
6679   emit_insn (gen_aarch64_load_tp_hard (target));
6680   return target;
6681 }
6682
6683 /* On AAPCS systems, this is the "struct __va_list".  */
6684 static GTY(()) tree va_list_type;
6685
6686 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6687    Return the type to use as __builtin_va_list.
6688
6689    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6690
6691    struct __va_list
6692    {
6693      void *__stack;
6694      void *__gr_top;
6695      void *__vr_top;
6696      int   __gr_offs;
6697      int   __vr_offs;
6698    };  */
6699
6700 static tree
6701 aarch64_build_builtin_va_list (void)
6702 {
6703   tree va_list_name;
6704   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6705
6706   /* Create the type.  */
6707   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6708   /* Give it the required name.  */
6709   va_list_name = build_decl (BUILTINS_LOCATION,
6710                              TYPE_DECL,
6711                              get_identifier ("__va_list"),
6712                              va_list_type);
6713   DECL_ARTIFICIAL (va_list_name) = 1;
6714   TYPE_NAME (va_list_type) = va_list_name;
6715   TYPE_STUB_DECL (va_list_type) = va_list_name;
6716
6717   /* Create the fields.  */
6718   f_stack = build_decl (BUILTINS_LOCATION,
6719                         FIELD_DECL, get_identifier ("__stack"),
6720                         ptr_type_node);
6721   f_grtop = build_decl (BUILTINS_LOCATION,
6722                         FIELD_DECL, get_identifier ("__gr_top"),
6723                         ptr_type_node);
6724   f_vrtop = build_decl (BUILTINS_LOCATION,
6725                         FIELD_DECL, get_identifier ("__vr_top"),
6726                         ptr_type_node);
6727   f_groff = build_decl (BUILTINS_LOCATION,
6728                         FIELD_DECL, get_identifier ("__gr_offs"),
6729                         integer_type_node);
6730   f_vroff = build_decl (BUILTINS_LOCATION,
6731                         FIELD_DECL, get_identifier ("__vr_offs"),
6732                         integer_type_node);
6733
6734   DECL_ARTIFICIAL (f_stack) = 1;
6735   DECL_ARTIFICIAL (f_grtop) = 1;
6736   DECL_ARTIFICIAL (f_vrtop) = 1;
6737   DECL_ARTIFICIAL (f_groff) = 1;
6738   DECL_ARTIFICIAL (f_vroff) = 1;
6739
6740   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6741   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6742   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6743   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6744   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6745
6746   TYPE_FIELDS (va_list_type) = f_stack;
6747   DECL_CHAIN (f_stack) = f_grtop;
6748   DECL_CHAIN (f_grtop) = f_vrtop;
6749   DECL_CHAIN (f_vrtop) = f_groff;
6750   DECL_CHAIN (f_groff) = f_vroff;
6751
6752   /* Compute its layout.  */
6753   layout_type (va_list_type);
6754
6755   return va_list_type;
6756 }
6757
6758 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
6759 static void
6760 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6761 {
6762   const CUMULATIVE_ARGS *cum;
6763   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6764   tree stack, grtop, vrtop, groff, vroff;
6765   tree t;
6766   int gr_save_area_size;
6767   int vr_save_area_size;
6768   int vr_offset;
6769
6770   cum = &crtl->args.info;
6771   gr_save_area_size
6772     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6773   vr_save_area_size
6774     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6775
6776   if (TARGET_GENERAL_REGS_ONLY)
6777     {
6778       if (cum->aapcs_nvrn > 0)
6779         sorry ("%qs and floating point or vector arguments",
6780                "-mgeneral-regs-only");
6781       vr_save_area_size = 0;
6782     }
6783
6784   f_stack = TYPE_FIELDS (va_list_type_node);
6785   f_grtop = DECL_CHAIN (f_stack);
6786   f_vrtop = DECL_CHAIN (f_grtop);
6787   f_groff = DECL_CHAIN (f_vrtop);
6788   f_vroff = DECL_CHAIN (f_groff);
6789
6790   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6791                   NULL_TREE);
6792   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6793                   NULL_TREE);
6794   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6795                   NULL_TREE);
6796   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6797                   NULL_TREE);
6798   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6799                   NULL_TREE);
6800
6801   /* Emit code to initialize STACK, which points to the next varargs stack
6802      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
6803      by named arguments.  STACK is 8-byte aligned.  */
6804   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6805   if (cum->aapcs_stack_size > 0)
6806     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6807   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6808   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6809
6810   /* Emit code to initialize GRTOP, the top of the GR save area.
6811      virtual_incoming_args_rtx should have been 16 byte aligned.  */
6812   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6813   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6814   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6815
6816   /* Emit code to initialize VRTOP, the top of the VR save area.
6817      This address is gr_save_area_bytes below GRTOP, rounded
6818      down to the next 16-byte boundary.  */
6819   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6820   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6821                              STACK_BOUNDARY / BITS_PER_UNIT);
6822
6823   if (vr_offset)
6824     t = fold_build_pointer_plus_hwi (t, -vr_offset);
6825   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6826   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6827
6828   /* Emit code to initialize GROFF, the offset from GRTOP of the
6829      next GPR argument.  */
6830   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6831               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6832   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6833
6834   /* Likewise emit code to initialize VROFF, the offset from FTOP
6835      of the next VR argument.  */
6836   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6837               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6838   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6839 }
6840
6841 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
6842
6843 static tree
6844 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6845                               gimple_seq *post_p ATTRIBUTE_UNUSED)
6846 {
6847   tree addr;
6848   bool indirect_p;
6849   bool is_ha;           /* is HFA or HVA.  */
6850   bool dw_align;        /* double-word align.  */
6851   enum machine_mode ag_mode = VOIDmode;
6852   int nregs;
6853   enum machine_mode mode;
6854
6855   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6856   tree stack, f_top, f_off, off, arg, roundup, on_stack;
6857   HOST_WIDE_INT size, rsize, adjust, align;
6858   tree t, u, cond1, cond2;
6859
6860   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6861   if (indirect_p)
6862     type = build_pointer_type (type);
6863
6864   mode = TYPE_MODE (type);
6865
6866   f_stack = TYPE_FIELDS (va_list_type_node);
6867   f_grtop = DECL_CHAIN (f_stack);
6868   f_vrtop = DECL_CHAIN (f_grtop);
6869   f_groff = DECL_CHAIN (f_vrtop);
6870   f_vroff = DECL_CHAIN (f_groff);
6871
6872   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6873                   f_stack, NULL_TREE);
6874   size = int_size_in_bytes (type);
6875   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6876
6877   dw_align = false;
6878   adjust = 0;
6879   if (aarch64_vfp_is_call_or_return_candidate (mode,
6880                                                type,
6881                                                &ag_mode,
6882                                                &nregs,
6883                                                &is_ha))
6884     {
6885       /* TYPE passed in fp/simd registers.  */
6886       if (TARGET_GENERAL_REGS_ONLY)
6887         sorry ("%qs and floating point or vector arguments",
6888                "-mgeneral-regs-only");
6889
6890       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6891                       unshare_expr (valist), f_vrtop, NULL_TREE);
6892       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6893                       unshare_expr (valist), f_vroff, NULL_TREE);
6894
6895       rsize = nregs * UNITS_PER_VREG;
6896
6897       if (is_ha)
6898         {
6899           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6900             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6901         }
6902       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6903                && size < UNITS_PER_VREG)
6904         {
6905           adjust = UNITS_PER_VREG - size;
6906         }
6907     }
6908   else
6909     {
6910       /* TYPE passed in general registers.  */
6911       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6912                       unshare_expr (valist), f_grtop, NULL_TREE);
6913       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6914                       unshare_expr (valist), f_groff, NULL_TREE);
6915       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6916       nregs = rsize / UNITS_PER_WORD;
6917
6918       if (align > 8)
6919         dw_align = true;
6920
6921       if (BLOCK_REG_PADDING (mode, type, 1) == downward
6922           && size < UNITS_PER_WORD)
6923         {
6924           adjust = UNITS_PER_WORD  - size;
6925         }
6926     }
6927
6928   /* Get a local temporary for the field value.  */
6929   off = get_initialized_tmp_var (f_off, pre_p, NULL);
6930
6931   /* Emit code to branch if off >= 0.  */
6932   t = build2 (GE_EXPR, boolean_type_node, off,
6933               build_int_cst (TREE_TYPE (off), 0));
6934   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6935
6936   if (dw_align)
6937     {
6938       /* Emit: offs = (offs + 15) & -16.  */
6939       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6940                   build_int_cst (TREE_TYPE (off), 15));
6941       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6942                   build_int_cst (TREE_TYPE (off), -16));
6943       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6944     }
6945   else
6946     roundup = NULL;
6947
6948   /* Update ap.__[g|v]r_offs  */
6949   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6950               build_int_cst (TREE_TYPE (off), rsize));
6951   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6952
6953   /* String up.  */
6954   if (roundup)
6955     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6956
6957   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
6958   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6959               build_int_cst (TREE_TYPE (f_off), 0));
6960   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6961
6962   /* String up: make sure the assignment happens before the use.  */
6963   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6964   COND_EXPR_ELSE (cond1) = t;
6965
6966   /* Prepare the trees handling the argument that is passed on the stack;
6967      the top level node will store in ON_STACK.  */
6968   arg = get_initialized_tmp_var (stack, pre_p, NULL);
6969   if (align > 8)
6970     {
6971       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
6972       t = fold_convert (intDI_type_node, arg);
6973       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6974                   build_int_cst (TREE_TYPE (t), 15));
6975       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6976                   build_int_cst (TREE_TYPE (t), -16));
6977       t = fold_convert (TREE_TYPE (arg), t);
6978       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6979     }
6980   else
6981     roundup = NULL;
6982   /* Advance ap.__stack  */
6983   t = fold_convert (intDI_type_node, arg);
6984   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6985               build_int_cst (TREE_TYPE (t), size + 7));
6986   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6987               build_int_cst (TREE_TYPE (t), -8));
6988   t = fold_convert (TREE_TYPE (arg), t);
6989   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6990   /* String up roundup and advance.  */
6991   if (roundup)
6992     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6993   /* String up with arg */
6994   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6995   /* Big-endianness related address adjustment.  */
6996   if (BLOCK_REG_PADDING (mode, type, 1) == downward
6997       && size < UNITS_PER_WORD)
6998   {
6999     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7000                 size_int (UNITS_PER_WORD - size));
7001     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7002   }
7003
7004   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7005   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7006
7007   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
7008   t = off;
7009   if (adjust)
7010     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7011                 build_int_cst (TREE_TYPE (off), adjust));
7012
7013   t = fold_convert (sizetype, t);
7014   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7015
7016   if (is_ha)
7017     {
7018       /* type ha; // treat as "struct {ftype field[n];}"
7019          ... [computing offs]
7020          for (i = 0; i <nregs; ++i, offs += 16)
7021            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7022          return ha;  */
7023       int i;
7024       tree tmp_ha, field_t, field_ptr_t;
7025
7026       /* Declare a local variable.  */
7027       tmp_ha = create_tmp_var_raw (type, "ha");
7028       gimple_add_tmp_var (tmp_ha);
7029
7030       /* Establish the base type.  */
7031       switch (ag_mode)
7032         {
7033         case SFmode:
7034           field_t = float_type_node;
7035           field_ptr_t = float_ptr_type_node;
7036           break;
7037         case DFmode:
7038           field_t = double_type_node;
7039           field_ptr_t = double_ptr_type_node;
7040           break;
7041         case TFmode:
7042           field_t = long_double_type_node;
7043           field_ptr_t = long_double_ptr_type_node;
7044           break;
7045 /* The half precision and quad precision are not fully supported yet.  Enable
7046    the following code after the support is complete.  Need to find the correct
7047    type node for __fp16 *.  */
7048 #if 0
7049         case HFmode:
7050           field_t = float_type_node;
7051           field_ptr_t = float_ptr_type_node;
7052           break;
7053 #endif
7054         case V2SImode:
7055         case V4SImode:
7056             {
7057               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7058               field_t = build_vector_type_for_mode (innertype, ag_mode);
7059               field_ptr_t = build_pointer_type (field_t);
7060             }
7061           break;
7062         default:
7063           gcc_assert (0);
7064         }
7065
7066       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
7067       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7068       addr = t;
7069       t = fold_convert (field_ptr_t, addr);
7070       t = build2 (MODIFY_EXPR, field_t,
7071                   build1 (INDIRECT_REF, field_t, tmp_ha),
7072                   build1 (INDIRECT_REF, field_t, t));
7073
7074       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
7075       for (i = 1; i < nregs; ++i)
7076         {
7077           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7078           u = fold_convert (field_ptr_t, addr);
7079           u = build2 (MODIFY_EXPR, field_t,
7080                       build2 (MEM_REF, field_t, tmp_ha,
7081                               build_int_cst (field_ptr_t,
7082                                              (i *
7083                                               int_size_in_bytes (field_t)))),
7084                       build1 (INDIRECT_REF, field_t, u));
7085           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7086         }
7087
7088       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7089       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7090     }
7091
7092   COND_EXPR_ELSE (cond2) = t;
7093   addr = fold_convert (build_pointer_type (type), cond1);
7094   addr = build_va_arg_indirect_ref (addr);
7095
7096   if (indirect_p)
7097     addr = build_va_arg_indirect_ref (addr);
7098
7099   return addr;
7100 }
7101
7102 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
7103
7104 static void
7105 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7106                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7107                                 int no_rtl)
7108 {
7109   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7110   CUMULATIVE_ARGS local_cum;
7111   int gr_saved, vr_saved;
7112
7113   /* The caller has advanced CUM up to, but not beyond, the last named
7114      argument.  Advance a local copy of CUM past the last "real" named
7115      argument, to find out how many registers are left over.  */
7116   local_cum = *cum;
7117   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7118
7119   /* Found out how many registers we need to save.  */
7120   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7121   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7122
7123   if (TARGET_GENERAL_REGS_ONLY)
7124     {
7125       if (local_cum.aapcs_nvrn > 0)
7126         sorry ("%qs and floating point or vector arguments",
7127                "-mgeneral-regs-only");
7128       vr_saved = 0;
7129     }
7130
7131   if (!no_rtl)
7132     {
7133       if (gr_saved > 0)
7134         {
7135           rtx ptr, mem;
7136
7137           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
7138           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7139                                - gr_saved * UNITS_PER_WORD);
7140           mem = gen_frame_mem (BLKmode, ptr);
7141           set_mem_alias_set (mem, get_varargs_alias_set ());
7142
7143           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7144                                mem, gr_saved);
7145         }
7146       if (vr_saved > 0)
7147         {
7148           /* We can't use move_block_from_reg, because it will use
7149              the wrong mode, storing D regs only.  */
7150           enum machine_mode mode = TImode;
7151           int off, i;
7152
7153           /* Set OFF to the offset from virtual_incoming_args_rtx of
7154              the first vector register.  The VR save area lies below
7155              the GR one, and is aligned to 16 bytes.  */
7156           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7157                                    STACK_BOUNDARY / BITS_PER_UNIT);
7158           off -= vr_saved * UNITS_PER_VREG;
7159
7160           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7161             {
7162               rtx ptr, mem;
7163
7164               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7165               mem = gen_frame_mem (mode, ptr);
7166               set_mem_alias_set (mem, get_varargs_alias_set ());
7167               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7168               off += UNITS_PER_VREG;
7169             }
7170         }
7171     }
7172
7173   /* We don't save the size into *PRETEND_SIZE because we want to avoid
7174      any complication of having crtl->args.pretend_args_size changed.  */
7175   cfun->machine->frame.saved_varargs_size
7176     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7177                       STACK_BOUNDARY / BITS_PER_UNIT)
7178        + vr_saved * UNITS_PER_VREG);
7179 }
7180
7181 static void
7182 aarch64_conditional_register_usage (void)
7183 {
7184   int i;
7185   if (!TARGET_FLOAT)
7186     {
7187       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7188         {
7189           fixed_regs[i] = 1;
7190           call_used_regs[i] = 1;
7191         }
7192     }
7193 }
7194
7195 /* Walk down the type tree of TYPE counting consecutive base elements.
7196    If *MODEP is VOIDmode, then set it to the first valid floating point
7197    type.  If a non-floating point type is found, or if a floating point
7198    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7199    otherwise return the count in the sub-tree.  */
7200 static int
7201 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7202 {
7203   enum machine_mode mode;
7204   HOST_WIDE_INT size;
7205
7206   switch (TREE_CODE (type))
7207     {
7208     case REAL_TYPE:
7209       mode = TYPE_MODE (type);
7210       if (mode != DFmode && mode != SFmode && mode != TFmode)
7211         return -1;
7212
7213       if (*modep == VOIDmode)
7214         *modep = mode;
7215
7216       if (*modep == mode)
7217         return 1;
7218
7219       break;
7220
7221     case COMPLEX_TYPE:
7222       mode = TYPE_MODE (TREE_TYPE (type));
7223       if (mode != DFmode && mode != SFmode && mode != TFmode)
7224         return -1;
7225
7226       if (*modep == VOIDmode)
7227         *modep = mode;
7228
7229       if (*modep == mode)
7230         return 2;
7231
7232       break;
7233
7234     case VECTOR_TYPE:
7235       /* Use V2SImode and V4SImode as representatives of all 64-bit
7236          and 128-bit vector types.  */
7237       size = int_size_in_bytes (type);
7238       switch (size)
7239         {
7240         case 8:
7241           mode = V2SImode;
7242           break;
7243         case 16:
7244           mode = V4SImode;
7245           break;
7246         default:
7247           return -1;
7248         }
7249
7250       if (*modep == VOIDmode)
7251         *modep = mode;
7252
7253       /* Vector modes are considered to be opaque: two vectors are
7254          equivalent for the purposes of being homogeneous aggregates
7255          if they are the same size.  */
7256       if (*modep == mode)
7257         return 1;
7258
7259       break;
7260
7261     case ARRAY_TYPE:
7262       {
7263         int count;
7264         tree index = TYPE_DOMAIN (type);
7265
7266         /* Can't handle incomplete types nor sizes that are not
7267            fixed.  */
7268         if (!COMPLETE_TYPE_P (type)
7269             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7270           return -1;
7271
7272         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7273         if (count == -1
7274             || !index
7275             || !TYPE_MAX_VALUE (index)
7276             || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7277             || !TYPE_MIN_VALUE (index)
7278             || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7279             || count < 0)
7280           return -1;
7281
7282         count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7283                       - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7284
7285         /* There must be no padding.  */
7286         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7287           return -1;
7288
7289         return count;
7290       }
7291
7292     case RECORD_TYPE:
7293       {
7294         int count = 0;
7295         int sub_count;
7296         tree field;
7297
7298         /* Can't handle incomplete types nor sizes that are not
7299            fixed.  */
7300         if (!COMPLETE_TYPE_P (type)
7301             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7302           return -1;
7303
7304         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7305           {
7306             if (TREE_CODE (field) != FIELD_DECL)
7307               continue;
7308
7309             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7310             if (sub_count < 0)
7311               return -1;
7312             count += sub_count;
7313           }
7314
7315         /* There must be no padding.  */
7316         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7317           return -1;
7318
7319         return count;
7320       }
7321
7322     case UNION_TYPE:
7323     case QUAL_UNION_TYPE:
7324       {
7325         /* These aren't very interesting except in a degenerate case.  */
7326         int count = 0;
7327         int sub_count;
7328         tree field;
7329
7330         /* Can't handle incomplete types nor sizes that are not
7331            fixed.  */
7332         if (!COMPLETE_TYPE_P (type)
7333             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7334           return -1;
7335
7336         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7337           {
7338             if (TREE_CODE (field) != FIELD_DECL)
7339               continue;
7340
7341             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7342             if (sub_count < 0)
7343               return -1;
7344             count = count > sub_count ? count : sub_count;
7345           }
7346
7347         /* There must be no padding.  */
7348         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7349           return -1;
7350
7351         return count;
7352       }
7353
7354     default:
7355       break;
7356     }
7357
7358   return -1;
7359 }
7360
7361 /* Return true if we use LRA instead of reload pass.  */
7362 static bool
7363 aarch64_lra_p (void)
7364 {
7365   return aarch64_lra_flag;
7366 }
7367
7368 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7369    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
7370    array types.  The C99 floating-point complex types are also considered
7371    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
7372    types, which are GCC extensions and out of the scope of AAPCS64, are
7373    treated as composite types here as well.
7374
7375    Note that MODE itself is not sufficient in determining whether a type
7376    is such a composite type or not.  This is because
7377    stor-layout.c:compute_record_mode may have already changed the MODE
7378    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
7379    structure with only one field may have its MODE set to the mode of the
7380    field.  Also an integer mode whose size matches the size of the
7381    RECORD_TYPE type may be used to substitute the original mode
7382    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
7383    solely relied on.  */
7384
7385 static bool
7386 aarch64_composite_type_p (const_tree type,
7387                           enum machine_mode mode)
7388 {
7389   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7390     return true;
7391
7392   if (mode == BLKmode
7393       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7394       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7395     return true;
7396
7397   return false;
7398 }
7399
7400 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7401    type as described in AAPCS64 \S 4.1.2.
7402
7403    See the comment above aarch64_composite_type_p for the notes on MODE.  */
7404
7405 static bool
7406 aarch64_short_vector_p (const_tree type,
7407                         enum machine_mode mode)
7408 {
7409   HOST_WIDE_INT size = -1;
7410
7411   if (type && TREE_CODE (type) == VECTOR_TYPE)
7412     size = int_size_in_bytes (type);
7413   else if (!aarch64_composite_type_p (type, mode)
7414            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7415                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7416     size = GET_MODE_SIZE (mode);
7417
7418   return (size == 8 || size == 16) ? true : false;
7419 }
7420
7421 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7422    shall be passed or returned in simd/fp register(s) (providing these
7423    parameter passing registers are available).
7424
7425    Upon successful return, *COUNT returns the number of needed registers,
7426    *BASE_MODE returns the mode of the individual register and when IS_HAF
7427    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7428    floating-point aggregate or a homogeneous short-vector aggregate.  */
7429
7430 static bool
7431 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7432                                          const_tree type,
7433                                          enum machine_mode *base_mode,
7434                                          int *count,
7435                                          bool *is_ha)
7436 {
7437   enum machine_mode new_mode = VOIDmode;
7438   bool composite_p = aarch64_composite_type_p (type, mode);
7439
7440   if (is_ha != NULL) *is_ha = false;
7441
7442   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7443       || aarch64_short_vector_p (type, mode))
7444     {
7445       *count = 1;
7446       new_mode = mode;
7447     }
7448   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7449     {
7450       if (is_ha != NULL) *is_ha = true;
7451       *count = 2;
7452       new_mode = GET_MODE_INNER (mode);
7453     }
7454   else if (type && composite_p)
7455     {
7456       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7457
7458       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7459         {
7460           if (is_ha != NULL) *is_ha = true;
7461           *count = ag_count;
7462         }
7463       else
7464         return false;
7465     }
7466   else
7467     return false;
7468
7469   *base_mode = new_mode;
7470   return true;
7471 }
7472
7473 /* Implement TARGET_STRUCT_VALUE_RTX.  */
7474
7475 static rtx
7476 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7477                           int incoming ATTRIBUTE_UNUSED)
7478 {
7479   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7480 }
7481
7482 /* Implements target hook vector_mode_supported_p.  */
7483 static bool
7484 aarch64_vector_mode_supported_p (enum machine_mode mode)
7485 {
7486   if (TARGET_SIMD
7487       && (mode == V4SImode  || mode == V8HImode
7488           || mode == V16QImode || mode == V2DImode
7489           || mode == V2SImode  || mode == V4HImode
7490           || mode == V8QImode || mode == V2SFmode
7491           || mode == V4SFmode || mode == V2DFmode
7492           || mode == V1DFmode))
7493     return true;
7494
7495   return false;
7496 }
7497
7498 /* Return appropriate SIMD container
7499    for MODE within a vector of WIDTH bits.  */
7500 static enum machine_mode
7501 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7502 {
7503   gcc_assert (width == 64 || width == 128);
7504   if (TARGET_SIMD)
7505     {
7506       if (width == 128)
7507         switch (mode)
7508           {
7509           case DFmode:
7510             return V2DFmode;
7511           case SFmode:
7512             return V4SFmode;
7513           case SImode:
7514             return V4SImode;
7515           case HImode:
7516             return V8HImode;
7517           case QImode:
7518             return V16QImode;
7519           case DImode:
7520             return V2DImode;
7521           default:
7522             break;
7523           }
7524       else
7525         switch (mode)
7526           {
7527           case SFmode:
7528             return V2SFmode;
7529           case SImode:
7530             return V2SImode;
7531           case HImode:
7532             return V4HImode;
7533           case QImode:
7534             return V8QImode;
7535           default:
7536             break;
7537           }
7538     }
7539   return word_mode;
7540 }
7541
7542 /* Return 128-bit container as the preferred SIMD mode for MODE.  */
7543 static enum machine_mode
7544 aarch64_preferred_simd_mode (enum machine_mode mode)
7545 {
7546   return aarch64_simd_container_mode (mode, 128);
7547 }
7548
7549 /* Return the bitmask of possible vector sizes for the vectorizer
7550    to iterate over.  */
7551 static unsigned int
7552 aarch64_autovectorize_vector_sizes (void)
7553 {
7554   return (16 | 8);
7555 }
7556
7557 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7558    vector types in order to conform to the AAPCS64 (see "Procedure
7559    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
7560    qualify for emission with the mangled names defined in that document,
7561    a vector type must not only be of the correct mode but also be
7562    composed of AdvSIMD vector element types (e.g.
7563    _builtin_aarch64_simd_qi); these types are registered by
7564    aarch64_init_simd_builtins ().  In other words, vector types defined
7565    in other ways e.g. via vector_size attribute will get default
7566    mangled names.  */
7567 typedef struct
7568 {
7569   enum machine_mode mode;
7570   const char *element_type_name;
7571   const char *mangled_name;
7572 } aarch64_simd_mangle_map_entry;
7573
7574 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7575   /* 64-bit containerized types.  */
7576   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
7577   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
7578   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
7579   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
7580   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
7581   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
7582   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
7583   { DImode,    "__builtin_aarch64_simd_di",     "11__Int64x1_t" },
7584   { DImode,    "__builtin_aarch64_simd_udi",    "12__Uint64x1_t" },
7585   { V1DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x1_t" },
7586   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
7587   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7588   /* 128-bit containerized types.  */
7589   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
7590   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
7591   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
7592   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
7593   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
7594   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
7595   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
7596   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
7597   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
7598   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
7599   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
7600   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7601   { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7602   { VOIDmode, NULL, NULL }
7603 };
7604
7605 /* Implement TARGET_MANGLE_TYPE.  */
7606
7607 static const char *
7608 aarch64_mangle_type (const_tree type)
7609 {
7610   /* The AArch64 ABI documents say that "__va_list" has to be
7611      managled as if it is in the "std" namespace.  */
7612   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7613     return "St9__va_list";
7614
7615   /* Check the mode of the vector type, and the name of the vector
7616      element type, against the table.  */
7617   if (TREE_CODE (type) == VECTOR_TYPE)
7618     {
7619       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7620
7621       while (pos->mode != VOIDmode)
7622         {
7623           tree elt_type = TREE_TYPE (type);
7624
7625           if (pos->mode == TYPE_MODE (type)
7626               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7627               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7628                           pos->element_type_name))
7629             return pos->mangled_name;
7630
7631           pos++;
7632         }
7633     }
7634
7635   /* Use the default mangling.  */
7636   return NULL;
7637 }
7638
7639 static int
7640 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
7641 {
7642   return MEM_P (*x);
7643 }
7644
7645 static bool
7646 is_memory_op (rtx_insn *mem_insn)
7647 {
7648    rtx pattern = PATTERN (mem_insn);
7649    return for_each_rtx (&pattern, is_mem_p, NULL);
7650 }
7651
7652 /* Find the first rtx_insn before insn that will generate an assembly
7653    instruction.  */
7654
7655 static rtx_insn *
7656 aarch64_prev_real_insn (rtx_insn *insn)
7657 {
7658   if (!insn)
7659     return NULL;
7660
7661   do
7662     {
7663       insn = prev_real_insn (insn);
7664     }
7665   while (insn && recog_memoized (insn) < 0);
7666
7667   return insn;
7668 }
7669
7670 static bool
7671 is_madd_op (enum attr_type t1)
7672 {
7673   unsigned int i;
7674   /* A number of these may be AArch32 only.  */
7675   enum attr_type mlatypes[] = {
7676     TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7677     TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7678     TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7679   };
7680
7681   for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7682     {
7683       if (t1 == mlatypes[i])
7684         return true;
7685     }
7686
7687   return false;
7688 }
7689
7690 /* Check if there is a register dependency between a load and the insn
7691    for which we hold recog_data.  */
7692
7693 static bool
7694 dep_between_memop_and_curr (rtx memop)
7695 {
7696   rtx load_reg;
7697   int opno;
7698
7699   if (!memop)
7700     return false;
7701
7702   if (!REG_P (SET_DEST (memop)))
7703     return false;
7704
7705   load_reg = SET_DEST (memop);
7706   for (opno = 0; opno < recog_data.n_operands; opno++)
7707     {
7708       rtx operand = recog_data.operand[opno];
7709       if (REG_P (operand)
7710           && reg_overlap_mentioned_p (load_reg, operand))
7711         return true;
7712
7713     }
7714   return false;
7715 }
7716
7717 bool
7718 aarch64_madd_needs_nop (rtx_insn* insn)
7719 {
7720   enum attr_type attr_type;
7721   rtx_insn *prev;
7722   rtx body;
7723
7724   if (!aarch64_fix_a53_err835769)
7725     return false;
7726
7727   if (recog_memoized (insn) < 0)
7728     return false;
7729
7730   attr_type = get_attr_type (insn);
7731   if (!is_madd_op (attr_type))
7732     return false;
7733
7734   prev = aarch64_prev_real_insn (insn);
7735   if (!prev)
7736     return false;
7737
7738   body = single_set (prev);
7739
7740   /* If the previous insn is a memory op and there is no dependency between
7741      it and the madd, emit a nop between them.  If we know the previous insn is
7742      a memory op but body is NULL, emit the nop to be safe, it's probably a
7743      load/store pair insn.  */
7744   if (is_memory_op (prev)
7745       && GET_MODE (recog_data.operand[0]) == DImode
7746       && (!dep_between_memop_and_curr (body)))
7747     return true;
7748
7749   return false;
7750
7751 }
7752
7753 void
7754 aarch64_final_prescan_insn (rtx_insn *insn)
7755 {
7756   if (aarch64_madd_needs_nop (insn))
7757     fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7758 }
7759
7760
7761 /* Return the equivalent letter for size.  */
7762 static char
7763 sizetochar (int size)
7764 {
7765   switch (size)
7766     {
7767     case 64: return 'd';
7768     case 32: return 's';
7769     case 16: return 'h';
7770     case 8 : return 'b';
7771     default: gcc_unreachable ();
7772     }
7773 }
7774
7775 /* Return true iff x is a uniform vector of floating-point
7776    constants, and the constant can be represented in
7777    quarter-precision form.  Note, as aarch64_float_const_representable
7778    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
7779 static bool
7780 aarch64_vect_float_const_representable_p (rtx x)
7781 {
7782   int i = 0;
7783   REAL_VALUE_TYPE r0, ri;
7784   rtx x0, xi;
7785
7786   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7787     return false;
7788
7789   x0 = CONST_VECTOR_ELT (x, 0);
7790   if (!CONST_DOUBLE_P (x0))
7791     return false;
7792
7793   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7794
7795   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7796     {
7797       xi = CONST_VECTOR_ELT (x, i);
7798       if (!CONST_DOUBLE_P (xi))
7799         return false;
7800
7801       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7802       if (!REAL_VALUES_EQUAL (r0, ri))
7803         return false;
7804     }
7805
7806   return aarch64_float_const_representable_p (x0);
7807 }
7808
7809 /* Return true for valid and false for invalid.  */
7810 bool
7811 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7812                               struct simd_immediate_info *info)
7813 {
7814 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
7815   matches = 1;                                          \
7816   for (i = 0; i < idx; i += (STRIDE))                   \
7817     if (!(TEST))                                        \
7818       matches = 0;                                      \
7819   if (matches)                                          \
7820     {                                                   \
7821       immtype = (CLASS);                                \
7822       elsize = (ELSIZE);                                \
7823       eshift = (SHIFT);                                 \
7824       emvn = (NEG);                                     \
7825       break;                                            \
7826     }
7827
7828   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7829   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7830   unsigned char bytes[16];
7831   int immtype = -1, matches;
7832   unsigned int invmask = inverse ? 0xff : 0;
7833   int eshift, emvn;
7834
7835   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7836     {
7837       if (! (aarch64_simd_imm_zero_p (op, mode)
7838              || aarch64_vect_float_const_representable_p (op)))
7839         return false;
7840
7841       if (info)
7842         {
7843           info->value = CONST_VECTOR_ELT (op, 0);
7844           info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7845           info->mvn = false;
7846           info->shift = 0;
7847         }
7848
7849       return true;
7850     }
7851
7852   /* Splat vector constant out into a byte vector.  */
7853   for (i = 0; i < n_elts; i++)
7854     {
7855       /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
7856          it must be laid out in the vector register in reverse order.  */
7857       rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7858       unsigned HOST_WIDE_INT elpart;
7859       unsigned int part, parts;
7860
7861       if (CONST_INT_P (el))
7862         {
7863           elpart = INTVAL (el);
7864           parts = 1;
7865         }
7866       else if (GET_CODE (el) == CONST_DOUBLE)
7867         {
7868           elpart = CONST_DOUBLE_LOW (el);
7869           parts = 2;
7870         }
7871       else
7872         gcc_unreachable ();
7873
7874       for (part = 0; part < parts; part++)
7875         {
7876           unsigned int byte;
7877           for (byte = 0; byte < innersize; byte++)
7878             {
7879               bytes[idx++] = (elpart & 0xff) ^ invmask;
7880               elpart >>= BITS_PER_UNIT;
7881             }
7882           if (GET_CODE (el) == CONST_DOUBLE)
7883             elpart = CONST_DOUBLE_HIGH (el);
7884         }
7885     }
7886
7887   /* Sanity check.  */
7888   gcc_assert (idx == GET_MODE_SIZE (mode));
7889
7890   do
7891     {
7892       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7893              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7894
7895       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7896              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7897
7898       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7899              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7900
7901       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7902              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7903
7904       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7905
7906       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7907
7908       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7909              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7910
7911       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7912              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7913
7914       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7915              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7916
7917       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7918              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7919
7920       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7921
7922       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7923
7924       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7925              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7926
7927       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7928              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7929
7930       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7931              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7932
7933       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7934              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7935
7936       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7937
7938       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7939              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7940     }
7941   while (0);
7942
7943   if (immtype == -1)
7944     return false;
7945
7946   if (info)
7947     {
7948       info->element_width = elsize;
7949       info->mvn = emvn != 0;
7950       info->shift = eshift;
7951
7952       unsigned HOST_WIDE_INT imm = 0;
7953
7954       if (immtype >= 12 && immtype <= 15)
7955         info->msl = true;
7956
7957       /* Un-invert bytes of recognized vector, if necessary.  */
7958       if (invmask != 0)
7959         for (i = 0; i < idx; i++)
7960           bytes[i] ^= invmask;
7961
7962       if (immtype == 17)
7963         {
7964           /* FIXME: Broken on 32-bit H_W_I hosts.  */
7965           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7966
7967           for (i = 0; i < 8; i++)
7968             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7969               << (i * BITS_PER_UNIT);
7970
7971
7972           info->value = GEN_INT (imm);
7973         }
7974       else
7975         {
7976           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7977             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7978
7979           /* Construct 'abcdefgh' because the assembler cannot handle
7980              generic constants.  */
7981           if (info->mvn)
7982             imm = ~imm;
7983           imm = (imm >> info->shift) & 0xff;
7984           info->value = GEN_INT (imm);
7985         }
7986     }
7987
7988   return true;
7989 #undef CHECK
7990 }
7991
7992 /* Check of immediate shift constants are within range.  */
7993 bool
7994 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7995 {
7996   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7997   if (left)
7998     return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
7999   else
8000     return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8001 }
8002
8003 /* Return true if X is a uniform vector where all elements
8004    are either the floating-point constant 0.0 or the
8005    integer constant 0.  */
8006 bool
8007 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
8008 {
8009   return x == CONST0_RTX (mode);
8010 }
8011
8012 bool
8013 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
8014 {
8015   HOST_WIDE_INT imm = INTVAL (x);
8016   int i;
8017
8018   for (i = 0; i < 8; i++)
8019     {
8020       unsigned int byte = imm & 0xff;
8021       if (byte != 0xff && byte != 0)
8022        return false;
8023       imm >>= 8;
8024     }
8025
8026   return true;
8027 }
8028
8029 bool
8030 aarch64_mov_operand_p (rtx x,
8031                        enum aarch64_symbol_context context,
8032                        enum machine_mode mode)
8033 {
8034   if (GET_CODE (x) == HIGH
8035       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8036     return true;
8037
8038   if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
8039     return true;
8040
8041   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8042     return true;
8043
8044   return aarch64_classify_symbolic_expression (x, context)
8045     == SYMBOL_TINY_ABSOLUTE;
8046 }
8047
8048 /* Return a const_int vector of VAL.  */
8049 rtx
8050 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
8051 {
8052   int nunits = GET_MODE_NUNITS (mode);
8053   rtvec v = rtvec_alloc (nunits);
8054   int i;
8055
8056   for (i=0; i < nunits; i++)
8057     RTVEC_ELT (v, i) = GEN_INT (val);
8058
8059   return gen_rtx_CONST_VECTOR (mode, v);
8060 }
8061
8062 /* Check OP is a legal scalar immediate for the MOVI instruction.  */
8063
8064 bool
8065 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
8066 {
8067   enum machine_mode vmode;
8068
8069   gcc_assert (!VECTOR_MODE_P (mode));
8070   vmode = aarch64_preferred_simd_mode (mode);
8071   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
8072   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
8073 }
8074
8075 /* Construct and return a PARALLEL RTX vector with elements numbering the
8076    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8077    the vector - from the perspective of the architecture.  This does not
8078    line up with GCC's perspective on lane numbers, so we end up with
8079    different masks depending on our target endian-ness.  The diagram
8080    below may help.  We must draw the distinction when building masks
8081    which select one half of the vector.  An instruction selecting
8082    architectural low-lanes for a big-endian target, must be described using
8083    a mask selecting GCC high-lanes.
8084
8085                  Big-Endian             Little-Endian
8086
8087 GCC             0   1   2   3           3   2   1   0
8088               | x | x | x | x |       | x | x | x | x |
8089 Architecture    3   2   1   0           3   2   1   0
8090
8091 Low Mask:         { 2, 3 }                { 0, 1 }
8092 High Mask:        { 0, 1 }                { 2, 3 }
8093 */
8094
8095 rtx
8096 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
8097 {
8098   int nunits = GET_MODE_NUNITS (mode);
8099   rtvec v = rtvec_alloc (nunits / 2);
8100   int high_base = nunits / 2;
8101   int low_base = 0;
8102   int base;
8103   rtx t1;
8104   int i;
8105
8106   if (BYTES_BIG_ENDIAN)
8107     base = high ? low_base : high_base;
8108   else
8109     base = high ? high_base : low_base;
8110
8111   for (i = 0; i < nunits / 2; i++)
8112     RTVEC_ELT (v, i) = GEN_INT (base + i);
8113
8114   t1 = gen_rtx_PARALLEL (mode, v);
8115   return t1;
8116 }
8117
8118 /* Check OP for validity as a PARALLEL RTX vector with elements
8119    numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8120    from the perspective of the architecture.  See the diagram above
8121    aarch64_simd_vect_par_cnst_half for more details.  */
8122
8123 bool
8124 aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
8125                                        bool high)
8126 {
8127   rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8128   HOST_WIDE_INT count_op = XVECLEN (op, 0);
8129   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8130   int i = 0;
8131
8132   if (!VECTOR_MODE_P (mode))
8133     return false;
8134
8135   if (count_op != count_ideal)
8136     return false;
8137
8138   for (i = 0; i < count_ideal; i++)
8139     {
8140       rtx elt_op = XVECEXP (op, 0, i);
8141       rtx elt_ideal = XVECEXP (ideal, 0, i);
8142
8143       if (!CONST_INT_P (elt_op)
8144           || INTVAL (elt_ideal) != INTVAL (elt_op))
8145         return false;
8146     }
8147   return true;
8148 }
8149
8150 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
8151    HIGH (exclusive).  */
8152 void
8153 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8154 {
8155   HOST_WIDE_INT lane;
8156   gcc_assert (CONST_INT_P (operand));
8157   lane = INTVAL (operand);
8158
8159   if (lane < low || lane >= high)
8160     error ("lane out of range");
8161 }
8162
8163 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
8164    registers).  */
8165 void
8166 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
8167                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8168                             rtx op1)
8169 {
8170   rtx mem = gen_rtx_MEM (mode, destaddr);
8171   rtx tmp1 = gen_reg_rtx (mode);
8172   rtx tmp2 = gen_reg_rtx (mode);
8173
8174   emit_insn (intfn (tmp1, op1, tmp2));
8175
8176   emit_move_insn (mem, tmp1);
8177   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8178   emit_move_insn (mem, tmp2);
8179 }
8180
8181 /* Return TRUE if OP is a valid vector addressing mode.  */
8182 bool
8183 aarch64_simd_mem_operand_p (rtx op)
8184 {
8185   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8186                         || REG_P (XEXP (op, 0)));
8187 }
8188
8189 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8190    not to early-clobber SRC registers in the process.
8191
8192    We assume that the operands described by SRC and DEST represent a
8193    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
8194    number of components into which the copy has been decomposed.  */
8195 void
8196 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8197                                 rtx *src, unsigned int count)
8198 {
8199   unsigned int i;
8200
8201   if (!reg_overlap_mentioned_p (operands[0], operands[1])
8202       || REGNO (operands[0]) < REGNO (operands[1]))
8203     {
8204       for (i = 0; i < count; i++)
8205         {
8206           operands[2 * i] = dest[i];
8207           operands[2 * i + 1] = src[i];
8208         }
8209     }
8210   else
8211     {
8212       for (i = 0; i < count; i++)
8213         {
8214           operands[2 * i] = dest[count - i - 1];
8215           operands[2 * i + 1] = src[count - i - 1];
8216         }
8217     }
8218 }
8219
8220 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8221    one of VSTRUCT modes: OI, CI or XI.  */
8222 int
8223 aarch64_simd_attr_length_move (rtx_insn *insn)
8224 {
8225   enum machine_mode mode;
8226
8227   extract_insn_cached (insn);
8228
8229   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8230     {
8231       mode = GET_MODE (recog_data.operand[0]);
8232       switch (mode)
8233         {
8234         case OImode:
8235           return 8;
8236         case CImode:
8237           return 12;
8238         case XImode:
8239           return 16;
8240         default:
8241           gcc_unreachable ();
8242         }
8243     }
8244   return 4;
8245 }
8246
8247 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
8248    alignment of a vector to 128 bits.  */
8249 static HOST_WIDE_INT
8250 aarch64_simd_vector_alignment (const_tree type)
8251 {
8252   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8253   return MIN (align, 128);
8254 }
8255
8256 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
8257 static bool
8258 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8259 {
8260   if (is_packed)
8261     return false;
8262
8263   /* We guarantee alignment for vectors up to 128-bits.  */
8264   if (tree_int_cst_compare (TYPE_SIZE (type),
8265                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8266     return false;
8267
8268   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
8269   return true;
8270 }
8271
8272 /* If VALS is a vector constant that can be loaded into a register
8273    using DUP, generate instructions to do so and return an RTX to
8274    assign to the register.  Otherwise return NULL_RTX.  */
8275 static rtx
8276 aarch64_simd_dup_constant (rtx vals)
8277 {
8278   enum machine_mode mode = GET_MODE (vals);
8279   enum machine_mode inner_mode = GET_MODE_INNER (mode);
8280   int n_elts = GET_MODE_NUNITS (mode);
8281   bool all_same = true;
8282   rtx x;
8283   int i;
8284
8285   if (GET_CODE (vals) != CONST_VECTOR)
8286     return NULL_RTX;
8287
8288   for (i = 1; i < n_elts; ++i)
8289     {
8290       x = CONST_VECTOR_ELT (vals, i);
8291       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8292         all_same = false;
8293     }
8294
8295   if (!all_same)
8296     return NULL_RTX;
8297
8298   /* We can load this constant by using DUP and a constant in a
8299      single ARM register.  This will be cheaper than a vector
8300      load.  */
8301   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8302   return gen_rtx_VEC_DUPLICATE (mode, x);
8303 }
8304
8305
8306 /* Generate code to load VALS, which is a PARALLEL containing only
8307    constants (for vec_init) or CONST_VECTOR, efficiently into a
8308    register.  Returns an RTX to copy into the register, or NULL_RTX
8309    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
8310 static rtx
8311 aarch64_simd_make_constant (rtx vals)
8312 {
8313   enum machine_mode mode = GET_MODE (vals);
8314   rtx const_dup;
8315   rtx const_vec = NULL_RTX;
8316   int n_elts = GET_MODE_NUNITS (mode);
8317   int n_const = 0;
8318   int i;
8319
8320   if (GET_CODE (vals) == CONST_VECTOR)
8321     const_vec = vals;
8322   else if (GET_CODE (vals) == PARALLEL)
8323     {
8324       /* A CONST_VECTOR must contain only CONST_INTs and
8325          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8326          Only store valid constants in a CONST_VECTOR.  */
8327       for (i = 0; i < n_elts; ++i)
8328         {
8329           rtx x = XVECEXP (vals, 0, i);
8330           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8331             n_const++;
8332         }
8333       if (n_const == n_elts)
8334         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8335     }
8336   else
8337     gcc_unreachable ();
8338
8339   if (const_vec != NULL_RTX
8340       && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8341     /* Load using MOVI/MVNI.  */
8342     return const_vec;
8343   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8344     /* Loaded using DUP.  */
8345     return const_dup;
8346   else if (const_vec != NULL_RTX)
8347     /* Load from constant pool. We can not take advantage of single-cycle
8348        LD1 because we need a PC-relative addressing mode.  */
8349     return const_vec;
8350   else
8351     /* A PARALLEL containing something not valid inside CONST_VECTOR.
8352        We can not construct an initializer.  */
8353     return NULL_RTX;
8354 }
8355
8356 void
8357 aarch64_expand_vector_init (rtx target, rtx vals)
8358 {
8359   enum machine_mode mode = GET_MODE (target);
8360   enum machine_mode inner_mode = GET_MODE_INNER (mode);
8361   int n_elts = GET_MODE_NUNITS (mode);
8362   int n_var = 0, one_var = -1;
8363   bool all_same = true;
8364   rtx x, mem;
8365   int i;
8366
8367   x = XVECEXP (vals, 0, 0);
8368   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8369     n_var = 1, one_var = 0;
8370
8371   for (i = 1; i < n_elts; ++i)
8372     {
8373       x = XVECEXP (vals, 0, i);
8374       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8375         ++n_var, one_var = i;
8376
8377       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8378         all_same = false;
8379     }
8380
8381   if (n_var == 0)
8382     {
8383       rtx constant = aarch64_simd_make_constant (vals);
8384       if (constant != NULL_RTX)
8385         {
8386           emit_move_insn (target, constant);
8387           return;
8388         }
8389     }
8390
8391   /* Splat a single non-constant element if we can.  */
8392   if (all_same)
8393     {
8394       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8395       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8396       return;
8397     }
8398
8399   /* One field is non-constant.  Load constant then overwrite varying
8400      field.  This is more efficient than using the stack.  */
8401   if (n_var == 1)
8402     {
8403       rtx copy = copy_rtx (vals);
8404       rtx index = GEN_INT (one_var);
8405       enum insn_code icode;
8406
8407       /* Load constant part of vector, substitute neighboring value for
8408          varying element.  */
8409       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8410       aarch64_expand_vector_init (target, copy);
8411
8412       /* Insert variable.  */
8413       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8414       icode = optab_handler (vec_set_optab, mode);
8415       gcc_assert (icode != CODE_FOR_nothing);
8416       emit_insn (GEN_FCN (icode) (target, x, index));
8417       return;
8418     }
8419
8420   /* Construct the vector in memory one field at a time
8421      and load the whole vector.  */
8422   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8423   for (i = 0; i < n_elts; i++)
8424     emit_move_insn (adjust_address_nv (mem, inner_mode,
8425                                     i * GET_MODE_SIZE (inner_mode)),
8426                     XVECEXP (vals, 0, i));
8427   emit_move_insn (target, mem);
8428
8429 }
8430
8431 static unsigned HOST_WIDE_INT
8432 aarch64_shift_truncation_mask (enum machine_mode mode)
8433 {
8434   return
8435     (aarch64_vector_mode_supported_p (mode)
8436      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8437 }
8438
8439 #ifndef TLS_SECTION_ASM_FLAG
8440 #define TLS_SECTION_ASM_FLAG 'T'
8441 #endif
8442
8443 void
8444 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8445                                tree decl ATTRIBUTE_UNUSED)
8446 {
8447   char flagchars[10], *f = flagchars;
8448
8449   /* If we have already declared this section, we can use an
8450      abbreviated form to switch back to it -- unless this section is
8451      part of a COMDAT groups, in which case GAS requires the full
8452      declaration every time.  */
8453   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8454       && (flags & SECTION_DECLARED))
8455     {
8456       fprintf (asm_out_file, "\t.section\t%s\n", name);
8457       return;
8458     }
8459
8460   if (!(flags & SECTION_DEBUG))
8461     *f++ = 'a';
8462   if (flags & SECTION_WRITE)
8463     *f++ = 'w';
8464   if (flags & SECTION_CODE)
8465     *f++ = 'x';
8466   if (flags & SECTION_SMALL)
8467     *f++ = 's';
8468   if (flags & SECTION_MERGE)
8469     *f++ = 'M';
8470   if (flags & SECTION_STRINGS)
8471     *f++ = 'S';
8472   if (flags & SECTION_TLS)
8473     *f++ = TLS_SECTION_ASM_FLAG;
8474   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8475     *f++ = 'G';
8476   *f = '\0';
8477
8478   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8479
8480   if (!(flags & SECTION_NOTYPE))
8481     {
8482       const char *type;
8483       const char *format;
8484
8485       if (flags & SECTION_BSS)
8486         type = "nobits";
8487       else
8488         type = "progbits";
8489
8490 #ifdef TYPE_OPERAND_FMT
8491       format = "," TYPE_OPERAND_FMT;
8492 #else
8493       format = ",@%s";
8494 #endif
8495
8496       fprintf (asm_out_file, format, type);
8497
8498       if (flags & SECTION_ENTSIZE)
8499         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8500       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8501         {
8502           if (TREE_CODE (decl) == IDENTIFIER_NODE)
8503             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8504           else
8505             fprintf (asm_out_file, ",%s,comdat",
8506                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8507         }
8508     }
8509
8510   putc ('\n', asm_out_file);
8511 }
8512
8513 /* Select a format to encode pointers in exception handling data.  */
8514 int
8515 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8516 {
8517    int type;
8518    switch (aarch64_cmodel)
8519      {
8520      case AARCH64_CMODEL_TINY:
8521      case AARCH64_CMODEL_TINY_PIC:
8522      case AARCH64_CMODEL_SMALL:
8523      case AARCH64_CMODEL_SMALL_PIC:
8524        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
8525           for everything.  */
8526        type = DW_EH_PE_sdata4;
8527        break;
8528      default:
8529        /* No assumptions here.  8-byte relocs required.  */
8530        type = DW_EH_PE_sdata8;
8531        break;
8532      }
8533    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8534 }
8535
8536 /* Emit load exclusive.  */
8537
8538 static void
8539 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8540                              rtx mem, rtx model_rtx)
8541 {
8542   rtx (*gen) (rtx, rtx, rtx);
8543
8544   switch (mode)
8545     {
8546     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8547     case HImode: gen = gen_aarch64_load_exclusivehi; break;
8548     case SImode: gen = gen_aarch64_load_exclusivesi; break;
8549     case DImode: gen = gen_aarch64_load_exclusivedi; break;
8550     default:
8551       gcc_unreachable ();
8552     }
8553
8554   emit_insn (gen (rval, mem, model_rtx));
8555 }
8556
8557 /* Emit store exclusive.  */
8558
8559 static void
8560 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8561                               rtx rval, rtx mem, rtx model_rtx)
8562 {
8563   rtx (*gen) (rtx, rtx, rtx, rtx);
8564
8565   switch (mode)
8566     {
8567     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8568     case HImode: gen = gen_aarch64_store_exclusivehi; break;
8569     case SImode: gen = gen_aarch64_store_exclusivesi; break;
8570     case DImode: gen = gen_aarch64_store_exclusivedi; break;
8571     default:
8572       gcc_unreachable ();
8573     }
8574
8575   emit_insn (gen (bval, rval, mem, model_rtx));
8576 }
8577
8578 /* Mark the previous jump instruction as unlikely.  */
8579
8580 static void
8581 aarch64_emit_unlikely_jump (rtx insn)
8582 {
8583   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8584
8585   insn = emit_jump_insn (insn);
8586   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8587 }
8588
8589 /* Expand a compare and swap pattern.  */
8590
8591 void
8592 aarch64_expand_compare_and_swap (rtx operands[])
8593 {
8594   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8595   enum machine_mode mode, cmp_mode;
8596   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8597
8598   bval = operands[0];
8599   rval = operands[1];
8600   mem = operands[2];
8601   oldval = operands[3];
8602   newval = operands[4];
8603   is_weak = operands[5];
8604   mod_s = operands[6];
8605   mod_f = operands[7];
8606   mode = GET_MODE (mem);
8607   cmp_mode = mode;
8608
8609   /* Normally the succ memory model must be stronger than fail, but in the
8610      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8611      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
8612
8613   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8614       && INTVAL (mod_s) == MEMMODEL_RELEASE)
8615     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8616
8617   switch (mode)
8618     {
8619     case QImode:
8620     case HImode:
8621       /* For short modes, we're going to perform the comparison in SImode,
8622          so do the zero-extension now.  */
8623       cmp_mode = SImode;
8624       rval = gen_reg_rtx (SImode);
8625       oldval = convert_modes (SImode, mode, oldval, true);
8626       /* Fall through.  */
8627
8628     case SImode:
8629     case DImode:
8630       /* Force the value into a register if needed.  */
8631       if (!aarch64_plus_operand (oldval, mode))
8632         oldval = force_reg (cmp_mode, oldval);
8633       break;
8634
8635     default:
8636       gcc_unreachable ();
8637     }
8638
8639   switch (mode)
8640     {
8641     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8642     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8643     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8644     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8645     default:
8646       gcc_unreachable ();
8647     }
8648
8649   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8650
8651   if (mode == QImode || mode == HImode)
8652     emit_move_insn (operands[1], gen_lowpart (mode, rval));
8653
8654   x = gen_rtx_REG (CCmode, CC_REGNUM);
8655   x = gen_rtx_EQ (SImode, x, const0_rtx);
8656   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8657 }
8658
8659 /* Split a compare and swap pattern.  */
8660
8661 void
8662 aarch64_split_compare_and_swap (rtx operands[])
8663 {
8664   rtx rval, mem, oldval, newval, scratch;
8665   enum machine_mode mode;
8666   bool is_weak;
8667   rtx_code_label *label1, *label2;
8668   rtx x, cond;
8669
8670   rval = operands[0];
8671   mem = operands[1];
8672   oldval = operands[2];
8673   newval = operands[3];
8674   is_weak = (operands[4] != const0_rtx);
8675   scratch = operands[7];
8676   mode = GET_MODE (mem);
8677
8678   label1 = NULL;
8679   if (!is_weak)
8680     {
8681       label1 = gen_label_rtx ();
8682       emit_label (label1);
8683     }
8684   label2 = gen_label_rtx ();
8685
8686   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8687
8688   cond = aarch64_gen_compare_reg (NE, rval, oldval);
8689   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8690   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8691                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8692   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8693
8694   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8695
8696   if (!is_weak)
8697     {
8698       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8699       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8700                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8701       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8702     }
8703   else
8704     {
8705       cond = gen_rtx_REG (CCmode, CC_REGNUM);
8706       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8707       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8708     }
8709
8710   emit_label (label2);
8711 }
8712
8713 /* Split an atomic operation.  */
8714
8715 void
8716 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8717                      rtx value, rtx model_rtx, rtx cond)
8718 {
8719   enum machine_mode mode = GET_MODE (mem);
8720   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8721   rtx_code_label *label;
8722   rtx x;
8723
8724   label = gen_label_rtx ();
8725   emit_label (label);
8726
8727   if (new_out)
8728     new_out = gen_lowpart (wmode, new_out);
8729   if (old_out)
8730     old_out = gen_lowpart (wmode, old_out);
8731   else
8732     old_out = new_out;
8733   value = simplify_gen_subreg (wmode, value, mode, 0);
8734
8735   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8736
8737   switch (code)
8738     {
8739     case SET:
8740       new_out = value;
8741       break;
8742
8743     case NOT:
8744       x = gen_rtx_AND (wmode, old_out, value);
8745       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8746       x = gen_rtx_NOT (wmode, new_out);
8747       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8748       break;
8749
8750     case MINUS:
8751       if (CONST_INT_P (value))
8752         {
8753           value = GEN_INT (-INTVAL (value));
8754           code = PLUS;
8755         }
8756       /* Fall through.  */
8757
8758     default:
8759       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8760       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8761       break;
8762     }
8763
8764   aarch64_emit_store_exclusive (mode, cond, mem,
8765                                 gen_lowpart (mode, new_out), model_rtx);
8766
8767   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8768   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8769                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8770   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8771 }
8772
8773 static void
8774 aarch64_print_extension (void)
8775 {
8776   const struct aarch64_option_extension *opt = NULL;
8777
8778   for (opt = all_extensions; opt->name != NULL; opt++)
8779     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8780       asm_fprintf (asm_out_file, "+%s", opt->name);
8781
8782   asm_fprintf (asm_out_file, "\n");
8783 }
8784
8785 static void
8786 aarch64_start_file (void)
8787 {
8788   if (selected_arch)
8789     {
8790       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8791       aarch64_print_extension ();
8792     }
8793   else if (selected_cpu)
8794     {
8795       const char *truncated_name
8796             = aarch64_rewrite_selected_cpu (selected_cpu->name);
8797       asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8798       aarch64_print_extension ();
8799     }
8800   default_file_start();
8801 }
8802
8803 /* Target hook for c_mode_for_suffix.  */
8804 static enum machine_mode
8805 aarch64_c_mode_for_suffix (char suffix)
8806 {
8807   if (suffix == 'q')
8808     return TFmode;
8809
8810   return VOIDmode;
8811 }
8812
8813 /* We can only represent floating point constants which will fit in
8814    "quarter-precision" values.  These values are characterised by
8815    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
8816    by:
8817
8818    (-1)^s * (n/16) * 2^r
8819
8820    Where:
8821      's' is the sign bit.
8822      'n' is an integer in the range 16 <= n <= 31.
8823      'r' is an integer in the range -3 <= r <= 4.  */
8824
8825 /* Return true iff X can be represented by a quarter-precision
8826    floating point immediate operand X.  Note, we cannot represent 0.0.  */
8827 bool
8828 aarch64_float_const_representable_p (rtx x)
8829 {
8830   /* This represents our current view of how many bits
8831      make up the mantissa.  */
8832   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8833   int exponent;
8834   unsigned HOST_WIDE_INT mantissa, mask;
8835   REAL_VALUE_TYPE r, m;
8836   bool fail;
8837
8838   if (!CONST_DOUBLE_P (x))
8839     return false;
8840
8841   if (GET_MODE (x) == VOIDmode)
8842     return false;
8843
8844   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8845
8846   /* We cannot represent infinities, NaNs or +/-zero.  We won't
8847      know if we have +zero until we analyse the mantissa, but we
8848      can reject the other invalid values.  */
8849   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8850       || REAL_VALUE_MINUS_ZERO (r))
8851     return false;
8852
8853   /* Extract exponent.  */
8854   r = real_value_abs (&r);
8855   exponent = REAL_EXP (&r);
8856
8857   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8858      highest (sign) bit, with a fixed binary point at bit point_pos.
8859      m1 holds the low part of the mantissa, m2 the high part.
8860      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8861      bits for the mantissa, this can fail (low bits will be lost).  */
8862   real_ldexp (&m, &r, point_pos - exponent);
8863   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8864
8865   /* If the low part of the mantissa has bits set we cannot represent
8866      the value.  */
8867   if (w.elt (0) != 0)
8868     return false;
8869   /* We have rejected the lower HOST_WIDE_INT, so update our
8870      understanding of how many bits lie in the mantissa and
8871      look only at the high HOST_WIDE_INT.  */
8872   mantissa = w.elt (1);
8873   point_pos -= HOST_BITS_PER_WIDE_INT;
8874
8875   /* We can only represent values with a mantissa of the form 1.xxxx.  */
8876   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8877   if ((mantissa & mask) != 0)
8878     return false;
8879
8880   /* Having filtered unrepresentable values, we may now remove all
8881      but the highest 5 bits.  */
8882   mantissa >>= point_pos - 5;
8883
8884   /* We cannot represent the value 0.0, so reject it.  This is handled
8885      elsewhere.  */
8886   if (mantissa == 0)
8887     return false;
8888
8889   /* Then, as bit 4 is always set, we can mask it off, leaving
8890      the mantissa in the range [0, 15].  */
8891   mantissa &= ~(1 << 4);
8892   gcc_assert (mantissa <= 15);
8893
8894   /* GCC internally does not use IEEE754-like encoding (where normalized
8895      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
8896      Our mantissa values are shifted 4 places to the left relative to
8897      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8898      by 5 places to correct for GCC's representation.  */
8899   exponent = 5 - exponent;
8900
8901   return (exponent >= 0 && exponent <= 7);
8902 }
8903
8904 char*
8905 aarch64_output_simd_mov_immediate (rtx const_vector,
8906                                    enum machine_mode mode,
8907                                    unsigned width)
8908 {
8909   bool is_valid;
8910   static char templ[40];
8911   const char *mnemonic;
8912   const char *shift_op;
8913   unsigned int lane_count = 0;
8914   char element_char;
8915
8916   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8917
8918   /* This will return true to show const_vector is legal for use as either
8919      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
8920      also update INFO to show how the immediate should be generated.  */
8921   is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8922   gcc_assert (is_valid);
8923
8924   element_char = sizetochar (info.element_width);
8925   lane_count = width / info.element_width;
8926
8927   mode = GET_MODE_INNER (mode);
8928   if (mode == SFmode || mode == DFmode)
8929     {
8930       gcc_assert (info.shift == 0 && ! info.mvn);
8931       if (aarch64_float_const_zero_rtx_p (info.value))
8932         info.value = GEN_INT (0);
8933       else
8934         {
8935 #define buf_size 20
8936           REAL_VALUE_TYPE r;
8937           REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8938           char float_buf[buf_size] = {'\0'};
8939           real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8940 #undef buf_size
8941
8942           if (lane_count == 1)
8943             snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8944           else
8945             snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8946                       lane_count, element_char, float_buf);
8947           return templ;
8948         }
8949     }
8950
8951   mnemonic = info.mvn ? "mvni" : "movi";
8952   shift_op = info.msl ? "msl" : "lsl";
8953
8954   if (lane_count == 1)
8955     snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8956               mnemonic, UINTVAL (info.value));
8957   else if (info.shift)
8958     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8959               ", %s %d", mnemonic, lane_count, element_char,
8960               UINTVAL (info.value), shift_op, info.shift);
8961   else
8962     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8963               mnemonic, lane_count, element_char, UINTVAL (info.value));
8964   return templ;
8965 }
8966
8967 char*
8968 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8969                                           enum machine_mode mode)
8970 {
8971   enum machine_mode vmode;
8972
8973   gcc_assert (!VECTOR_MODE_P (mode));
8974   vmode = aarch64_simd_container_mode (mode, 64);
8975   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8976   return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8977 }
8978
8979 /* Split operands into moves from op[1] + op[2] into op[0].  */
8980
8981 void
8982 aarch64_split_combinev16qi (rtx operands[3])
8983 {
8984   unsigned int dest = REGNO (operands[0]);
8985   unsigned int src1 = REGNO (operands[1]);
8986   unsigned int src2 = REGNO (operands[2]);
8987   enum machine_mode halfmode = GET_MODE (operands[1]);
8988   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8989   rtx destlo, desthi;
8990
8991   gcc_assert (halfmode == V16QImode);
8992
8993   if (src1 == dest && src2 == dest + halfregs)
8994     {
8995       /* No-op move.  Can't split to nothing; emit something.  */
8996       emit_note (NOTE_INSN_DELETED);
8997       return;
8998     }
8999
9000   /* Preserve register attributes for variable tracking.  */
9001   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9002   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9003                                GET_MODE_SIZE (halfmode));
9004
9005   /* Special case of reversed high/low parts.  */
9006   if (reg_overlap_mentioned_p (operands[2], destlo)
9007       && reg_overlap_mentioned_p (operands[1], desthi))
9008     {
9009       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9010       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9011       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9012     }
9013   else if (!reg_overlap_mentioned_p (operands[2], destlo))
9014     {
9015       /* Try to avoid unnecessary moves if part of the result
9016          is in the right place already.  */
9017       if (src1 != dest)
9018         emit_move_insn (destlo, operands[1]);
9019       if (src2 != dest + halfregs)
9020         emit_move_insn (desthi, operands[2]);
9021     }
9022   else
9023     {
9024       if (src2 != dest + halfregs)
9025         emit_move_insn (desthi, operands[2]);
9026       if (src1 != dest)
9027         emit_move_insn (destlo, operands[1]);
9028     }
9029 }
9030
9031 /* vec_perm support.  */
9032
9033 #define MAX_VECT_LEN 16
9034
9035 struct expand_vec_perm_d
9036 {
9037   rtx target, op0, op1;
9038   unsigned char perm[MAX_VECT_LEN];
9039   enum machine_mode vmode;
9040   unsigned char nelt;
9041   bool one_vector_p;
9042   bool testing_p;
9043 };
9044
9045 /* Generate a variable permutation.  */
9046
9047 static void
9048 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9049 {
9050   enum machine_mode vmode = GET_MODE (target);
9051   bool one_vector_p = rtx_equal_p (op0, op1);
9052
9053   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9054   gcc_checking_assert (GET_MODE (op0) == vmode);
9055   gcc_checking_assert (GET_MODE (op1) == vmode);
9056   gcc_checking_assert (GET_MODE (sel) == vmode);
9057   gcc_checking_assert (TARGET_SIMD);
9058
9059   if (one_vector_p)
9060     {
9061       if (vmode == V8QImode)
9062         {
9063           /* Expand the argument to a V16QI mode by duplicating it.  */
9064           rtx pair = gen_reg_rtx (V16QImode);
9065           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9066           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9067         }
9068       else
9069         {
9070           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9071         }
9072     }
9073   else
9074     {
9075       rtx pair;
9076
9077       if (vmode == V8QImode)
9078         {
9079           pair = gen_reg_rtx (V16QImode);
9080           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9081           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9082         }
9083       else
9084         {
9085           pair = gen_reg_rtx (OImode);
9086           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9087           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9088         }
9089     }
9090 }
9091
9092 void
9093 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9094 {
9095   enum machine_mode vmode = GET_MODE (target);
9096   unsigned int nelt = GET_MODE_NUNITS (vmode);
9097   bool one_vector_p = rtx_equal_p (op0, op1);
9098   rtx mask;
9099
9100   /* The TBL instruction does not use a modulo index, so we must take care
9101      of that ourselves.  */
9102   mask = aarch64_simd_gen_const_vector_dup (vmode,
9103       one_vector_p ? nelt - 1 : 2 * nelt - 1);
9104   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9105
9106   /* For big-endian, we also need to reverse the index within the vector
9107      (but not which vector).  */
9108   if (BYTES_BIG_ENDIAN)
9109     {
9110       /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
9111       if (!one_vector_p)
9112         mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9113       sel = expand_simple_binop (vmode, XOR, sel, mask,
9114                                  NULL, 0, OPTAB_LIB_WIDEN);
9115     }
9116   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9117 }
9118
9119 /* Recognize patterns suitable for the TRN instructions.  */
9120 static bool
9121 aarch64_evpc_trn (struct expand_vec_perm_d *d)
9122 {
9123   unsigned int i, odd, mask, nelt = d->nelt;
9124   rtx out, in0, in1, x;
9125   rtx (*gen) (rtx, rtx, rtx);
9126   enum machine_mode vmode = d->vmode;
9127
9128   if (GET_MODE_UNIT_SIZE (vmode) > 8)
9129     return false;
9130
9131   /* Note that these are little-endian tests.
9132      We correct for big-endian later.  */
9133   if (d->perm[0] == 0)
9134     odd = 0;
9135   else if (d->perm[0] == 1)
9136     odd = 1;
9137   else
9138     return false;
9139   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9140
9141   for (i = 0; i < nelt; i += 2)
9142     {
9143       if (d->perm[i] != i + odd)
9144         return false;
9145       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9146         return false;
9147     }
9148
9149   /* Success!  */
9150   if (d->testing_p)
9151     return true;
9152
9153   in0 = d->op0;
9154   in1 = d->op1;
9155   if (BYTES_BIG_ENDIAN)
9156     {
9157       x = in0, in0 = in1, in1 = x;
9158       odd = !odd;
9159     }
9160   out = d->target;
9161
9162   if (odd)
9163     {
9164       switch (vmode)
9165         {
9166         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9167         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9168         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9169         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9170         case V4SImode: gen = gen_aarch64_trn2v4si; break;
9171         case V2SImode: gen = gen_aarch64_trn2v2si; break;
9172         case V2DImode: gen = gen_aarch64_trn2v2di; break;
9173         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9174         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9175         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9176         default:
9177           return false;
9178         }
9179     }
9180   else
9181     {
9182       switch (vmode)
9183         {
9184         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9185         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9186         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9187         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9188         case V4SImode: gen = gen_aarch64_trn1v4si; break;
9189         case V2SImode: gen = gen_aarch64_trn1v2si; break;
9190         case V2DImode: gen = gen_aarch64_trn1v2di; break;
9191         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9192         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9193         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9194         default:
9195           return false;
9196         }
9197     }
9198
9199   emit_insn (gen (out, in0, in1));
9200   return true;
9201 }
9202
9203 /* Recognize patterns suitable for the UZP instructions.  */
9204 static bool
9205 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9206 {
9207   unsigned int i, odd, mask, nelt = d->nelt;
9208   rtx out, in0, in1, x;
9209   rtx (*gen) (rtx, rtx, rtx);
9210   enum machine_mode vmode = d->vmode;
9211
9212   if (GET_MODE_UNIT_SIZE (vmode) > 8)
9213     return false;
9214
9215   /* Note that these are little-endian tests.
9216      We correct for big-endian later.  */
9217   if (d->perm[0] == 0)
9218     odd = 0;
9219   else if (d->perm[0] == 1)
9220     odd = 1;
9221   else
9222     return false;
9223   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9224
9225   for (i = 0; i < nelt; i++)
9226     {
9227       unsigned elt = (i * 2 + odd) & mask;
9228       if (d->perm[i] != elt)
9229         return false;
9230     }
9231
9232   /* Success!  */
9233   if (d->testing_p)
9234     return true;
9235
9236   in0 = d->op0;
9237   in1 = d->op1;
9238   if (BYTES_BIG_ENDIAN)
9239     {
9240       x = in0, in0 = in1, in1 = x;
9241       odd = !odd;
9242     }
9243   out = d->target;
9244
9245   if (odd)
9246     {
9247       switch (vmode)
9248         {
9249         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9250         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9251         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9252         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9253         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9254         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9255         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9256         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9257         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9258         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9259         default:
9260           return false;
9261         }
9262     }
9263   else
9264     {
9265       switch (vmode)
9266         {
9267         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9268         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9269         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9270         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9271         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9272         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9273         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9274         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9275         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9276         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9277         default:
9278           return false;
9279         }
9280     }
9281
9282   emit_insn (gen (out, in0, in1));
9283   return true;
9284 }
9285
9286 /* Recognize patterns suitable for the ZIP instructions.  */
9287 static bool
9288 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9289 {
9290   unsigned int i, high, mask, nelt = d->nelt;
9291   rtx out, in0, in1, x;
9292   rtx (*gen) (rtx, rtx, rtx);
9293   enum machine_mode vmode = d->vmode;
9294
9295   if (GET_MODE_UNIT_SIZE (vmode) > 8)
9296     return false;
9297
9298   /* Note that these are little-endian tests.
9299      We correct for big-endian later.  */
9300   high = nelt / 2;
9301   if (d->perm[0] == high)
9302     /* Do Nothing.  */
9303     ;
9304   else if (d->perm[0] == 0)
9305     high = 0;
9306   else
9307     return false;
9308   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9309
9310   for (i = 0; i < nelt / 2; i++)
9311     {
9312       unsigned elt = (i + high) & mask;
9313       if (d->perm[i * 2] != elt)
9314         return false;
9315       elt = (elt + nelt) & mask;
9316       if (d->perm[i * 2 + 1] != elt)
9317         return false;
9318     }
9319
9320   /* Success!  */
9321   if (d->testing_p)
9322     return true;
9323
9324   in0 = d->op0;
9325   in1 = d->op1;
9326   if (BYTES_BIG_ENDIAN)
9327     {
9328       x = in0, in0 = in1, in1 = x;
9329       high = !high;
9330     }
9331   out = d->target;
9332
9333   if (high)
9334     {
9335       switch (vmode)
9336         {
9337         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9338         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9339         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9340         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9341         case V4SImode: gen = gen_aarch64_zip2v4si; break;
9342         case V2SImode: gen = gen_aarch64_zip2v2si; break;
9343         case V2DImode: gen = gen_aarch64_zip2v2di; break;
9344         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9345         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9346         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9347         default:
9348           return false;
9349         }
9350     }
9351   else
9352     {
9353       switch (vmode)
9354         {
9355         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9356         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9357         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9358         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9359         case V4SImode: gen = gen_aarch64_zip1v4si; break;
9360         case V2SImode: gen = gen_aarch64_zip1v2si; break;
9361         case V2DImode: gen = gen_aarch64_zip1v2di; break;
9362         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9363         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9364         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9365         default:
9366           return false;
9367         }
9368     }
9369
9370   emit_insn (gen (out, in0, in1));
9371   return true;
9372 }
9373
9374 /* Recognize patterns for the EXT insn.  */
9375
9376 static bool
9377 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9378 {
9379   unsigned int i, nelt = d->nelt;
9380   rtx (*gen) (rtx, rtx, rtx, rtx);
9381   rtx offset;
9382
9383   unsigned int location = d->perm[0]; /* Always < nelt.  */
9384
9385   /* Check if the extracted indices are increasing by one.  */
9386   for (i = 1; i < nelt; i++)
9387     {
9388       unsigned int required = location + i;
9389       if (d->one_vector_p)
9390         {
9391           /* We'll pass the same vector in twice, so allow indices to wrap.  */
9392           required &= (nelt - 1);
9393         }
9394       if (d->perm[i] != required)
9395         return false;
9396     }
9397
9398   switch (d->vmode)
9399     {
9400     case V16QImode: gen = gen_aarch64_extv16qi; break;
9401     case V8QImode: gen = gen_aarch64_extv8qi; break;
9402     case V4HImode: gen = gen_aarch64_extv4hi; break;
9403     case V8HImode: gen = gen_aarch64_extv8hi; break;
9404     case V2SImode: gen = gen_aarch64_extv2si; break;
9405     case V4SImode: gen = gen_aarch64_extv4si; break;
9406     case V2SFmode: gen = gen_aarch64_extv2sf; break;
9407     case V4SFmode: gen = gen_aarch64_extv4sf; break;
9408     case V2DImode: gen = gen_aarch64_extv2di; break;
9409     case V2DFmode: gen = gen_aarch64_extv2df; break;
9410     default:
9411       return false;
9412     }
9413
9414   /* Success! */
9415   if (d->testing_p)
9416     return true;
9417
9418   /* The case where (location == 0) is a no-op for both big- and little-endian,
9419      and is removed by the mid-end at optimization levels -O1 and higher.  */
9420
9421   if (BYTES_BIG_ENDIAN && (location != 0))
9422     {
9423       /* After setup, we want the high elements of the first vector (stored
9424          at the LSB end of the register), and the low elements of the second
9425          vector (stored at the MSB end of the register). So swap.  */
9426       rtx temp = d->op0;
9427       d->op0 = d->op1;
9428       d->op1 = temp;
9429       /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
9430       location = nelt - location;
9431     }
9432
9433   offset = GEN_INT (location);
9434   emit_insn (gen (d->target, d->op0, d->op1, offset));
9435   return true;
9436 }
9437
9438 /* Recognize patterns for the REV insns.  */
9439
9440 static bool
9441 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9442 {
9443   unsigned int i, j, diff, nelt = d->nelt;
9444   rtx (*gen) (rtx, rtx);
9445
9446   if (!d->one_vector_p)
9447     return false;
9448
9449   diff = d->perm[0];
9450   switch (diff)
9451     {
9452     case 7:
9453       switch (d->vmode)
9454         {
9455         case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9456         case V8QImode: gen = gen_aarch64_rev64v8qi;  break;
9457         default:
9458           return false;
9459         }
9460       break;
9461     case 3:
9462       switch (d->vmode)
9463         {
9464         case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9465         case V8QImode: gen = gen_aarch64_rev32v8qi;  break;
9466         case V8HImode: gen = gen_aarch64_rev64v8hi;  break;
9467         case V4HImode: gen = gen_aarch64_rev64v4hi;  break;
9468         default:
9469           return false;
9470         }
9471       break;
9472     case 1:
9473       switch (d->vmode)
9474         {
9475         case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9476         case V8QImode: gen = gen_aarch64_rev16v8qi;  break;
9477         case V8HImode: gen = gen_aarch64_rev32v8hi;  break;
9478         case V4HImode: gen = gen_aarch64_rev32v4hi;  break;
9479         case V4SImode: gen = gen_aarch64_rev64v4si;  break;
9480         case V2SImode: gen = gen_aarch64_rev64v2si;  break;
9481         case V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
9482         case V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
9483         default:
9484           return false;
9485         }
9486       break;
9487     default:
9488       return false;
9489     }
9490
9491   for (i = 0; i < nelt ; i += diff + 1)
9492     for (j = 0; j <= diff; j += 1)
9493       {
9494         /* This is guaranteed to be true as the value of diff
9495            is 7, 3, 1 and we should have enough elements in the
9496            queue to generate this.  Getting a vector mask with a
9497            value of diff other than these values implies that
9498            something is wrong by the time we get here.  */
9499         gcc_assert (i + j < nelt);
9500         if (d->perm[i + j] != i + diff - j)
9501           return false;
9502       }
9503
9504   /* Success! */
9505   if (d->testing_p)
9506     return true;
9507
9508   emit_insn (gen (d->target, d->op0));
9509   return true;
9510 }
9511
9512 static bool
9513 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9514 {
9515   rtx (*gen) (rtx, rtx, rtx);
9516   rtx out = d->target;
9517   rtx in0;
9518   enum machine_mode vmode = d->vmode;
9519   unsigned int i, elt, nelt = d->nelt;
9520   rtx lane;
9521
9522   elt = d->perm[0];
9523   for (i = 1; i < nelt; i++)
9524     {
9525       if (elt != d->perm[i])
9526         return false;
9527     }
9528
9529   /* The generic preparation in aarch64_expand_vec_perm_const_1
9530      swaps the operand order and the permute indices if it finds
9531      d->perm[0] to be in the second operand.  Thus, we can always
9532      use d->op0 and need not do any extra arithmetic to get the
9533      correct lane number.  */
9534   in0 = d->op0;
9535   lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */
9536
9537   switch (vmode)
9538     {
9539     case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9540     case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9541     case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9542     case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9543     case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9544     case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9545     case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9546     case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9547     case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9548     case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9549     default:
9550       return false;
9551     }
9552
9553   emit_insn (gen (out, in0, lane));
9554   return true;
9555 }
9556
9557 static bool
9558 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9559 {
9560   rtx rperm[MAX_VECT_LEN], sel;
9561   enum machine_mode vmode = d->vmode;
9562   unsigned int i, nelt = d->nelt;
9563
9564   if (d->testing_p)
9565     return true;
9566
9567   /* Generic code will try constant permutation twice.  Once with the
9568      original mode and again with the elements lowered to QImode.
9569      So wait and don't do the selector expansion ourselves.  */
9570   if (vmode != V8QImode && vmode != V16QImode)
9571     return false;
9572
9573   for (i = 0; i < nelt; ++i)
9574     {
9575       int nunits = GET_MODE_NUNITS (vmode);
9576
9577       /* If big-endian and two vectors we end up with a weird mixed-endian
9578          mode on NEON.  Reverse the index within each word but not the word
9579          itself.  */
9580       rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9581                                            : d->perm[i]);
9582     }
9583   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9584   sel = force_reg (vmode, sel);
9585
9586   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9587   return true;
9588 }
9589
9590 static bool
9591 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9592 {
9593   /* The pattern matching functions above are written to look for a small
9594      number to begin the sequence (0, 1, N/2).  If we begin with an index
9595      from the second operand, we can swap the operands.  */
9596   if (d->perm[0] >= d->nelt)
9597     {
9598       unsigned i, nelt = d->nelt;
9599       rtx x;
9600
9601       gcc_assert (nelt == (nelt & -nelt));
9602       for (i = 0; i < nelt; ++i)
9603         d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */
9604
9605       x = d->op0;
9606       d->op0 = d->op1;
9607       d->op1 = x;
9608     }
9609
9610   if (TARGET_SIMD)
9611     {
9612       if (aarch64_evpc_rev (d))
9613         return true;
9614       else if (aarch64_evpc_ext (d))
9615         return true;
9616       else if (aarch64_evpc_dup (d))
9617         return true;
9618       else if (aarch64_evpc_zip (d))
9619         return true;
9620       else if (aarch64_evpc_uzp (d))
9621         return true;
9622       else if (aarch64_evpc_trn (d))
9623         return true;
9624       return aarch64_evpc_tbl (d);
9625     }
9626   return false;
9627 }
9628
9629 /* Expand a vec_perm_const pattern.  */
9630
9631 bool
9632 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9633 {
9634   struct expand_vec_perm_d d;
9635   int i, nelt, which;
9636
9637   d.target = target;
9638   d.op0 = op0;
9639   d.op1 = op1;
9640
9641   d.vmode = GET_MODE (target);
9642   gcc_assert (VECTOR_MODE_P (d.vmode));
9643   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9644   d.testing_p = false;
9645
9646   for (i = which = 0; i < nelt; ++i)
9647     {
9648       rtx e = XVECEXP (sel, 0, i);
9649       int ei = INTVAL (e) & (2 * nelt - 1);
9650       which |= (ei < nelt ? 1 : 2);
9651       d.perm[i] = ei;
9652     }
9653
9654   switch (which)
9655     {
9656     default:
9657       gcc_unreachable ();
9658
9659     case 3:
9660       d.one_vector_p = false;
9661       if (!rtx_equal_p (op0, op1))
9662         break;
9663
9664       /* The elements of PERM do not suggest that only the first operand
9665          is used, but both operands are identical.  Allow easier matching
9666          of the permutation by folding the permutation into the single
9667          input vector.  */
9668       /* Fall Through.  */
9669     case 2:
9670       for (i = 0; i < nelt; ++i)
9671         d.perm[i] &= nelt - 1;
9672       d.op0 = op1;
9673       d.one_vector_p = true;
9674       break;
9675
9676     case 1:
9677       d.op1 = op0;
9678       d.one_vector_p = true;
9679       break;
9680     }
9681
9682   return aarch64_expand_vec_perm_const_1 (&d);
9683 }
9684
9685 static bool
9686 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9687                                      const unsigned char *sel)
9688 {
9689   struct expand_vec_perm_d d;
9690   unsigned int i, nelt, which;
9691   bool ret;
9692
9693   d.vmode = vmode;
9694   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9695   d.testing_p = true;
9696   memcpy (d.perm, sel, nelt);
9697
9698   /* Calculate whether all elements are in one vector.  */
9699   for (i = which = 0; i < nelt; ++i)
9700     {
9701       unsigned char e = d.perm[i];
9702       gcc_assert (e < 2 * nelt);
9703       which |= (e < nelt ? 1 : 2);
9704     }
9705
9706   /* If all elements are from the second vector, reindex as if from the
9707      first vector.  */
9708   if (which == 2)
9709     for (i = 0; i < nelt; ++i)
9710       d.perm[i] -= nelt;
9711
9712   /* Check whether the mask can be applied to a single vector.  */
9713   d.one_vector_p = (which != 3);
9714
9715   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9716   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9717   if (!d.one_vector_p)
9718     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9719
9720   start_sequence ();
9721   ret = aarch64_expand_vec_perm_const_1 (&d);
9722   end_sequence ();
9723
9724   return ret;
9725 }
9726
9727 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
9728 bool
9729 aarch64_cannot_change_mode_class (enum machine_mode from,
9730                                   enum machine_mode to,
9731                                   enum reg_class rclass)
9732 {
9733   /* Full-reg subregs are allowed on general regs or any class if they are
9734      the same size.  */
9735   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9736       || !reg_classes_intersect_p (FP_REGS, rclass))
9737     return false;
9738
9739   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
9740      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9741      2. Scalar to Scalar for integer modes or same size float modes.
9742      3. Vector to Vector modes.
9743      4. On little-endian only, Vector-Structure to Vector modes.  */
9744   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9745     {
9746       if (aarch64_vector_mode_supported_p (from)
9747           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9748         return false;
9749
9750       if (GET_MODE_NUNITS (from) == 1
9751           && GET_MODE_NUNITS (to) == 1
9752           && (GET_MODE_CLASS (from) == MODE_INT
9753               || from == to))
9754         return false;
9755
9756       if (aarch64_vector_mode_supported_p (from)
9757           && aarch64_vector_mode_supported_p (to))
9758         return false;
9759
9760       /* Within an vector structure straddling multiple vector registers
9761          we are in a mixed-endian representation.  As such, we can't
9762          easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
9763          switch between vectors and vector structures cheaply.  */
9764       if (!BYTES_BIG_ENDIAN)
9765         if ((aarch64_vector_mode_supported_p (from)
9766               && aarch64_vect_struct_mode_p (to))
9767             || (aarch64_vector_mode_supported_p (to)
9768               && aarch64_vect_struct_mode_p (from)))
9769           return false;
9770     }
9771
9772   return true;
9773 }
9774
9775 /* Implement MODES_TIEABLE_P.  */
9776
9777 bool
9778 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9779 {
9780   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9781     return true;
9782
9783   /* We specifically want to allow elements of "structure" modes to
9784      be tieable to the structure.  This more general condition allows
9785      other rarer situations too.  */
9786   if (TARGET_SIMD
9787       && aarch64_vector_mode_p (mode1)
9788       && aarch64_vector_mode_p (mode2))
9789     return true;
9790
9791   return false;
9792 }
9793
9794 /* Return a new RTX holding the result of moving POINTER forward by
9795    AMOUNT bytes.  */
9796
9797 static rtx
9798 aarch64_move_pointer (rtx pointer, int amount)
9799 {
9800   rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9801
9802   return adjust_automodify_address (pointer, GET_MODE (pointer),
9803                                     next, amount);
9804 }
9805
9806 /* Return a new RTX holding the result of moving POINTER forward by the
9807    size of the mode it points to.  */
9808
9809 static rtx
9810 aarch64_progress_pointer (rtx pointer)
9811 {
9812   HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9813
9814   return aarch64_move_pointer (pointer, amount);
9815 }
9816
9817 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9818    MODE bytes.  */
9819
9820 static void
9821 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9822                                               enum machine_mode mode)
9823 {
9824   rtx reg = gen_reg_rtx (mode);
9825
9826   /* "Cast" the pointers to the correct mode.  */
9827   *src = adjust_address (*src, mode, 0);
9828   *dst = adjust_address (*dst, mode, 0);
9829   /* Emit the memcpy.  */
9830   emit_move_insn (reg, *src);
9831   emit_move_insn (*dst, reg);
9832   /* Move the pointers forward.  */
9833   *src = aarch64_progress_pointer (*src);
9834   *dst = aarch64_progress_pointer (*dst);
9835 }
9836
9837 /* Expand movmem, as if from a __builtin_memcpy.  Return true if
9838    we succeed, otherwise return false.  */
9839
9840 bool
9841 aarch64_expand_movmem (rtx *operands)
9842 {
9843   unsigned int n;
9844   rtx dst = operands[0];
9845   rtx src = operands[1];
9846   rtx base;
9847   bool speed_p = !optimize_function_for_size_p (cfun);
9848
9849   /* When optimizing for size, give a better estimate of the length of a
9850      memcpy call, but use the default otherwise.  */
9851   unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9852
9853   /* We can't do anything smart if the amount to copy is not constant.  */
9854   if (!CONST_INT_P (operands[2]))
9855     return false;
9856
9857   n = UINTVAL (operands[2]);
9858
9859   /* Try to keep the number of instructions low.  For cases below 16 bytes we
9860      need to make at most two moves.  For cases above 16 bytes it will be one
9861      move for each 16 byte chunk, then at most two additional moves.  */
9862   if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9863     return false;
9864
9865   base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9866   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9867
9868   base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9869   src = adjust_automodify_address (src, VOIDmode, base, 0);
9870
9871   /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9872      1-byte chunk.  */
9873   if (n < 4)
9874     {
9875       if (n >= 2)
9876         {
9877           aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9878           n -= 2;
9879         }
9880
9881       if (n == 1)
9882         aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9883
9884       return true;
9885     }
9886
9887   /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
9888      4-byte chunk, partially overlapping with the previously copied chunk.  */
9889   if (n < 8)
9890     {
9891       aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9892       n -= 4;
9893       if (n > 0)
9894         {
9895           int move = n - 4;
9896
9897           src = aarch64_move_pointer (src, move);
9898           dst = aarch64_move_pointer (dst, move);
9899           aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9900         }
9901       return true;
9902     }
9903
9904   /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
9905      them, then (if applicable) an 8-byte chunk.  */
9906   while (n >= 8)
9907     {
9908       if (n / 16)
9909         {
9910           aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9911           n -= 16;
9912         }
9913       else
9914         {
9915           aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9916           n -= 8;
9917         }
9918     }
9919
9920   /* Finish the final bytes of the copy.  We can always do this in one
9921      instruction.  We either copy the exact amount we need, or partially
9922      overlap with the previous chunk we copied and copy 8-bytes.  */
9923   if (n == 0)
9924     return true;
9925   else if (n == 1)
9926     aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9927   else if (n == 2)
9928     aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9929   else if (n == 4)
9930     aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9931   else
9932     {
9933       if (n == 3)
9934         {
9935           src = aarch64_move_pointer (src, -1);
9936           dst = aarch64_move_pointer (dst, -1);
9937           aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9938         }
9939       else
9940         {
9941           int move = n - 8;
9942
9943           src = aarch64_move_pointer (src, move);
9944           dst = aarch64_move_pointer (dst, move);
9945           aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9946         }
9947     }
9948
9949   return true;
9950 }
9951
9952 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
9953
9954 static unsigned HOST_WIDE_INT
9955 aarch64_asan_shadow_offset (void)
9956 {
9957   return (HOST_WIDE_INT_1 << 36);
9958 }
9959
9960 #undef TARGET_ADDRESS_COST
9961 #define TARGET_ADDRESS_COST aarch64_address_cost
9962
9963 /* This hook will determines whether unnamed bitfields affect the alignment
9964    of the containing structure.  The hook returns true if the structure
9965    should inherit the alignment requirements of an unnamed bitfield's
9966    type.  */
9967 #undef TARGET_ALIGN_ANON_BITFIELD
9968 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9969
9970 #undef TARGET_ASM_ALIGNED_DI_OP
9971 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9972
9973 #undef TARGET_ASM_ALIGNED_HI_OP
9974 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9975
9976 #undef TARGET_ASM_ALIGNED_SI_OP
9977 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9978
9979 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9980 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9981   hook_bool_const_tree_hwi_hwi_const_tree_true
9982
9983 #undef TARGET_ASM_FILE_START
9984 #define TARGET_ASM_FILE_START aarch64_start_file
9985
9986 #undef TARGET_ASM_OUTPUT_MI_THUNK
9987 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9988
9989 #undef TARGET_ASM_SELECT_RTX_SECTION
9990 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9991
9992 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9993 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9994
9995 #undef TARGET_BUILD_BUILTIN_VA_LIST
9996 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9997
9998 #undef TARGET_CALLEE_COPIES
9999 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
10000
10001 #undef TARGET_CAN_ELIMINATE
10002 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
10003
10004 #undef TARGET_CANNOT_FORCE_CONST_MEM
10005 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
10006
10007 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10008 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
10009
10010 /* Only the least significant bit is used for initialization guard
10011    variables.  */
10012 #undef TARGET_CXX_GUARD_MASK_BIT
10013 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
10014
10015 #undef TARGET_C_MODE_FOR_SUFFIX
10016 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
10017
10018 #ifdef TARGET_BIG_ENDIAN_DEFAULT
10019 #undef  TARGET_DEFAULT_TARGET_FLAGS
10020 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
10021 #endif
10022
10023 #undef TARGET_CLASS_MAX_NREGS
10024 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
10025
10026 #undef TARGET_BUILTIN_DECL
10027 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
10028
10029 #undef  TARGET_EXPAND_BUILTIN
10030 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
10031
10032 #undef TARGET_EXPAND_BUILTIN_VA_START
10033 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
10034
10035 #undef TARGET_FOLD_BUILTIN
10036 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
10037
10038 #undef TARGET_FUNCTION_ARG
10039 #define TARGET_FUNCTION_ARG aarch64_function_arg
10040
10041 #undef TARGET_FUNCTION_ARG_ADVANCE
10042 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
10043
10044 #undef TARGET_FUNCTION_ARG_BOUNDARY
10045 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
10046
10047 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10048 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
10049
10050 #undef TARGET_FUNCTION_VALUE
10051 #define TARGET_FUNCTION_VALUE aarch64_function_value
10052
10053 #undef TARGET_FUNCTION_VALUE_REGNO_P
10054 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
10055
10056 #undef TARGET_FRAME_POINTER_REQUIRED
10057 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
10058
10059 #undef TARGET_GIMPLE_FOLD_BUILTIN
10060 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
10061
10062 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10063 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
10064
10065 #undef  TARGET_INIT_BUILTINS
10066 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
10067
10068 #undef TARGET_LEGITIMATE_ADDRESS_P
10069 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
10070
10071 #undef TARGET_LEGITIMATE_CONSTANT_P
10072 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
10073
10074 #undef TARGET_LIBGCC_CMP_RETURN_MODE
10075 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
10076
10077 #undef TARGET_LRA_P
10078 #define TARGET_LRA_P aarch64_lra_p
10079
10080 #undef TARGET_MANGLE_TYPE
10081 #define TARGET_MANGLE_TYPE aarch64_mangle_type
10082
10083 #undef TARGET_MEMORY_MOVE_COST
10084 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
10085
10086 #undef TARGET_MUST_PASS_IN_STACK
10087 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10088
10089 /* This target hook should return true if accesses to volatile bitfields
10090    should use the narrowest mode possible.  It should return false if these
10091    accesses should use the bitfield container type.  */
10092 #undef TARGET_NARROW_VOLATILE_BITFIELD
10093 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
10094
10095 #undef  TARGET_OPTION_OVERRIDE
10096 #define TARGET_OPTION_OVERRIDE aarch64_override_options
10097
10098 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10099 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10100   aarch64_override_options_after_change
10101
10102 #undef TARGET_PASS_BY_REFERENCE
10103 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
10104
10105 #undef TARGET_PREFERRED_RELOAD_CLASS
10106 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
10107
10108 #undef TARGET_SECONDARY_RELOAD
10109 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
10110
10111 #undef TARGET_SHIFT_TRUNCATION_MASK
10112 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
10113
10114 #undef TARGET_SETUP_INCOMING_VARARGS
10115 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
10116
10117 #undef TARGET_STRUCT_VALUE_RTX
10118 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
10119
10120 #undef TARGET_REGISTER_MOVE_COST
10121 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
10122
10123 #undef TARGET_RETURN_IN_MEMORY
10124 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
10125
10126 #undef TARGET_RETURN_IN_MSB
10127 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
10128
10129 #undef TARGET_RTX_COSTS
10130 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
10131
10132 #undef TARGET_SCHED_ISSUE_RATE
10133 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
10134
10135 #undef TARGET_TRAMPOLINE_INIT
10136 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
10137
10138 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10139 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
10140
10141 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10142 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
10143
10144 #undef TARGET_ARRAY_MODE_SUPPORTED_P
10145 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
10146
10147 #undef TARGET_VECTORIZE_ADD_STMT_COST
10148 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
10149
10150 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
10151 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
10152   aarch64_builtin_vectorization_cost
10153
10154 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
10155 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
10156
10157 #undef TARGET_VECTORIZE_BUILTINS
10158 #define TARGET_VECTORIZE_BUILTINS
10159
10160 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
10161 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
10162   aarch64_builtin_vectorized_function
10163
10164 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
10165 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
10166   aarch64_autovectorize_vector_sizes
10167
10168 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10169 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
10170   aarch64_atomic_assign_expand_fenv
10171
10172 /* Section anchor support.  */
10173
10174 #undef TARGET_MIN_ANCHOR_OFFSET
10175 #define TARGET_MIN_ANCHOR_OFFSET -256
10176
10177 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
10178    byte offset; we can do much more for larger data types, but have no way
10179    to determine the size of the access.  We assume accesses are aligned.  */
10180 #undef TARGET_MAX_ANCHOR_OFFSET
10181 #define TARGET_MAX_ANCHOR_OFFSET 4095
10182
10183 #undef TARGET_VECTOR_ALIGNMENT
10184 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
10185
10186 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
10187 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
10188   aarch64_simd_vector_alignment_reachable
10189
10190 /* vec_perm support.  */
10191
10192 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
10193 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
10194   aarch64_vectorize_vec_perm_const_ok
10195
10196
10197 #undef TARGET_FIXED_CONDITION_CODE_REGS
10198 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
10199
10200 #undef TARGET_FLAGS_REGNUM
10201 #define TARGET_FLAGS_REGNUM CC_REGNUM
10202
10203 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
10204 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
10205
10206 #undef TARGET_ASAN_SHADOW_OFFSET
10207 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
10208
10209 #undef TARGET_LEGITIMIZE_ADDRESS
10210 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
10211
10212 struct gcc_target targetm = TARGET_INITIALIZER;
10213
10214 #include "gt-aarch64.h"