[AArch64 costs 7/18] Improve SET cost.
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 92fbd4d0f1c89293deb2ee8cb31c9481e41467f0
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91 ADDRESS_SYMBOLIC:
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
114 rtx value;
115 int shift;
116 int element_width;
117 bool mvn;
118 bool msl;
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
124 #ifdef HAVE_AS_TLS
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
127 #endif
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
146 /* The processor for which instructions should be scheduled. */
147 enum aarch64_processor aarch64_tune = cortexa53;
149 /* The current tuning set. */
150 const struct tune_params *aarch64_tune_params;
152 /* Mask to specify which instructions we are allowed to generate. */
153 unsigned long aarch64_isa_flags = 0;
155 /* Mask to specify which instruction scheduling options should be used. */
156 unsigned long aarch64_tune_flags = 0;
158 /* Tuning parameters. */
160 #if HAVE_DESIGNATED_INITIALIZERS
161 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
162 #else
163 #define NAMED_PARAM(NAME, VAL) (VAL)
164 #endif
166 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
167 __extension__
168 #endif
170 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
171 __extension__
172 #endif
173 static const struct cpu_addrcost_table generic_addrcost_table =
175 #if HAVE_DESIGNATED_INITIALIZERS
176 .addr_scale_costs =
177 #endif
179 NAMED_PARAM (qi, 0),
180 NAMED_PARAM (hi, 0),
181 NAMED_PARAM (si, 0),
182 NAMED_PARAM (ti, 0),
184 NAMED_PARAM (pre_modify, 0),
185 NAMED_PARAM (post_modify, 0),
186 NAMED_PARAM (register_offset, 0),
187 NAMED_PARAM (register_extend, 0),
188 NAMED_PARAM (imm_offset, 0)
191 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
192 __extension__
193 #endif
194 static const struct cpu_addrcost_table cortexa57_addrcost_table =
196 #if HAVE_DESIGNATED_INITIALIZERS
197 .addr_scale_costs =
198 #endif
200 NAMED_PARAM (qi, 0),
201 NAMED_PARAM (hi, 1),
202 NAMED_PARAM (si, 0),
203 NAMED_PARAM (ti, 1),
205 NAMED_PARAM (pre_modify, 0),
206 NAMED_PARAM (post_modify, 0),
207 NAMED_PARAM (register_offset, 0),
208 NAMED_PARAM (register_extend, 0),
209 NAMED_PARAM (imm_offset, 0),
212 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
213 __extension__
214 #endif
215 static const struct cpu_regmove_cost generic_regmove_cost =
217 NAMED_PARAM (GP2GP, 1),
218 NAMED_PARAM (GP2FP, 2),
219 NAMED_PARAM (FP2GP, 2),
220 /* We currently do not provide direct support for TFmode Q->Q move.
221 Therefore we need to raise the cost above 2 in order to have
222 reload handle the situation. */
223 NAMED_PARAM (FP2FP, 4)
226 /* Generic costs for vector insn classes. */
227 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
228 __extension__
229 #endif
230 static const struct cpu_vector_cost generic_vector_cost =
232 NAMED_PARAM (scalar_stmt_cost, 1),
233 NAMED_PARAM (scalar_load_cost, 1),
234 NAMED_PARAM (scalar_store_cost, 1),
235 NAMED_PARAM (vec_stmt_cost, 1),
236 NAMED_PARAM (vec_to_scalar_cost, 1),
237 NAMED_PARAM (scalar_to_vec_cost, 1),
238 NAMED_PARAM (vec_align_load_cost, 1),
239 NAMED_PARAM (vec_unalign_load_cost, 1),
240 NAMED_PARAM (vec_unalign_store_cost, 1),
241 NAMED_PARAM (vec_store_cost, 1),
242 NAMED_PARAM (cond_taken_branch_cost, 3),
243 NAMED_PARAM (cond_not_taken_branch_cost, 1)
246 /* Generic costs for vector insn classes. */
247 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
248 __extension__
249 #endif
250 static const struct cpu_vector_cost cortexa57_vector_cost =
252 NAMED_PARAM (scalar_stmt_cost, 1),
253 NAMED_PARAM (scalar_load_cost, 4),
254 NAMED_PARAM (scalar_store_cost, 1),
255 NAMED_PARAM (vec_stmt_cost, 3),
256 NAMED_PARAM (vec_to_scalar_cost, 8),
257 NAMED_PARAM (scalar_to_vec_cost, 8),
258 NAMED_PARAM (vec_align_load_cost, 5),
259 NAMED_PARAM (vec_unalign_load_cost, 5),
260 NAMED_PARAM (vec_unalign_store_cost, 1),
261 NAMED_PARAM (vec_store_cost, 1),
262 NAMED_PARAM (cond_taken_branch_cost, 1),
263 NAMED_PARAM (cond_not_taken_branch_cost, 1)
266 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
267 __extension__
268 #endif
269 static const struct tune_params generic_tunings =
271 &cortexa57_extra_costs,
272 &generic_addrcost_table,
273 &generic_regmove_cost,
274 &generic_vector_cost,
275 NAMED_PARAM (memmov_cost, 4),
276 NAMED_PARAM (issue_rate, 2)
279 static const struct tune_params cortexa53_tunings =
281 &cortexa53_extra_costs,
282 &generic_addrcost_table,
283 &generic_regmove_cost,
284 &generic_vector_cost,
285 NAMED_PARAM (memmov_cost, 4),
286 NAMED_PARAM (issue_rate, 2)
289 static const struct tune_params cortexa57_tunings =
291 &cortexa57_extra_costs,
292 &cortexa57_addrcost_table,
293 &generic_regmove_cost,
294 &cortexa57_vector_cost,
295 NAMED_PARAM (memmov_cost, 4),
296 NAMED_PARAM (issue_rate, 3)
299 /* A processor implementing AArch64. */
300 struct processor
302 const char *const name;
303 enum aarch64_processor core;
304 const char *arch;
305 const unsigned long flags;
306 const struct tune_params *const tune;
309 /* Processor cores implementing AArch64. */
310 static const struct processor all_cores[] =
312 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
313 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
314 #include "aarch64-cores.def"
315 #undef AARCH64_CORE
316 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
317 {NULL, aarch64_none, NULL, 0, NULL}
320 /* Architectures implementing AArch64. */
321 static const struct processor all_architectures[] =
323 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
324 {NAME, CORE, #ARCH, FLAGS, NULL},
325 #include "aarch64-arches.def"
326 #undef AARCH64_ARCH
327 {NULL, aarch64_none, NULL, 0, NULL}
330 /* Target specification. These are populated as commandline arguments
331 are processed, or NULL if not specified. */
332 static const struct processor *selected_arch;
333 static const struct processor *selected_cpu;
334 static const struct processor *selected_tune;
336 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
338 /* An ISA extension in the co-processor and main instruction set space. */
339 struct aarch64_option_extension
341 const char *const name;
342 const unsigned long flags_on;
343 const unsigned long flags_off;
346 /* ISA extensions in AArch64. */
347 static const struct aarch64_option_extension all_extensions[] =
349 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
350 {NAME, FLAGS_ON, FLAGS_OFF},
351 #include "aarch64-option-extensions.def"
352 #undef AARCH64_OPT_EXTENSION
353 {NULL, 0, 0}
356 /* Used to track the size of an address when generating a pre/post
357 increment address. */
358 static enum machine_mode aarch64_memory_reference_mode;
360 /* Used to force GTY into this file. */
361 static GTY(()) int gty_dummy;
363 /* A table of valid AArch64 "bitmask immediate" values for
364 logical instructions. */
366 #define AARCH64_NUM_BITMASKS 5334
367 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
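/* Illustrative note (not part of the original source): an AArch64 "bitmask
   immediate" is a contiguous run of set bits, rotated by any amount and
   replicated across 2-, 4-, 8-, 16-, 32- or 64-bit elements.  For example,
   0x00ff00ff00ff00ff (eight set bits replicated in 16-bit elements) is
   encodable, whereas 0x1234 is not; AARCH64_NUM_BITMASKS is the number of
   distinct 64-bit values of this form that the table holds.  */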
369 typedef enum aarch64_cond_code
371 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
372 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
373 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
375 aarch64_cc;
377 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
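/* For illustration: the enumeration above pairs each condition with its
   inverse in adjacent even/odd slots (EQ/NE, CS/CC, MI/PL, ...), so the
   inverse of any code is obtained by flipping the low bit, e.g.
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT.  */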
379 /* The condition codes of the processor, and the inverse function. */
380 static const char * const aarch64_condition_codes[] =
382 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
383 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
386 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
387 unsigned
388 aarch64_dbx_register_number (unsigned regno)
390 if (GP_REGNUM_P (regno))
391 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
392 else if (regno == SP_REGNUM)
393 return AARCH64_DWARF_SP;
394 else if (FP_REGNUM_P (regno))
395 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
397 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
398 equivalent DWARF register. */
399 return DWARF_FRAME_REGISTERS;
402 /* Return TRUE if MODE is any of the large INT modes. */
403 static bool
404 aarch64_vect_struct_mode_p (enum machine_mode mode)
406 return mode == OImode || mode == CImode || mode == XImode;
409 /* Return TRUE if MODE is any of the vector modes. */
410 static bool
411 aarch64_vector_mode_p (enum machine_mode mode)
413 return aarch64_vector_mode_supported_p (mode)
414 || aarch64_vect_struct_mode_p (mode);
417 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
418 static bool
419 aarch64_array_mode_supported_p (enum machine_mode mode,
420 unsigned HOST_WIDE_INT nelems)
422 if (TARGET_SIMD
423 && AARCH64_VALID_SIMD_QREG_MODE (mode)
424 && (nelems >= 2 && nelems <= 4))
425 return true;
427 return false;
430 /* Implement HARD_REGNO_NREGS. */
433 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
435 switch (aarch64_regno_regclass (regno))
437 case FP_REGS:
438 case FP_LO_REGS:
439 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
440 default:
441 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
443 gcc_unreachable ();
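/* For example (illustrative): with UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16, a TImode value (16 bytes) occupies two general
   registers but only one FP/SIMD register, while an OImode value
   (32 bytes) occupies two FP/SIMD registers.  */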
446 /* Implement HARD_REGNO_MODE_OK. */
449 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
451 if (GET_MODE_CLASS (mode) == MODE_CC)
452 return regno == CC_REGNUM;
454 if (regno == SP_REGNUM)
455 /* The purpose of comparing with ptr_mode is to support the
456 global register variable associated with the stack pointer
457 register via the syntax of asm ("wsp") in ILP32. */
458 return mode == Pmode || mode == ptr_mode;
460 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
461 return mode == Pmode;
463 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
464 return 1;
466 if (FP_REGNUM_P (regno))
468 if (aarch64_vect_struct_mode_p (mode))
469 return
470 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
471 else
472 return 1;
475 return 0;
478 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
479 enum machine_mode
480 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
481 enum machine_mode mode)
483 /* Handle modes that fit within single registers. */
484 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
486 if (GET_MODE_SIZE (mode) >= 4)
487 return mode;
488 else
489 return SImode;
491 /* Fall back to generic for multi-reg and very large modes. */
492 else
493 return choose_hard_reg_mode (regno, nregs, false);
496 /* Return true if calls to DECL should be treated as
497 long-calls (i.e. called via a register). */
498 static bool
499 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
501 return false;
504 /* Return true if calls to symbol-ref SYM should be treated as
505 long-calls (i.e. called via a register). */
506 bool
507 aarch64_is_long_call_p (rtx sym)
509 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
512 /* Return true if the offsets to a zero/sign-extract operation
513 represent an expression that matches an extend operation. The
514 operands represent the parameters from
516 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
517 bool
518 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
519 rtx extract_imm)
521 HOST_WIDE_INT mult_val, extract_val;
523 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
524 return false;
526 mult_val = INTVAL (mult_imm);
527 extract_val = INTVAL (extract_imm);
529 if (extract_val > 8
530 && extract_val < GET_MODE_BITSIZE (mode)
531 && exact_log2 (extract_val & ~7) > 0
532 && (extract_val & 7) <= 4
533 && mult_val == (1 << (extract_val & 7)))
534 return true;
536 return false;
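/* Worked example for the check above (illustrative): with MODE == DImode,
   EXTRACT_IMM == 34 and MULT_IMM == 4 we have (34 & ~7) == 32, a power of
   two, (34 & 7) == 2 and 4 == (1 << 2).  Extracting the low 34 bits of a
   value multiplied by 4 is therefore equivalent to a 32-bit extend
   followed by a left shift of 2, i.e. the form of an extended-register
   operand.  */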
539 /* Emit an insn that's a simple single-set. Both the operands must be
540 known to be valid. */
541 inline static rtx
542 emit_set_insn (rtx x, rtx y)
544 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
547 /* X and Y are two things to compare using CODE. Emit the compare insn and
548 return the rtx for register 0 in the proper mode. */
550 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
552 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
553 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
555 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
556 return cc_reg;
559 /* Build the SYMBOL_REF for __tls_get_addr. */
561 static GTY(()) rtx tls_get_addr_libfunc;
564 aarch64_tls_get_addr (void)
566 if (!tls_get_addr_libfunc)
567 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
568 return tls_get_addr_libfunc;
571 /* Return the TLS model to use for ADDR. */
573 static enum tls_model
574 tls_symbolic_operand_type (rtx addr)
576 enum tls_model tls_kind = TLS_MODEL_NONE;
577 rtx sym, addend;
579 if (GET_CODE (addr) == CONST)
581 split_const (addr, &sym, &addend);
582 if (GET_CODE (sym) == SYMBOL_REF)
583 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
585 else if (GET_CODE (addr) == SYMBOL_REF)
586 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
588 return tls_kind;
591 /* We'll allow LO_SUMs in our legitimate addresses so that combine
592 can take care of combining addresses where necessary; for code
593 generation purposes, however, we'll generate the address
594 as:
595 RTL Absolute
596 tmp = hi (symbol_ref); adrp x1, foo
597 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
600 PIC TLS
601 adrp x1, :got:foo adrp tmp, :tlsgd:foo
602 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
603 bl __tls_get_addr
606 Load TLS symbol, depending on TLS mechanism and TLS access model.
608 Global Dynamic - Traditional TLS:
609 adrp tmp, :tlsgd:imm
610 add dest, tmp, #:tlsgd_lo12:imm
611 bl __tls_get_addr
613 Global Dynamic - TLS Descriptors:
614 adrp dest, :tlsdesc:imm
615 ldr tmp, [dest, #:tlsdesc_lo12:imm]
616 add dest, dest, #:tlsdesc_lo12:imm
617 blr tmp
618 mrs tp, tpidr_el0
619 add dest, dest, tp
621 Initial Exec:
622 mrs tp, tpidr_el0
623 adrp tmp, :gottprel:imm
624 ldr dest, [tmp, #:gottprel_lo12:imm]
625 add dest, dest, tp
627 Local Exec:
628 mrs tp, tpidr_el0
629 add t0, tp, #:tprel_hi12:imm
630 add t0, #:tprel_lo12_nc:imm
633 static void
634 aarch64_load_symref_appropriately (rtx dest, rtx imm,
635 enum aarch64_symbol_type type)
637 switch (type)
639 case SYMBOL_SMALL_ABSOLUTE:
641 /* In ILP32, the mode of dest can be either SImode or DImode. */
642 rtx tmp_reg = dest;
643 enum machine_mode mode = GET_MODE (dest);
645 gcc_assert (mode == Pmode || mode == ptr_mode);
647 if (can_create_pseudo_p ())
648 tmp_reg = gen_reg_rtx (mode);
650 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
651 emit_insn (gen_add_losym (dest, tmp_reg, imm));
652 return;
655 case SYMBOL_TINY_ABSOLUTE:
656 emit_insn (gen_rtx_SET (Pmode, dest, imm));
657 return;
659 case SYMBOL_SMALL_GOT:
661 /* In ILP32, the mode of dest can be either SImode or DImode,
662 while the got entry is always of SImode size. The mode of
663 dest depends on how dest is used: if dest is assigned to a
664 pointer (e.g. in the memory), it has SImode; it may have
665 DImode if dest is dereferenced to access the memory.
666 This is why we have to handle three different ldr_got_small
667 patterns here (two patterns for ILP32). */
668 rtx tmp_reg = dest;
669 enum machine_mode mode = GET_MODE (dest);
671 if (can_create_pseudo_p ())
672 tmp_reg = gen_reg_rtx (mode);
674 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
675 if (mode == ptr_mode)
677 if (mode == DImode)
678 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
679 else
680 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
682 else
684 gcc_assert (mode == Pmode);
685 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
688 return;
691 case SYMBOL_SMALL_TLSGD:
693 rtx insns;
694 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
696 start_sequence ();
697 emit_call_insn (gen_tlsgd_small (result, imm));
698 insns = get_insns ();
699 end_sequence ();
701 RTL_CONST_CALL_P (insns) = 1;
702 emit_libcall_block (insns, dest, result, imm);
703 return;
706 case SYMBOL_SMALL_TLSDESC:
708 enum machine_mode mode = GET_MODE (dest);
709 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
710 rtx tp;
712 gcc_assert (mode == Pmode || mode == ptr_mode);
714 /* In ILP32, the got entry is always of SImode size. Unlike
715 small GOT, the dest is fixed at reg 0. */
716 if (TARGET_ILP32)
717 emit_insn (gen_tlsdesc_small_si (imm));
718 else
719 emit_insn (gen_tlsdesc_small_di (imm));
720 tp = aarch64_load_tp (NULL);
722 if (mode != Pmode)
723 tp = gen_lowpart (mode, tp);
725 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
726 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
727 return;
730 case SYMBOL_SMALL_GOTTPREL:
732 /* In ILP32, the mode of dest can be either SImode or DImode,
733 while the got entry is always of SImode size. The mode of
734 dest depends on how dest is used: if dest is assigned to a
735 pointer (e.g. in the memory), it has SImode; it may have
736 DImode if dest is dereferenced to access the memory.
737 This is why we have to handle three different tlsie_small
738 patterns here (two patterns for ILP32). */
739 enum machine_mode mode = GET_MODE (dest);
740 rtx tmp_reg = gen_reg_rtx (mode);
741 rtx tp = aarch64_load_tp (NULL);
743 if (mode == ptr_mode)
745 if (mode == DImode)
746 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
747 else
749 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
750 tp = gen_lowpart (mode, tp);
753 else
755 gcc_assert (mode == Pmode);
756 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
759 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
760 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
761 return;
764 case SYMBOL_SMALL_TPREL:
766 rtx tp = aarch64_load_tp (NULL);
767 emit_insn (gen_tlsle_small (dest, tp, imm));
768 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
769 return;
772 case SYMBOL_TINY_GOT:
773 emit_insn (gen_ldr_got_tiny (dest, imm));
774 return;
776 default:
777 gcc_unreachable ();
781 /* Emit a move from SRC to DEST. Assume that the move expanders can
782 handle all moves if !can_create_pseudo_p (). The distinction is
783 important because, unlike emit_move_insn, the move expanders know
784 how to force Pmode objects into the constant pool even when the
785 constant pool address is not itself legitimate. */
786 static rtx
787 aarch64_emit_move (rtx dest, rtx src)
789 return (can_create_pseudo_p ()
790 ? emit_move_insn (dest, src)
791 : emit_move_insn_1 (dest, src));
794 /* Split a 128-bit move operation into two 64-bit move operations,
795 taking care to handle partial overlap of register to register
796 copies. Special cases are needed when moving between GP regs and
797 FP regs. SRC can be a register, constant or memory; DST a register
798 or memory. If either operand is memory it must not have any side
799 effects. */
800 void
801 aarch64_split_128bit_move (rtx dst, rtx src)
803 rtx dst_lo, dst_hi;
804 rtx src_lo, src_hi;
806 enum machine_mode mode = GET_MODE (dst);
808 gcc_assert (mode == TImode || mode == TFmode);
809 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
810 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
812 if (REG_P (dst) && REG_P (src))
814 int src_regno = REGNO (src);
815 int dst_regno = REGNO (dst);
817 /* Handle FP <-> GP regs. */
818 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
820 src_lo = gen_lowpart (word_mode, src);
821 src_hi = gen_highpart (word_mode, src);
823 if (mode == TImode)
825 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
826 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
828 else
830 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
831 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
833 return;
835 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
837 dst_lo = gen_lowpart (word_mode, dst);
838 dst_hi = gen_highpart (word_mode, dst);
840 if (mode == TImode)
842 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
843 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
845 else
847 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
848 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
850 return;
854 dst_lo = gen_lowpart (word_mode, dst);
855 dst_hi = gen_highpart (word_mode, dst);
856 src_lo = gen_lowpart (word_mode, src);
857 src_hi = gen_highpart_mode (word_mode, mode, src);
859 /* At most one pairing may overlap. */
860 if (reg_overlap_mentioned_p (dst_lo, src_hi))
862 aarch64_emit_move (dst_hi, src_hi);
863 aarch64_emit_move (dst_lo, src_lo);
865 else
867 aarch64_emit_move (dst_lo, src_lo);
868 aarch64_emit_move (dst_hi, src_hi);
872 bool
873 aarch64_split_128bit_move_p (rtx dst, rtx src)
875 return (! REG_P (src)
876 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
879 /* Split a complex SIMD combine. */
881 void
882 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
884 enum machine_mode src_mode = GET_MODE (src1);
885 enum machine_mode dst_mode = GET_MODE (dst);
887 gcc_assert (VECTOR_MODE_P (dst_mode));
889 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
891 rtx (*gen) (rtx, rtx, rtx);
893 switch (src_mode)
895 case V8QImode:
896 gen = gen_aarch64_simd_combinev8qi;
897 break;
898 case V4HImode:
899 gen = gen_aarch64_simd_combinev4hi;
900 break;
901 case V2SImode:
902 gen = gen_aarch64_simd_combinev2si;
903 break;
904 case V2SFmode:
905 gen = gen_aarch64_simd_combinev2sf;
906 break;
907 case DImode:
908 gen = gen_aarch64_simd_combinedi;
909 break;
910 case DFmode:
911 gen = gen_aarch64_simd_combinedf;
912 break;
913 default:
914 gcc_unreachable ();
917 emit_insn (gen (dst, src1, src2));
918 return;
922 /* Split a complex SIMD move. */
924 void
925 aarch64_split_simd_move (rtx dst, rtx src)
927 enum machine_mode src_mode = GET_MODE (src);
928 enum machine_mode dst_mode = GET_MODE (dst);
930 gcc_assert (VECTOR_MODE_P (dst_mode));
932 if (REG_P (dst) && REG_P (src))
934 rtx (*gen) (rtx, rtx);
936 gcc_assert (VECTOR_MODE_P (src_mode));
938 switch (src_mode)
940 case V16QImode:
941 gen = gen_aarch64_split_simd_movv16qi;
942 break;
943 case V8HImode:
944 gen = gen_aarch64_split_simd_movv8hi;
945 break;
946 case V4SImode:
947 gen = gen_aarch64_split_simd_movv4si;
948 break;
949 case V2DImode:
950 gen = gen_aarch64_split_simd_movv2di;
951 break;
952 case V4SFmode:
953 gen = gen_aarch64_split_simd_movv4sf;
954 break;
955 case V2DFmode:
956 gen = gen_aarch64_split_simd_movv2df;
957 break;
958 default:
959 gcc_unreachable ();
962 emit_insn (gen (dst, src));
963 return;
967 static rtx
968 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
970 if (can_create_pseudo_p ())
971 return force_reg (mode, value);
972 else
974 x = aarch64_emit_move (x, value);
975 return x;
980 static rtx
981 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
983 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
985 rtx high;
986 /* Load the full offset into a register. This
987 might be improvable in the future. */
988 high = GEN_INT (offset);
989 offset = 0;
990 high = aarch64_force_temporary (mode, temp, high);
991 reg = aarch64_force_temporary (mode, temp,
992 gen_rtx_PLUS (mode, high, reg));
994 return plus_constant (mode, reg, offset);
997 void
998 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1000 enum machine_mode mode = GET_MODE (dest);
1001 unsigned HOST_WIDE_INT mask;
1002 int i;
1003 bool first;
1004 unsigned HOST_WIDE_INT val;
1005 bool subtargets;
1006 rtx subtarget;
1007 int one_match, zero_match;
1009 gcc_assert (mode == SImode || mode == DImode);
1011 /* Check on what type of symbol it is. */
1012 if (GET_CODE (imm) == SYMBOL_REF
1013 || GET_CODE (imm) == LABEL_REF
1014 || GET_CODE (imm) == CONST)
1016 rtx mem, base, offset;
1017 enum aarch64_symbol_type sty;
1019 /* If we have (const (plus symbol offset)), separate out the offset
1020 before we start classifying the symbol. */
1021 split_const (imm, &base, &offset);
1023 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1024 switch (sty)
1026 case SYMBOL_FORCE_TO_MEM:
1027 if (offset != const0_rtx
1028 && targetm.cannot_force_const_mem (mode, imm))
1030 gcc_assert (can_create_pseudo_p ());
1031 base = aarch64_force_temporary (mode, dest, base);
1032 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1033 aarch64_emit_move (dest, base);
1034 return;
1036 mem = force_const_mem (ptr_mode, imm);
1037 gcc_assert (mem);
1038 if (mode != ptr_mode)
1039 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1040 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1041 return;
1043 case SYMBOL_SMALL_TLSGD:
1044 case SYMBOL_SMALL_TLSDESC:
1045 case SYMBOL_SMALL_GOTTPREL:
1046 case SYMBOL_SMALL_GOT:
1047 case SYMBOL_TINY_GOT:
1048 if (offset != const0_rtx)
1050 gcc_assert(can_create_pseudo_p ());
1051 base = aarch64_force_temporary (mode, dest, base);
1052 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1053 aarch64_emit_move (dest, base);
1054 return;
1056 /* FALLTHRU */
1058 case SYMBOL_SMALL_TPREL:
1059 case SYMBOL_SMALL_ABSOLUTE:
1060 case SYMBOL_TINY_ABSOLUTE:
1061 aarch64_load_symref_appropriately (dest, imm, sty);
1062 return;
1064 default:
1065 gcc_unreachable ();
1069 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1071 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1072 return;
1075 if (!CONST_INT_P (imm))
1077 if (GET_CODE (imm) == HIGH)
1078 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1079 else
1081 rtx mem = force_const_mem (mode, imm);
1082 gcc_assert (mem);
1083 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1086 return;
1089 if (mode == SImode)
1091 /* We know we can't do this in 1 insn, and we must be able to do it
1092 in two; so don't mess around looking for sequences that don't buy
1093 us anything. */
1094 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1095 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1096 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1097 return;
1100 /* Remaining cases are all for DImode. */
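/* The DImode strategy below, roughly (illustrative summary): split VAL
   into four 16-bit chunks and count how many are all-zeros or all-ones.
   Two all-ones chunks are handled with MOVN+MOVK, two all-zero chunks
   fall through to the MOVZ+MOVK "simple_sequence", and otherwise we try
   pairs such as MOV+ADD with a shifted 12-bit immediate, a bitmask
   immediate plus ADD or MOVK, or two bitmask immediates combined with
   ORR/AND, before falling back to up to four MOVZ/MOVK instructions.
   For example, 0x0000ffff12340000 has two zero chunks and becomes
   roughly:
       movz dest, #0x1234, lsl #16
       movk dest, #0xffff, lsl #32  */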
1102 val = INTVAL (imm);
1103 subtargets = optimize && can_create_pseudo_p ();
1105 one_match = 0;
1106 zero_match = 0;
1107 mask = 0xffff;
1109 for (i = 0; i < 64; i += 16, mask <<= 16)
1111 if ((val & mask) == 0)
1112 zero_match++;
1113 else if ((val & mask) == mask)
1114 one_match++;
1117 if (one_match == 2)
1119 mask = 0xffff;
1120 for (i = 0; i < 64; i += 16, mask <<= 16)
1122 if ((val & mask) != mask)
1124 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1125 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1126 GEN_INT ((val >> i) & 0xffff)));
1127 return;
1130 gcc_unreachable ();
1133 if (zero_match == 2)
1134 goto simple_sequence;
1136 mask = 0x0ffff0000UL;
1137 for (i = 16; i < 64; i += 16, mask <<= 16)
1139 HOST_WIDE_INT comp = mask & ~(mask - 1);
1141 if (aarch64_uimm12_shift (val - (val & mask)))
1143 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1145 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1146 emit_insn (gen_adddi3 (dest, subtarget,
1147 GEN_INT (val - (val & mask))));
1148 return;
1150 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1152 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1154 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1155 GEN_INT ((val + comp) & mask)));
1156 emit_insn (gen_adddi3 (dest, subtarget,
1157 GEN_INT (val - ((val + comp) & mask))));
1158 return;
1160 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1162 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1164 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1165 GEN_INT ((val - comp) | ~mask)));
1166 emit_insn (gen_adddi3 (dest, subtarget,
1167 GEN_INT (val - ((val - comp) | ~mask))));
1168 return;
1170 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1172 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1174 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1175 GEN_INT (val | ~mask)));
1176 emit_insn (gen_adddi3 (dest, subtarget,
1177 GEN_INT (val - (val | ~mask))));
1178 return;
1182 /* See if we can do it by arithmetically combining two
1183 immediates. */
1184 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1186 int j;
1187 mask = 0xffff;
1189 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1190 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1192 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1193 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1194 GEN_INT (aarch64_bitmasks[i])));
1195 emit_insn (gen_adddi3 (dest, subtarget,
1196 GEN_INT (val - aarch64_bitmasks[i])));
1197 return;
1200 for (j = 0; j < 64; j += 16, mask <<= 16)
1202 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1204 emit_insn (gen_rtx_SET (VOIDmode, dest,
1205 GEN_INT (aarch64_bitmasks[i])));
1206 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1207 GEN_INT ((val >> j) & 0xffff)));
1208 return;
1213 /* See if we can do it by logically combining two immediates. */
1214 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1216 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1218 int j;
1220 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1221 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1223 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1224 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1225 GEN_INT (aarch64_bitmasks[i])));
1226 emit_insn (gen_iordi3 (dest, subtarget,
1227 GEN_INT (aarch64_bitmasks[j])));
1228 return;
1231 else if ((val & aarch64_bitmasks[i]) == val)
1233 int j;
1235 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1236 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1239 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1240 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1241 GEN_INT (aarch64_bitmasks[j])));
1242 emit_insn (gen_anddi3 (dest, subtarget,
1243 GEN_INT (aarch64_bitmasks[i])));
1244 return;
1249 simple_sequence:
1250 first = true;
1251 mask = 0xffff;
1252 for (i = 0; i < 64; i += 16, mask <<= 16)
1254 if ((val & mask) != 0)
1256 if (first)
1258 emit_insn (gen_rtx_SET (VOIDmode, dest,
1259 GEN_INT (val & mask)));
1260 first = false;
1262 else
1263 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1264 GEN_INT ((val >> i) & 0xffff)));
1269 static bool
1270 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1272 /* Indirect calls are not currently supported. */
1273 if (decl == NULL)
1274 return false;
1276 /* Cannot tail-call to long-calls, since these are outside of the
1277 range of a branch instruction (we could handle this if we added
1278 support for indirect tail-calls). */
1279 if (aarch64_decl_is_long_call_p (decl))
1280 return false;
1282 return true;
1285 /* Implement TARGET_PASS_BY_REFERENCE. */
1287 static bool
1288 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1289 enum machine_mode mode,
1290 const_tree type,
1291 bool named ATTRIBUTE_UNUSED)
1293 HOST_WIDE_INT size;
1294 enum machine_mode dummymode;
1295 int nregs;
1297 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1298 size = (mode == BLKmode && type)
1299 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1301 /* Aggregates are passed by reference based on their size. */
1302 if (type && AGGREGATE_TYPE_P (type))
1304 size = int_size_in_bytes (type);
1307 /* Variable-sized arguments are always passed by reference. */
1308 if (size < 0)
1309 return true;
1311 /* Can this be a candidate to be passed in fp/simd register(s)? */
1312 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1313 &dummymode, &nregs,
1314 NULL))
1315 return false;
1317 /* Arguments which are variable sized or larger than 2 registers are
1318 passed by reference unless they are a homogeneous floating-point
1319 aggregate. */
1320 return size > 2 * UNITS_PER_WORD;
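/* For example (illustrative): a structure of three doubles is a
   homogeneous floating-point aggregate, so the candidate check above
   returns false here and the argument is passed in SIMD/FP registers;
   a structure of three 64-bit integers (24 bytes > 2 * UNITS_PER_WORD)
   is instead passed by reference.  */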
1323 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1324 static bool
1325 aarch64_return_in_msb (const_tree valtype)
1327 enum machine_mode dummy_mode;
1328 int dummy_int;
1330 /* Never happens in little-endian mode. */
1331 if (!BYTES_BIG_ENDIAN)
1332 return false;
1334 /* Only composite types smaller than or equal to 16 bytes can
1335 be potentially returned in registers. */
1336 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1337 || int_size_in_bytes (valtype) <= 0
1338 || int_size_in_bytes (valtype) > 16)
1339 return false;
1341 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1342 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1343 is always passed/returned in the least significant bits of fp/simd
1344 register(s). */
1345 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1346 &dummy_mode, &dummy_int, NULL))
1347 return false;
1349 return true;
1352 /* Implement TARGET_FUNCTION_VALUE.
1353 Define how to find the value returned by a function. */
1355 static rtx
1356 aarch64_function_value (const_tree type, const_tree func,
1357 bool outgoing ATTRIBUTE_UNUSED)
1359 enum machine_mode mode;
1360 int unsignedp;
1361 int count;
1362 enum machine_mode ag_mode;
1364 mode = TYPE_MODE (type);
1365 if (INTEGRAL_TYPE_P (type))
1366 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1368 if (aarch64_return_in_msb (type))
1370 HOST_WIDE_INT size = int_size_in_bytes (type);
1372 if (size % UNITS_PER_WORD != 0)
1374 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1375 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1379 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1380 &ag_mode, &count, NULL))
1382 if (!aarch64_composite_type_p (type, mode))
1384 gcc_assert (count == 1 && mode == ag_mode);
1385 return gen_rtx_REG (mode, V0_REGNUM);
1387 else
1389 int i;
1390 rtx par;
1392 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1393 for (i = 0; i < count; i++)
1395 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1396 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1397 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1398 XVECEXP (par, 0, i) = tmp;
1400 return par;
1403 else
1404 return gen_rtx_REG (mode, R0_REGNUM);
1407 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1408 Return true if REGNO is the number of a hard register in which the values
1409 of called function may come back. */
1411 static bool
1412 aarch64_function_value_regno_p (const unsigned int regno)
1414 /* Maximum of 16 bytes can be returned in the general registers. Examples
1415 of 16-byte return values are: 128-bit integers and 16-byte small
1416 structures (excluding homogeneous floating-point aggregates). */
1417 if (regno == R0_REGNUM || regno == R1_REGNUM)
1418 return true;
1420 /* Up to four fp/simd registers can return a function value, e.g. a
1421 homogeneous floating-point aggregate having four members. */
1422 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1423 return !TARGET_GENERAL_REGS_ONLY;
1425 return false;
1428 /* Implement TARGET_RETURN_IN_MEMORY.
1430 If the type T of the result of a function is such that
1431 void func (T arg)
1432 would require that arg be passed as a value in a register (or set of
1433 registers) according to the parameter passing rules, then the result
1434 is returned in the same registers as would be used for such an
1435 argument. */
1437 static bool
1438 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1440 HOST_WIDE_INT size;
1441 enum machine_mode ag_mode;
1442 int count;
1444 if (!AGGREGATE_TYPE_P (type)
1445 && TREE_CODE (type) != COMPLEX_TYPE
1446 && TREE_CODE (type) != VECTOR_TYPE)
1447 /* Simple scalar types always returned in registers. */
1448 return false;
1450 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1451 type,
1452 &ag_mode,
1453 &count,
1454 NULL))
1455 return false;
1457 /* Types larger than 2 registers returned in memory. */
1458 size = int_size_in_bytes (type);
1459 return (size < 0 || size > 2 * UNITS_PER_WORD);
1462 static bool
1463 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1464 const_tree type, int *nregs)
1466 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1467 return aarch64_vfp_is_call_or_return_candidate (mode,
1468 type,
1469 &pcum->aapcs_vfp_rmode,
1470 nregs,
1471 NULL);
1474 /* Given MODE and TYPE of a function argument, return the alignment in
1475 bits. The idea is to suppress any stronger alignment requested by
1476 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1477 This is a helper function for local use only. */
1479 static unsigned int
1480 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1482 unsigned int alignment;
1484 if (type)
1486 if (!integer_zerop (TYPE_SIZE (type)))
1488 if (TYPE_MODE (type) == mode)
1489 alignment = TYPE_ALIGN (type);
1490 else
1491 alignment = GET_MODE_ALIGNMENT (mode);
1493 else
1494 alignment = 0;
1496 else
1497 alignment = GET_MODE_ALIGNMENT (mode);
1499 return alignment;
1502 /* Layout a function argument according to the AAPCS64 rules. The rule
1503 numbers refer to the rule numbers in the AAPCS64. */
1505 static void
1506 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1507 const_tree type,
1508 bool named ATTRIBUTE_UNUSED)
1510 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1511 int ncrn, nvrn, nregs;
1512 bool allocate_ncrn, allocate_nvrn;
1514 /* We need to do this once per argument. */
1515 if (pcum->aapcs_arg_processed)
1516 return;
1518 pcum->aapcs_arg_processed = true;
1520 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1521 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1522 mode,
1523 type,
1524 &nregs);
1526 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1527 The following code thus handles passing by SIMD/FP registers first. */
1529 nvrn = pcum->aapcs_nvrn;
1531 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1532 and homogeneous short-vector aggregates (HVA). */
1533 if (allocate_nvrn)
1535 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1537 pcum->aapcs_nextnvrn = nvrn + nregs;
1538 if (!aarch64_composite_type_p (type, mode))
1540 gcc_assert (nregs == 1);
1541 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1543 else
1545 rtx par;
1546 int i;
1547 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1548 for (i = 0; i < nregs; i++)
1550 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1551 V0_REGNUM + nvrn + i);
1552 tmp = gen_rtx_EXPR_LIST
1553 (VOIDmode, tmp,
1554 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1555 XVECEXP (par, 0, i) = tmp;
1557 pcum->aapcs_reg = par;
1559 return;
1561 else
1563 /* C.3 NSRN is set to 8. */
1564 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1565 goto on_stack;
1569 ncrn = pcum->aapcs_ncrn;
1570 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1571 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1574 /* C.6 - C.9, though the sign and zero extension semantics are
1575 handled elsewhere. This is the case where the argument fits
1576 entirely in general registers. */
1577 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1579 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1581 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1583 /* C.8 if the argument has an alignment of 16 then the NGRN is
1584 rounded up to the next even number. */
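/* For example (illustrative): a __int128 argument arriving when NGRN is
   1 (x0 already used) is 16-byte aligned and needs two registers, so
   NGRN is bumped to 2 and the value goes in the even/odd pair x2/x3,
   leaving x1 unused.  */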
1585 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1587 ++ncrn;
1588 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1590 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1591 A reg is still generated for it, but the caller should be smart
1592 enough not to use it. */
1593 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1595 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1597 else
1599 rtx par;
1600 int i;
1602 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1603 for (i = 0; i < nregs; i++)
1605 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1606 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1607 GEN_INT (i * UNITS_PER_WORD));
1608 XVECEXP (par, 0, i) = tmp;
1610 pcum->aapcs_reg = par;
1613 pcum->aapcs_nextncrn = ncrn + nregs;
1614 return;
1617 /* C.11 */
1618 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1620 /* The argument is passed on stack; record the needed number of words for
1621 this argument (we can re-use NREGS) and align the total size if
1622 necessary. */
1623 on_stack:
1624 pcum->aapcs_stack_words = nregs;
1625 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1626 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1627 16 / UNITS_PER_WORD) + 1;
1628 return;
1631 /* Implement TARGET_FUNCTION_ARG. */
1633 static rtx
1634 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1635 const_tree type, bool named)
1637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1638 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1640 if (mode == VOIDmode)
1641 return NULL_RTX;
1643 aarch64_layout_arg (pcum_v, mode, type, named);
1644 return pcum->aapcs_reg;
1647 void
1648 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1649 const_tree fntype ATTRIBUTE_UNUSED,
1650 rtx libname ATTRIBUTE_UNUSED,
1651 const_tree fndecl ATTRIBUTE_UNUSED,
1652 unsigned n_named ATTRIBUTE_UNUSED)
1654 pcum->aapcs_ncrn = 0;
1655 pcum->aapcs_nvrn = 0;
1656 pcum->aapcs_nextncrn = 0;
1657 pcum->aapcs_nextnvrn = 0;
1658 pcum->pcs_variant = ARM_PCS_AAPCS64;
1659 pcum->aapcs_reg = NULL_RTX;
1660 pcum->aapcs_arg_processed = false;
1661 pcum->aapcs_stack_words = 0;
1662 pcum->aapcs_stack_size = 0;
1664 return;
1667 static void
1668 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1669 enum machine_mode mode,
1670 const_tree type,
1671 bool named)
1673 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1674 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1676 aarch64_layout_arg (pcum_v, mode, type, named);
1677 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1678 != (pcum->aapcs_stack_words != 0));
1679 pcum->aapcs_arg_processed = false;
1680 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1681 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1682 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1683 pcum->aapcs_stack_words = 0;
1684 pcum->aapcs_reg = NULL_RTX;
1688 bool
1689 aarch64_function_arg_regno_p (unsigned regno)
1691 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1692 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1695 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1696 PARM_BOUNDARY bits of alignment, but will be given anything up
1697 to STACK_BOUNDARY bits if the type requires it. This makes sure
1698 that both before and after the layout of each argument, the Next
1699 Stacked Argument Address (NSAA) will have a minimum alignment of
1700 8 bytes. */
1702 static unsigned int
1703 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1705 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1707 if (alignment < PARM_BOUNDARY)
1708 alignment = PARM_BOUNDARY;
1709 if (alignment > STACK_BOUNDARY)
1710 alignment = STACK_BOUNDARY;
1711 return alignment;
1714 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1716 Return true if an argument passed on the stack should be padded upwards,
1717 i.e. if the least-significant byte of the stack slot has useful data.
1719 Small aggregate types are placed in the lowest memory address.
1721 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1723 bool
1724 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1726 /* On little-endian targets, the least significant byte of every stack
1727 argument is passed at the lowest byte address of the stack slot. */
1728 if (!BYTES_BIG_ENDIAN)
1729 return true;
1731 /* Otherwise, integral, floating-point and pointer types are padded downward:
1732 the least significant byte of a stack argument is passed at the highest
1733 byte address of the stack slot. */
1734 if (type
1735 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1736 || POINTER_TYPE_P (type))
1737 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1738 return false;
1740 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1741 return true;
1744 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1746 It specifies the padding for the last (and possibly the only)
1747 element of a block move between registers and memory. Assuming
1748 the block is in memory, padding upward means that the last
1749 element is padded after its most significant byte, while with
1750 downward padding the last element is padded at its least
1751 significant byte.
1753 Small aggregates and small complex types are always padded
1754 upwards.
1756 We don't need to worry about homogeneous floating-point or
1757 short-vector aggregates; their move is not affected by the
1758 padding direction determined here. Regardless of endianness,
1759 each element of such an aggregate is put in the least
1760 significant bits of a fp/simd register.
1762 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1763 register has useful data, and return the opposite if the most
1764 significant byte does. */
1766 bool
1767 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1768 bool first ATTRIBUTE_UNUSED)
1771 /* Small composite types are always padded upward. */
1772 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1774 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1775 : GET_MODE_SIZE (mode));
1776 if (size < 2 * UNITS_PER_WORD)
1777 return true;
1780 /* Otherwise, use the default padding. */
1781 return !BYTES_BIG_ENDIAN;
1784 static enum machine_mode
1785 aarch64_libgcc_cmp_return_mode (void)
1787 return SImode;
1790 static bool
1791 aarch64_frame_pointer_required (void)
1793 /* If the function contains dynamic stack allocations, we need to
1794 use the frame pointer to access the static parts of the frame. */
1795 if (cfun->calls_alloca)
1796 return true;
1798 /* In aarch64_override_options_after_change
1799 flag_omit_leaf_frame_pointer turns off the frame pointer by
1800 default. Turn it back on now if we've not got a leaf
1801 function. */
1802 if (flag_omit_leaf_frame_pointer
1803 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1804 return true;
1806 return false;
1809 /* Mark the registers that need to be saved by the callee and calculate
1810 the size of the callee-saved registers area and frame record (both FP
1811 and LR may be omitted). */
1812 static void
1813 aarch64_layout_frame (void)
1815 HOST_WIDE_INT offset = 0;
1816 int regno;
1818 if (reload_completed && cfun->machine->frame.laid_out)
1819 return;
1821 /* First mark all the registers that really need to be saved... */
1822 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1823 cfun->machine->frame.reg_offset[regno] = -1;
1825 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1826 cfun->machine->frame.reg_offset[regno] = -1;
1828 /* ... that includes the eh data registers (if needed)... */
1829 if (crtl->calls_eh_return)
1830 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1831 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1833 /* ... and any callee saved register that dataflow says is live. */
1834 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1835 if (df_regs_ever_live_p (regno)
1836 && !call_used_regs[regno])
1837 cfun->machine->frame.reg_offset[regno] = 0;
1839 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1840 if (df_regs_ever_live_p (regno)
1841 && !call_used_regs[regno])
1842 cfun->machine->frame.reg_offset[regno] = 0;
1844 if (frame_pointer_needed)
1846 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1847 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1848 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1851 /* Now assign stack slots for them. */
1852 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1853 if (cfun->machine->frame.reg_offset[regno] != -1)
1855 cfun->machine->frame.reg_offset[regno] = offset;
1856 offset += UNITS_PER_WORD;
1859 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1860 if (cfun->machine->frame.reg_offset[regno] != -1)
1862 cfun->machine->frame.reg_offset[regno] = offset;
1863 offset += UNITS_PER_WORD;
1866 if (frame_pointer_needed)
1868 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1869 offset += UNITS_PER_WORD;
1872 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1874 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1875 offset += UNITS_PER_WORD;
1878 cfun->machine->frame.padding0 =
1879 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1880 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1882 cfun->machine->frame.saved_regs_size = offset;
1883 cfun->machine->frame.laid_out = true;
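/* Worked example of the layout above (illustrative): a function that
   needs the frame pointer and saves x19, x20 and d8 assigns
   reg_offset[x19] = 0, reg_offset[x20] = 8, reg_offset[d8] = 16,
   reg_offset[x29] = 24 and reg_offset[x30] = 32, giving offset == 40;
   rounding to STACK_BOUNDARY (16 bytes) yields padding0 == 8 and
   saved_regs_size == 48.  */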
1886 /* Make the last instruction frame-related and note that it performs
1887 the operation described by FRAME_PATTERN. */
1889 static void
1890 aarch64_set_frame_expr (rtx frame_pattern)
1892 rtx insn;
1894 insn = get_last_insn ();
1895 RTX_FRAME_RELATED_P (insn) = 1;
1896 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1897 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1898 frame_pattern,
1899 REG_NOTES (insn));
1902 static bool
1903 aarch64_register_saved_on_entry (int regno)
1905 return cfun->machine->frame.reg_offset[regno] != -1;
1909 static void
1910 aarch64_save_or_restore_fprs (int start_offset, int increment,
1911 bool restore, rtx base_rtx)
1914 unsigned regno;
1915 unsigned regno2;
1916 rtx insn;
1917 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1918 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1921 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1923 if (aarch64_register_saved_on_entry (regno))
1925 rtx mem;
1926 mem = gen_mem_ref (DFmode,
1927 plus_constant (Pmode,
1928 base_rtx,
1929 start_offset));
1931 for (regno2 = regno + 1;
1932 regno2 <= V31_REGNUM
1933 && !aarch64_register_saved_on_entry (regno2);
1934 regno2++)
1936 /* Empty loop. */
1938 if (regno2 <= V31_REGNUM &&
1939 aarch64_register_saved_on_entry (regno2))
1941 rtx mem2;
1942 /* Next highest register to be saved. */
1943 mem2 = gen_mem_ref (DFmode,
1944 plus_constant
1945 (Pmode,
1946 base_rtx,
1947 start_offset + increment));
1948 if (restore == false)
1950 insn = emit_insn
1951 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1952 mem2, gen_rtx_REG (DFmode, regno2)));
1955 else
1957 insn = emit_insn
1958 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1959 gen_rtx_REG (DFmode, regno2), mem2));
1961 add_reg_note (insn, REG_CFA_RESTORE,
1962 gen_rtx_REG (DFmode, regno));
1963 add_reg_note (insn, REG_CFA_RESTORE,
1964 gen_rtx_REG (DFmode, regno2));
1967 /* The first part of a frame-related parallel insn
1968 is always assumed to be relevant to the frame
1969 calculations; subsequent parts are only
1970 frame-related if explicitly marked. */
1971 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1972 regno = regno2;
1973 start_offset += increment * 2;
1975 else
1977 if (restore == false)
1978 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1979 else
1981 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1982 add_reg_note (insn, REG_CFA_RESTORE,
1983 gen_rtx_REG (DImode, regno));
1985 start_offset += increment;
1987 RTX_FRAME_RELATED_P (insn) = 1;
1994 /* Offset from the stack pointer at which the saves and
1995 restores have to happen. */
1996 static void
1997 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1998 bool restore)
2000 rtx insn;
2001 rtx base_rtx = stack_pointer_rtx;
2002 HOST_WIDE_INT start_offset = offset;
2003 HOST_WIDE_INT increment = UNITS_PER_WORD;
2004 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
2005 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2006 unsigned regno;
2007 unsigned regno2;
2009 for (regno = R0_REGNUM; regno <= limit; regno++)
2011 if (aarch64_register_saved_on_entry (regno))
2013 rtx mem;
2014 mem = gen_mem_ref (Pmode,
2015 plus_constant (Pmode,
2016 base_rtx,
2017 start_offset));
2019 for (regno2 = regno + 1;
2020 regno2 <= limit
2021 && !aarch64_register_saved_on_entry (regno2);
2022 regno2++)
2024 /* Empty loop. */
2026 if (regno2 <= limit &&
2027 aarch64_register_saved_on_entry (regno2))
2029 rtx mem2;
2030 /* Next highest register to be saved. */
2031 mem2 = gen_mem_ref (Pmode,
2032 plus_constant
2033 (Pmode,
2034 base_rtx,
2035 start_offset + increment));
2036 if (restore == false)
2038 insn = emit_insn
2039 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2040 mem2, gen_rtx_REG (DImode, regno2)));
2043 else
2045 insn = emit_insn
2046 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2047 gen_rtx_REG (DImode, regno2), mem2));
2049 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2050 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2053 /* The first part of a frame-related parallel insn
2054 is always assumed to be relevant to the frame
2055 calculations; subsequent parts are only
2056 frame-related if explicitly marked. */
2057 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2058 1)) = 1;
2059 regno = regno2;
2060 start_offset += increment * 2;
2062 else
2064 if (restore == false)
2065 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2066 else
2068 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2069 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2071 start_offset += increment;
2073 RTX_FRAME_RELATED_P (insn) = 1;
2077 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2081 /* AArch64 stack frames generated by this compiler look like:
2083 +-------------------------------+
2085 | incoming stack arguments |
2087 +-------------------------------+ <-- arg_pointer_rtx
2089 | callee-allocated save area |
2090 | for register varargs |
2092 +-------------------------------+ <-- frame_pointer_rtx
2094 | local variables |
2096 +-------------------------------+
2097 | padding0 | \
2098 +-------------------------------+ |
2099 | | |
2100 | | |
2101 | callee-saved registers | | frame.saved_regs_size
2102 | | |
2103 +-------------------------------+ |
2104 | LR' | |
2105 +-------------------------------+ |
2106 | FP' | /
2107 P +-------------------------------+ <-- hard_frame_pointer_rtx
2108 | dynamic allocation |
2109 +-------------------------------+
2111 | outgoing stack arguments |
2113 +-------------------------------+ <-- stack_pointer_rtx
2115 Dynamic stack allocations such as alloca insert data at point P.
2116 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2117 hard_frame_pointer_rtx unchanged. */
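/* Illustrative worked example (hypothetical sizes, not taken from any
   particular test case): for a function that needs a frame pointer and
   has 24 bytes of local variables, 16 bytes of saved registers (just
   FP' and LR') and no outgoing arguments, the prologue below computes

     frame_size = 24 + 16 + 0 = 40, rounded up to 48
     fp_offset  = 48 - 24 - 16 = 8

   so the stack pointer is dropped by 48, FP' and LR' are stored at
   [sp, #8] and [sp, #16], and the new frame pointer is set to sp + 8,
   i.e. it points at the saved FP' slot shown in the diagram above.  */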
2119 /* Generate the prologue instructions for entry into a function.
2120 Establish the stack frame by decreasing the stack pointer with a
2121 properly calculated size and, if necessary, create a frame record
2122 filled with the values of LR and previous frame pointer. The
2123 current FP is also set up if it is in use. */
2125 void
2126 aarch64_expand_prologue (void)
2128 /* sub sp, sp, #<frame_size>
2129 stp {fp, lr}, [sp, #<frame_size> - 16]
2130 add fp, sp, #<frame_size> - hardfp_offset
2131 stp {cs_reg}, [fp, #-16] etc.
2133 sub sp, sp, <final_adjustment_if_any>
2135 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2136 HOST_WIDE_INT frame_size, offset;
2137 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2138 rtx insn;
2140 aarch64_layout_frame ();
2141 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2142 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2143 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2144 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2145 + crtl->outgoing_args_size);
2146 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2147 STACK_BOUNDARY / BITS_PER_UNIT);
2149 if (flag_stack_usage_info)
2150 current_function_static_stack_size = frame_size;
2152 fp_offset = (offset
2153 - original_frame_size
2154 - cfun->machine->frame.saved_regs_size);
2156 /* Store pairs and load pairs have a range of only -512 to 504. */
2157 if (offset >= 512)
2159 /* When the frame is large, the stack pointer is decreased first
2160 to step over the callee-allocated save area for register varargs,
2161 the local variable area and/or the callee-saved register area.
2162 This allows the pre-index write-back store pair instructions to
2163 be used to set up the stack frame
2164 efficiently. */
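/* Worked example of this path (hypothetical sizes): with 1000 bytes of
   locals, saved_regs_size == 96 and 32 bytes of outgoing arguments,
   frame_size rounds up to 1136; both 1136 and 1000 + 96 exceed 512,
   so offset becomes 96, frame_size becomes 1136 - (96 + 32) = 1008
   and fp_offset becomes 0.  The code below then emits roughly

     sub  sp, sp, #1008
     stp  x29, x30, [sp, #-96]!   // write-back store pair
     add  x29, sp, #0
     ...                          // remaining callee saves
     sub  sp, sp, #32             // outgoing argument area

   for a total adjustment of 1136 bytes.  */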
2165 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2166 if (offset >= 512)
2167 offset = cfun->machine->frame.saved_regs_size;
2169 frame_size -= (offset + crtl->outgoing_args_size);
2170 fp_offset = 0;
2172 if (frame_size >= 0x1000000)
2174 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2175 emit_move_insn (op0, GEN_INT (-frame_size));
2176 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2177 aarch64_set_frame_expr (gen_rtx_SET
2178 (Pmode, stack_pointer_rtx,
2179 plus_constant (Pmode,
2180 stack_pointer_rtx,
2181 -frame_size)));
2183 else if (frame_size > 0)
2185 if ((frame_size & 0xfff) != frame_size)
2187 insn = emit_insn (gen_add2_insn
2188 (stack_pointer_rtx,
2189 GEN_INT (-(frame_size
2190 & ~(HOST_WIDE_INT)0xfff))));
2191 RTX_FRAME_RELATED_P (insn) = 1;
2193 if ((frame_size & 0xfff) != 0)
2195 insn = emit_insn (gen_add2_insn
2196 (stack_pointer_rtx,
2197 GEN_INT (-(frame_size
2198 & (HOST_WIDE_INT)0xfff))));
2199 RTX_FRAME_RELATED_P (insn) = 1;
2203 else
2204 frame_size = -1;
2206 if (offset > 0)
2208 /* Save the frame pointer and lr if the frame pointer is needed
2209 first. Make the frame pointer point to the location of the
2210 old frame pointer on the stack. */
2211 if (frame_pointer_needed)
2213 rtx mem_fp, mem_lr;
2215 if (fp_offset)
2217 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2218 GEN_INT (-offset)));
2219 RTX_FRAME_RELATED_P (insn) = 1;
2220 aarch64_set_frame_expr (gen_rtx_SET
2221 (Pmode, stack_pointer_rtx,
2222 gen_rtx_MINUS (Pmode,
2223 stack_pointer_rtx,
2224 GEN_INT (offset))));
2225 mem_fp = gen_frame_mem (DImode,
2226 plus_constant (Pmode,
2227 stack_pointer_rtx,
2228 fp_offset));
2229 mem_lr = gen_frame_mem (DImode,
2230 plus_constant (Pmode,
2231 stack_pointer_rtx,
2232 fp_offset
2233 + UNITS_PER_WORD));
2234 insn = emit_insn (gen_store_pairdi (mem_fp,
2235 hard_frame_pointer_rtx,
2236 mem_lr,
2237 gen_rtx_REG (DImode,
2238 LR_REGNUM)));
2240 else
2242 insn = emit_insn (gen_storewb_pairdi_di
2243 (stack_pointer_rtx, stack_pointer_rtx,
2244 hard_frame_pointer_rtx,
2245 gen_rtx_REG (DImode, LR_REGNUM),
2246 GEN_INT (-offset),
2247 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2248 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2251 /* The first part of a frame-related parallel insn is always
2252 assumed to be relevant to the frame calculations;
2253 subsequent parts are only frame-related if explicitly
2254 marked. */
2255 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2256 RTX_FRAME_RELATED_P (insn) = 1;
2258 /* Set up frame pointer to point to the location of the
2259 previous frame pointer on the stack. */
2260 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2261 stack_pointer_rtx,
2262 GEN_INT (fp_offset)));
2263 aarch64_set_frame_expr (gen_rtx_SET
2264 (Pmode, hard_frame_pointer_rtx,
2265 plus_constant (Pmode,
2266 stack_pointer_rtx,
2267 fp_offset)));
2268 RTX_FRAME_RELATED_P (insn) = 1;
2269 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2270 hard_frame_pointer_rtx));
2272 else
2274 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2275 GEN_INT (-offset)));
2276 RTX_FRAME_RELATED_P (insn) = 1;
2279 aarch64_save_or_restore_callee_save_registers
2280 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2283 /* When offset >= 512,
2284 sub sp, sp, #<outgoing_args_size> */
2285 if (frame_size > -1)
2287 if (crtl->outgoing_args_size > 0)
2289 insn = emit_insn (gen_add2_insn
2290 (stack_pointer_rtx,
2291 GEN_INT (- crtl->outgoing_args_size)));
2292 RTX_FRAME_RELATED_P (insn) = 1;
2297 /* Generate the epilogue instructions for returning from a function. */
2298 void
2299 aarch64_expand_epilogue (bool for_sibcall)
2301 HOST_WIDE_INT original_frame_size, frame_size, offset;
2302 HOST_WIDE_INT fp_offset;
2303 rtx insn;
2304 rtx cfa_reg;
2306 aarch64_layout_frame ();
2307 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2308 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2309 + crtl->outgoing_args_size);
2310 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2311 STACK_BOUNDARY / BITS_PER_UNIT);
2313 fp_offset = (offset
2314 - original_frame_size
2315 - cfun->machine->frame.saved_regs_size);
2317 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2319 /* Store pairs and load pairs have a range of only -512 to 504. */
2320 if (offset >= 512)
2322 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2323 if (offset >= 512)
2324 offset = cfun->machine->frame.saved_regs_size;
2326 frame_size -= (offset + crtl->outgoing_args_size);
2327 fp_offset = 0;
2328 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2330 insn = emit_insn (gen_add2_insn
2331 (stack_pointer_rtx,
2332 GEN_INT (crtl->outgoing_args_size)));
2333 RTX_FRAME_RELATED_P (insn) = 1;
2336 else
2337 frame_size = -1;
2339 /* If there were outgoing arguments or we've done dynamic stack
2340 allocation, then restore the stack pointer from the frame
2341 pointer. This is at most one insn and more efficient than using
2342 GCC's internal mechanism. */
2343 if (frame_pointer_needed
2344 && (crtl->outgoing_args_size || cfun->calls_alloca))
2346 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2347 hard_frame_pointer_rtx,
2348 GEN_INT (- fp_offset)));
2349 RTX_FRAME_RELATED_P (insn) = 1;
2350 /* As SP is set to (FP - fp_offset), according to the rules in
2351 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2352 from the value of SP from now on. */
2353 cfa_reg = stack_pointer_rtx;
2356 aarch64_save_or_restore_callee_save_registers
2357 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2359 /* Restore the frame pointer and lr if the frame pointer is needed. */
2360 if (offset > 0)
2362 if (frame_pointer_needed)
2364 rtx mem_fp, mem_lr;
2366 if (fp_offset)
2368 mem_fp = gen_frame_mem (DImode,
2369 plus_constant (Pmode,
2370 stack_pointer_rtx,
2371 fp_offset));
2372 mem_lr = gen_frame_mem (DImode,
2373 plus_constant (Pmode,
2374 stack_pointer_rtx,
2375 fp_offset
2376 + UNITS_PER_WORD));
2377 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2378 mem_fp,
2379 gen_rtx_REG (DImode,
2380 LR_REGNUM),
2381 mem_lr));
2383 else
2385 insn = emit_insn (gen_loadwb_pairdi_di
2386 (stack_pointer_rtx,
2387 stack_pointer_rtx,
2388 hard_frame_pointer_rtx,
2389 gen_rtx_REG (DImode, LR_REGNUM),
2390 GEN_INT (offset),
2391 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2392 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2393 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2394 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2395 plus_constant (Pmode, cfa_reg,
2396 offset))));
2399 /* The first part of a frame-related parallel insn
2400 is always assumed to be relevant to the frame
2401 calculations; subsequent parts are only
2402 frame-related if explicitly marked. */
2403 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2404 RTX_FRAME_RELATED_P (insn) = 1;
2405 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2406 add_reg_note (insn, REG_CFA_RESTORE,
2407 gen_rtx_REG (DImode, LR_REGNUM));
2409 if (fp_offset)
2411 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2412 GEN_INT (offset)));
2413 RTX_FRAME_RELATED_P (insn) = 1;
2416 else
2418 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2419 GEN_INT (offset)));
2420 RTX_FRAME_RELATED_P (insn) = 1;
2424 /* Stack adjustment for exception handler. */
2425 if (crtl->calls_eh_return)
2427 /* We need to unwind the stack by the offset computed by
2428 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2429 based on SP. Ideally we would update the SP and define the
2430 CFA along the lines of:
2432 SP = SP + EH_RETURN_STACKADJ_RTX
2433 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2435 However the dwarf emitter only understands a constant
2436 register offset.
2438 The solution chosen here is to use the otherwise unused IP0
2439 as a temporary register to hold the current SP value. The
2440 CFA is described using IP0 then SP is modified. */
2442 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2444 insn = emit_move_insn (ip0, stack_pointer_rtx);
2445 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2446 RTX_FRAME_RELATED_P (insn) = 1;
2448 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2450 /* Ensure the assignment to IP0 does not get optimized away. */
2451 emit_use (ip0);
2454 if (frame_size > -1)
2456 if (frame_size >= 0x1000000)
2458 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2459 emit_move_insn (op0, GEN_INT (frame_size));
2460 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2461 aarch64_set_frame_expr (gen_rtx_SET
2462 (Pmode, stack_pointer_rtx,
2463 plus_constant (Pmode,
2464 stack_pointer_rtx,
2465 frame_size)));
2467 else if (frame_size > 0)
2469 if ((frame_size & 0xfff) != 0)
2471 insn = emit_insn (gen_add2_insn
2472 (stack_pointer_rtx,
2473 GEN_INT ((frame_size
2474 & (HOST_WIDE_INT) 0xfff))));
2475 RTX_FRAME_RELATED_P (insn) = 1;
2477 if ((frame_size & 0xfff) != frame_size)
2479 insn = emit_insn (gen_add2_insn
2480 (stack_pointer_rtx,
2481 GEN_INT ((frame_size
2482 & ~ (HOST_WIDE_INT) 0xfff))));
2483 RTX_FRAME_RELATED_P (insn) = 1;
2487 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2488 plus_constant (Pmode,
2489 stack_pointer_rtx,
2490 offset)));
2493 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2494 if (!for_sibcall)
2495 emit_jump_insn (ret_rtx);
2498 /* Return the place to copy the exception unwinding return address to.
2499 This will probably be a stack slot, but could (in theory) be the
2500 return register. */
2502 aarch64_final_eh_return_addr (void)
2504 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2505 aarch64_layout_frame ();
2506 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2507 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2508 + crtl->outgoing_args_size);
2509 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2510 STACK_BOUNDARY / BITS_PER_UNIT);
2511 fp_offset = offset
2512 - original_frame_size
2513 - cfun->machine->frame.saved_regs_size;
2515 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2516 return gen_rtx_REG (DImode, LR_REGNUM);
2518 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2519 result in a store to save LR introduced by builtin_eh_return () being
2520 incorrectly deleted because the alias is not detected.
2521 So in the calculation of the address to copy the exception unwinding
2522 return address to, we distinguish two cases.
2523 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2524 we return a SP-relative location since all the addresses are SP-relative
2525 in this case. This prevents the store from being optimized away.
2526 If the fp_offset is not 0, then the addresses will be FP-relative and
2527 therefore we return a FP-relative location. */
2529 if (frame_pointer_needed)
2531 if (fp_offset)
2532 return gen_frame_mem (DImode,
2533 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2534 else
2535 return gen_frame_mem (DImode,
2536 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2539 /* If FP is not needed, we calculate the location of LR, which would be
2540 at the top of the saved registers block. */
2542 return gen_frame_mem (DImode,
2543 plus_constant (Pmode,
2544 stack_pointer_rtx,
2545 fp_offset
2546 + cfun->machine->frame.saved_regs_size
2547 - 2 * UNITS_PER_WORD));
2550 /* Possibly output code to build up a constant in a register. For
2551 the benefit of the costs infrastructure, returns the number of
2552 instructions which would be emitted. GENERATE inhibits or
2553 enables code generation. */
2555 static int
2556 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2558 int insns = 0;
2560 if (aarch64_bitmask_imm (val, DImode))
2562 if (generate)
2563 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2564 insns = 1;
2566 else
2568 int i;
2569 int ncount = 0;
2570 int zcount = 0;
2571 HOST_WIDE_INT valp = val >> 16;
2572 HOST_WIDE_INT valm;
2573 HOST_WIDE_INT tval;
2575 for (i = 16; i < 64; i += 16)
2577 valm = (valp & 0xffff);
2579 if (valm != 0)
2580 ++ zcount;
2582 if (valm != 0xffff)
2583 ++ ncount;
2585 valp >>= 16;
2588 /* zcount contains the number of additional MOVK instructions
2589 required if the constant is built up with an initial MOVZ instruction,
2590 while ncount is the number of MOVK instructions required if starting
2591 with a MOVN instruction. Choose the sequence that yields the fewer
2592 instructions, preferring MOVZ instructions when the two counts are
2593 the same. */
2594 if (ncount < zcount)
2596 if (generate)
2597 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2598 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2599 tval = 0xffff;
2600 insns++;
2602 else
2604 if (generate)
2605 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2606 GEN_INT (val & 0xffff));
2607 tval = 0;
2608 insns++;
2611 val >>= 16;
2613 for (i = 16; i < 64; i += 16)
2615 if ((val & 0xffff) != tval)
2617 if (generate)
2618 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2619 GEN_INT (i),
2620 GEN_INT (val & 0xffff)));
2621 insns++;
2623 val >>= 16;
2626 return insns;
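/* Worked examples for aarch64_build_constant (values chosen purely for
   illustration): 0xffffffffffff1234 has all three upper 16-bit chunks
   equal to 0xffff, so ncount (0) is less than zcount (3) and a single
   MOVN-style move (roughly "movn x<regnum>, #0xedcb") suffices, giving
   a return value of 1.  0x0000456700001234 has zcount == 1, so the
   MOVZ path is taken and two instructions are needed, roughly

     movz x<regnum>, #0x1234
     movk x<regnum>, #0x4567, lsl #32

   giving a return value of 2.  */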
2629 static void
2630 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2632 HOST_WIDE_INT mdelta = delta;
2633 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2634 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2636 if (mdelta < 0)
2637 mdelta = -mdelta;
2639 if (mdelta >= 4096 * 4096)
2641 (void) aarch64_build_constant (scratchreg, delta, true);
2642 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2644 else if (mdelta > 0)
2646 if (mdelta >= 4096)
2648 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2649 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2650 if (delta < 0)
2651 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2652 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2653 else
2654 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2655 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2657 if (mdelta % 4096 != 0)
2659 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2660 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2661 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
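/* Worked example for aarch64_add_constant (hypothetical delta): for
   DELTA == 5000, mdelta >= 4096, so the scratch register is loaded
   with 5000 / 4096 == 1 and added in shifted left by 12 (i.e. +4096),
   then the remainder 5000 % 4096 == 904 is added directly, roughly

     mov  x<scratchreg>, #1
     add  x<regnum>, x<regnum>, x<scratchreg>, lsl #12
     add  x<regnum>, x<regnum>, #904

   For a negative delta the shifted term is subtracted and the sign of
   the remainder is flipped instead.  */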
2666 /* Output code to add DELTA to the first argument, and then jump
2667 to FUNCTION. Used for C++ multiple inheritance. */
2668 static void
2669 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2670 HOST_WIDE_INT delta,
2671 HOST_WIDE_INT vcall_offset,
2672 tree function)
2674 /* The this pointer is always in x0. Note that this differs from
2675 Arm where the this pointer may be bumped to r1 if r0 is required
2676 to return a pointer to an aggregate. On AArch64 a result value
2677 pointer will be in x8. */
2678 int this_regno = R0_REGNUM;
2679 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2681 reload_completed = 1;
2682 emit_note (NOTE_INSN_PROLOGUE_END);
2684 if (vcall_offset == 0)
2685 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2686 else
2688 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2690 this_rtx = gen_rtx_REG (Pmode, this_regno);
2691 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2692 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2694 addr = this_rtx;
2695 if (delta != 0)
2697 if (delta >= -256 && delta < 256)
2698 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2699 plus_constant (Pmode, this_rtx, delta));
2700 else
2701 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2704 if (Pmode == ptr_mode)
2705 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2706 else
2707 aarch64_emit_move (temp0,
2708 gen_rtx_ZERO_EXTEND (Pmode,
2709 gen_rtx_MEM (ptr_mode, addr)));
2711 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2712 addr = plus_constant (Pmode, temp0, vcall_offset);
2713 else
2715 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2716 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2719 if (Pmode == ptr_mode)
2720 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2721 else
2722 aarch64_emit_move (temp1,
2723 gen_rtx_SIGN_EXTEND (Pmode,
2724 gen_rtx_MEM (ptr_mode, addr)));
2726 emit_insn (gen_add2_insn (this_rtx, temp1));
2729 /* Generate a tail call to the target function. */
2730 if (!TREE_USED (function))
2732 assemble_external (function);
2733 TREE_USED (function) = 1;
2735 funexp = XEXP (DECL_RTL (function), 0);
2736 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2737 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2738 SIBLING_CALL_P (insn) = 1;
2740 insn = get_insns ();
2741 shorten_branches (insn);
2742 final_start_function (insn, file, 1);
2743 final (insn, file, 1);
2744 final_end_function ();
2746 /* Stop pretending to be a post-reload pass. */
2747 reload_completed = 0;
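/* Illustrative example (hypothetical thunk): for DELTA == 16 and
   VCALL_OFFSET == 0 the code above reduces to a single immediate
   adjustment of the this pointer followed by a tail call, roughly

     add  x0, x0, #16
     b    <function>  */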
2750 static int
2751 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2753 if (GET_CODE (*x) == SYMBOL_REF)
2754 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2756 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2757 TLS offsets, not real symbol references. */
2758 if (GET_CODE (*x) == UNSPEC
2759 && XINT (*x, 1) == UNSPEC_TLS)
2760 return -1;
2762 return 0;
2765 static bool
2766 aarch64_tls_referenced_p (rtx x)
2768 if (!TARGET_HAVE_TLS)
2769 return false;
2771 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2775 static int
2776 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2778 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2779 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2781 if (*imm1 < *imm2)
2782 return -1;
2783 if (*imm1 > *imm2)
2784 return +1;
2785 return 0;
2789 static void
2790 aarch64_build_bitmask_table (void)
2792 unsigned HOST_WIDE_INT mask, imm;
2793 unsigned int log_e, e, s, r;
2794 unsigned int nimms = 0;
2796 for (log_e = 1; log_e <= 6; log_e++)
2798 e = 1 << log_e;
2799 if (e == 64)
2800 mask = ~(HOST_WIDE_INT) 0;
2801 else
2802 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2803 for (s = 1; s < e; s++)
2805 for (r = 0; r < e; r++)
2807 /* set s consecutive bits to 1 (s < 64) */
2808 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2809 /* rotate right by r */
2810 if (r != 0)
2811 imm = ((imm >> r) | (imm << (e - r))) & mask;
2812 /* replicate the constant depending on SIMD size */
2813 switch (log_e) {
2814 case 1: imm |= (imm << 2);
2815 case 2: imm |= (imm << 4);
2816 case 3: imm |= (imm << 8);
2817 case 4: imm |= (imm << 16);
2818 case 5: imm |= (imm << 32);
2819 case 6:
2820 break;
2821 default:
2822 gcc_unreachable ();
2824 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2825 aarch64_bitmasks[nimms++] = imm;
2830 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2831 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2832 aarch64_bitmasks_cmp);
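/* Worked example of the (element size, set bits, rotation) encoding
   built above: with e == 8, s == 3 and r == 1 the inner loop forms
   0b00000111, rotates it right by one bit within the element to get
   0x83, and the switch fall-through replicates it up to
   0x8383838383838383.  Each element size contributes e * (e - 1)
   entries, i.e. 2 + 12 + 56 + 240 + 992 + 4032 == 5334 in total,
   which is what the assert against AARCH64_NUM_BITMASKS checks.  */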
2836 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2837 a left shift of 0 or 12 bits. */
2838 bool
2839 aarch64_uimm12_shift (HOST_WIDE_INT val)
2841 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2842 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2847 /* Return true if val is an immediate that can be loaded into a
2848 register by a MOVZ instruction. */
2849 static bool
2850 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2852 if (GET_MODE_SIZE (mode) > 4)
2854 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2855 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2856 return 1;
2858 else
2860 /* Ignore sign extension. */
2861 val &= (HOST_WIDE_INT) 0xffffffff;
2863 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2864 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2868 /* Return true if val is a valid bitmask immediate. */
2869 bool
2870 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2872 if (GET_MODE_SIZE (mode) < 8)
2874 /* Replicate bit pattern. */
2875 val &= (HOST_WIDE_INT) 0xffffffff;
2876 val |= val << 32;
2878 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2879 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2883 /* Return true if val is an immediate that can be loaded into a
2884 register in a single instruction. */
2885 bool
2886 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2888 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2889 return 1;
2890 return aarch64_bitmask_imm (val, mode);
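/* Illustrative examples: 0xffff0000ffffffff is not itself a MOVZ-style
   immediate, but its complement 0x0000ffff00000000 is, so it is
   accepted here via the MOVN check; 0x5555555555555555 fails both the
   MOVZ and MOVN checks but is a replicated bitmask pattern, so it is
   accepted via aarch64_bitmask_imm (a single logical-immediate move).  */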
2893 static bool
2894 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2896 rtx base, offset;
2898 if (GET_CODE (x) == HIGH)
2899 return true;
2901 split_const (x, &base, &offset);
2902 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2904 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2905 != SYMBOL_FORCE_TO_MEM)
2906 return true;
2907 else
2908 /* Avoid generating a 64-bit relocation in ILP32; leave it
2909 to aarch64_expand_mov_immediate to handle it properly. */
2910 return mode != ptr_mode;
2913 return aarch64_tls_referenced_p (x);
2916 /* Return true if register REGNO is a valid index register.
2917 STRICT_P is true if REG_OK_STRICT is in effect. */
2919 bool
2920 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2922 if (!HARD_REGISTER_NUM_P (regno))
2924 if (!strict_p)
2925 return true;
2927 if (!reg_renumber)
2928 return false;
2930 regno = reg_renumber[regno];
2932 return GP_REGNUM_P (regno);
2935 /* Return true if register REGNO is a valid base register for mode MODE.
2936 STRICT_P is true if REG_OK_STRICT is in effect. */
2938 bool
2939 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2941 if (!HARD_REGISTER_NUM_P (regno))
2943 if (!strict_p)
2944 return true;
2946 if (!reg_renumber)
2947 return false;
2949 regno = reg_renumber[regno];
2952 /* The fake registers will be eliminated to either the stack or
2953 hard frame pointer, both of which are usually valid base registers.
2954 Reload deals with the cases where the eliminated form isn't valid. */
2955 return (GP_REGNUM_P (regno)
2956 || regno == SP_REGNUM
2957 || regno == FRAME_POINTER_REGNUM
2958 || regno == ARG_POINTER_REGNUM);
2961 /* Return true if X is a valid base register for mode MODE.
2962 STRICT_P is true if REG_OK_STRICT is in effect. */
2964 static bool
2965 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2967 if (!strict_p && GET_CODE (x) == SUBREG)
2968 x = SUBREG_REG (x);
2970 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2973 /* Return true if address offset is a valid index. If it is, fill in INFO
2974 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2976 static bool
2977 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2978 enum machine_mode mode, bool strict_p)
2980 enum aarch64_address_type type;
2981 rtx index;
2982 int shift;
2984 /* (reg:P) */
2985 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2986 && GET_MODE (x) == Pmode)
2988 type = ADDRESS_REG_REG;
2989 index = x;
2990 shift = 0;
2992 /* (sign_extend:DI (reg:SI)) */
2993 else if ((GET_CODE (x) == SIGN_EXTEND
2994 || GET_CODE (x) == ZERO_EXTEND)
2995 && GET_MODE (x) == DImode
2996 && GET_MODE (XEXP (x, 0)) == SImode)
2998 type = (GET_CODE (x) == SIGN_EXTEND)
2999 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3000 index = XEXP (x, 0);
3001 shift = 0;
3003 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3004 else if (GET_CODE (x) == MULT
3005 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3006 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3007 && GET_MODE (XEXP (x, 0)) == DImode
3008 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3009 && CONST_INT_P (XEXP (x, 1)))
3011 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3012 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3013 index = XEXP (XEXP (x, 0), 0);
3014 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3016 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3017 else if (GET_CODE (x) == ASHIFT
3018 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3019 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3020 && GET_MODE (XEXP (x, 0)) == DImode
3021 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3022 && CONST_INT_P (XEXP (x, 1)))
3024 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3025 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3026 index = XEXP (XEXP (x, 0), 0);
3027 shift = INTVAL (XEXP (x, 1));
3029 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3030 else if ((GET_CODE (x) == SIGN_EXTRACT
3031 || GET_CODE (x) == ZERO_EXTRACT)
3032 && GET_MODE (x) == DImode
3033 && GET_CODE (XEXP (x, 0)) == MULT
3034 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3035 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3037 type = (GET_CODE (x) == SIGN_EXTRACT)
3038 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3039 index = XEXP (XEXP (x, 0), 0);
3040 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3041 if (INTVAL (XEXP (x, 1)) != 32 + shift
3042 || INTVAL (XEXP (x, 2)) != 0)
3043 shift = -1;
3045 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3046 (const_int 0xffffffff<<shift)) */
3047 else if (GET_CODE (x) == AND
3048 && GET_MODE (x) == DImode
3049 && GET_CODE (XEXP (x, 0)) == MULT
3050 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3051 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3052 && CONST_INT_P (XEXP (x, 1)))
3054 type = ADDRESS_REG_UXTW;
3055 index = XEXP (XEXP (x, 0), 0);
3056 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3057 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3058 shift = -1;
3060 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3061 else if ((GET_CODE (x) == SIGN_EXTRACT
3062 || GET_CODE (x) == ZERO_EXTRACT)
3063 && GET_MODE (x) == DImode
3064 && GET_CODE (XEXP (x, 0)) == ASHIFT
3065 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3066 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3068 type = (GET_CODE (x) == SIGN_EXTRACT)
3069 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3070 index = XEXP (XEXP (x, 0), 0);
3071 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3072 if (INTVAL (XEXP (x, 1)) != 32 + shift
3073 || INTVAL (XEXP (x, 2)) != 0)
3074 shift = -1;
3076 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3077 (const_int 0xffffffff<<shift)) */
3078 else if (GET_CODE (x) == AND
3079 && GET_MODE (x) == DImode
3080 && GET_CODE (XEXP (x, 0)) == ASHIFT
3081 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3082 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3083 && CONST_INT_P (XEXP (x, 1)))
3085 type = ADDRESS_REG_UXTW;
3086 index = XEXP (XEXP (x, 0), 0);
3087 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3088 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3089 shift = -1;
3091 /* (mult:P (reg:P) (const_int scale)) */
3092 else if (GET_CODE (x) == MULT
3093 && GET_MODE (x) == Pmode
3094 && GET_MODE (XEXP (x, 0)) == Pmode
3095 && CONST_INT_P (XEXP (x, 1)))
3097 type = ADDRESS_REG_REG;
3098 index = XEXP (x, 0);
3099 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3101 /* (ashift:P (reg:P) (const_int shift)) */
3102 else if (GET_CODE (x) == ASHIFT
3103 && GET_MODE (x) == Pmode
3104 && GET_MODE (XEXP (x, 0)) == Pmode
3105 && CONST_INT_P (XEXP (x, 1)))
3107 type = ADDRESS_REG_REG;
3108 index = XEXP (x, 0);
3109 shift = INTVAL (XEXP (x, 1));
3111 else
3112 return false;
3114 if (GET_CODE (index) == SUBREG)
3115 index = SUBREG_REG (index);
3117 if ((shift == 0 ||
3118 (shift > 0 && shift <= 3
3119 && (1 << shift) == GET_MODE_SIZE (mode)))
3120 && REG_P (index)
3121 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3123 info->type = type;
3124 info->offset = index;
3125 info->shift = shift;
3126 return true;
3129 return false;
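/* Illustrative example: for a DImode access whose address is
   (plus (reg x1) (mult (reg x2) (const_int 8))), the MULT term is
   handled above with shift == exact_log2 (8) == 3, which matches the
   access size, so the index is accepted as ADDRESS_REG_REG and is
   later printed as [x1, x2, lsl 3].  */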
3132 static inline bool
3133 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3135 return (offset >= -64 * GET_MODE_SIZE (mode)
3136 && offset < 64 * GET_MODE_SIZE (mode)
3137 && offset % GET_MODE_SIZE (mode) == 0);
3140 static inline bool
3141 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3142 HOST_WIDE_INT offset)
3144 return offset >= -256 && offset < 256;
3147 static inline bool
3148 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3150 return (offset >= 0
3151 && offset < 4096 * GET_MODE_SIZE (mode)
3152 && offset % GET_MODE_SIZE (mode) == 0);
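/* For example, for a DImode (8-byte) access these three predicates
   accept offsets in [-512, 504] in multiples of 8 (the load/store pair
   range noted earlier), [-256, 255] with no alignment restriction, and
   [0, 32760] in multiples of 8 respectively.  */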
3155 /* Return true if X is a valid address for machine mode MODE. If it is,
3156 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3157 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3159 static bool
3160 aarch64_classify_address (struct aarch64_address_info *info,
3161 rtx x, enum machine_mode mode,
3162 RTX_CODE outer_code, bool strict_p)
3164 enum rtx_code code = GET_CODE (x);
3165 rtx op0, op1;
3166 bool allow_reg_index_p =
3167 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3169 /* Don't support anything other than POST_INC or REG addressing for
3170 AdvSIMD. */
3171 if (aarch64_vector_mode_p (mode)
3172 && (code != POST_INC && code != REG))
3173 return false;
3175 switch (code)
3177 case REG:
3178 case SUBREG:
3179 info->type = ADDRESS_REG_IMM;
3180 info->base = x;
3181 info->offset = const0_rtx;
3182 return aarch64_base_register_rtx_p (x, strict_p);
3184 case PLUS:
3185 op0 = XEXP (x, 0);
3186 op1 = XEXP (x, 1);
3187 if (GET_MODE_SIZE (mode) != 0
3188 && CONST_INT_P (op1)
3189 && aarch64_base_register_rtx_p (op0, strict_p))
3191 HOST_WIDE_INT offset = INTVAL (op1);
3193 info->type = ADDRESS_REG_IMM;
3194 info->base = op0;
3195 info->offset = op1;
3197 /* TImode and TFmode values are allowed in both pairs of X
3198 registers and individual Q registers. The available
3199 address modes are:
3200 X,X: 7-bit signed scaled offset
3201 Q: 9-bit signed offset
3202 We conservatively require an offset representable in either mode.
3204 if (mode == TImode || mode == TFmode)
3205 return (offset_7bit_signed_scaled_p (mode, offset)
3206 && offset_9bit_signed_unscaled_p (mode, offset));
3208 if (outer_code == PARALLEL)
3209 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3210 && offset_7bit_signed_scaled_p (mode, offset));
3211 else
3212 return (offset_9bit_signed_unscaled_p (mode, offset)
3213 || offset_12bit_unsigned_scaled_p (mode, offset));
3216 if (allow_reg_index_p)
3218 /* Look for base + (scaled/extended) index register. */
3219 if (aarch64_base_register_rtx_p (op0, strict_p)
3220 && aarch64_classify_index (info, op1, mode, strict_p))
3222 info->base = op0;
3223 return true;
3225 if (aarch64_base_register_rtx_p (op1, strict_p)
3226 && aarch64_classify_index (info, op0, mode, strict_p))
3228 info->base = op1;
3229 return true;
3233 return false;
3235 case POST_INC:
3236 case POST_DEC:
3237 case PRE_INC:
3238 case PRE_DEC:
3239 info->type = ADDRESS_REG_WB;
3240 info->base = XEXP (x, 0);
3241 info->offset = NULL_RTX;
3242 return aarch64_base_register_rtx_p (info->base, strict_p);
3244 case POST_MODIFY:
3245 case PRE_MODIFY:
3246 info->type = ADDRESS_REG_WB;
3247 info->base = XEXP (x, 0);
3248 if (GET_CODE (XEXP (x, 1)) == PLUS
3249 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3250 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3251 && aarch64_base_register_rtx_p (info->base, strict_p))
3253 HOST_WIDE_INT offset;
3254 info->offset = XEXP (XEXP (x, 1), 1);
3255 offset = INTVAL (info->offset);
3257 /* TImode and TFmode values are allowed in both pairs of X
3258 registers and individual Q registers. The available
3259 address modes are:
3260 X,X: 7-bit signed scaled offset
3261 Q: 9-bit signed offset
3262 We conservatively require an offset representable in either mode.
3264 if (mode == TImode || mode == TFmode)
3265 return (offset_7bit_signed_scaled_p (mode, offset)
3266 && offset_9bit_signed_unscaled_p (mode, offset));
3268 if (outer_code == PARALLEL)
3269 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3270 && offset_7bit_signed_scaled_p (mode, offset));
3271 else
3272 return offset_9bit_signed_unscaled_p (mode, offset);
3274 return false;
3276 case CONST:
3277 case SYMBOL_REF:
3278 case LABEL_REF:
3279 /* load literal: pc-relative constant pool entry. Only supported
3280 for SI mode or larger. */
3281 info->type = ADDRESS_SYMBOLIC;
3282 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3284 rtx sym, addend;
3286 split_const (x, &sym, &addend);
3287 return (GET_CODE (sym) == LABEL_REF
3288 || (GET_CODE (sym) == SYMBOL_REF
3289 && CONSTANT_POOL_ADDRESS_P (sym)));
3291 return false;
3293 case LO_SUM:
3294 info->type = ADDRESS_LO_SUM;
3295 info->base = XEXP (x, 0);
3296 info->offset = XEXP (x, 1);
3297 if (allow_reg_index_p
3298 && aarch64_base_register_rtx_p (info->base, strict_p))
3300 rtx sym, offs;
3301 split_const (info->offset, &sym, &offs);
3302 if (GET_CODE (sym) == SYMBOL_REF
3303 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3304 == SYMBOL_SMALL_ABSOLUTE))
3306 /* The symbol and offset must be aligned to the access size. */
3307 unsigned int align;
3308 unsigned int ref_size;
3310 if (CONSTANT_POOL_ADDRESS_P (sym))
3311 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3312 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3314 tree exp = SYMBOL_REF_DECL (sym);
3315 align = TYPE_ALIGN (TREE_TYPE (exp));
3316 align = CONSTANT_ALIGNMENT (exp, align);
3318 else if (SYMBOL_REF_DECL (sym))
3319 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3320 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3321 && SYMBOL_REF_BLOCK (sym) != NULL)
3322 align = SYMBOL_REF_BLOCK (sym)->alignment;
3323 else
3324 align = BITS_PER_UNIT;
3326 ref_size = GET_MODE_SIZE (mode);
3327 if (ref_size == 0)
3328 ref_size = GET_MODE_SIZE (DImode);
3330 return ((INTVAL (offs) & (ref_size - 1)) == 0
3331 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3334 return false;
3336 default:
3337 return false;
3341 bool
3342 aarch64_symbolic_address_p (rtx x)
3344 rtx offset;
3346 split_const (x, &x, &offset);
3347 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3350 /* Classify the base of symbolic expression X, given that X appears in
3351 context CONTEXT. */
3353 enum aarch64_symbol_type
3354 aarch64_classify_symbolic_expression (rtx x,
3355 enum aarch64_symbol_context context)
3357 rtx offset;
3359 split_const (x, &x, &offset);
3360 return aarch64_classify_symbol (x, context);
3364 /* Return TRUE if X is a legitimate address for accessing memory in
3365 mode MODE. */
3366 static bool
3367 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3369 struct aarch64_address_info addr;
3371 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3374 /* Return TRUE if X is a legitimate address for accessing memory in
3375 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3376 pair operation. */
3377 bool
3378 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3379 RTX_CODE outer_code, bool strict_p)
3381 struct aarch64_address_info addr;
3383 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3386 /* Return TRUE if rtx X is immediate constant 0.0 */
3387 bool
3388 aarch64_float_const_zero_rtx_p (rtx x)
3390 REAL_VALUE_TYPE r;
3392 if (GET_MODE (x) == VOIDmode)
3393 return false;
3395 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3396 if (REAL_VALUE_MINUS_ZERO (r))
3397 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3398 return REAL_VALUES_EQUAL (r, dconst0);
3401 /* Return the fixed registers used for condition codes. */
3403 static bool
3404 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3406 *p1 = CC_REGNUM;
3407 *p2 = INVALID_REGNUM;
3408 return true;
3411 enum machine_mode
3412 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3414 /* All floating point compares return CCFP if it is an equality
3415 comparison, and CCFPE otherwise. */
3416 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3418 switch (code)
3420 case EQ:
3421 case NE:
3422 case UNORDERED:
3423 case ORDERED:
3424 case UNLT:
3425 case UNLE:
3426 case UNGT:
3427 case UNGE:
3428 case UNEQ:
3429 case LTGT:
3430 return CCFPmode;
3432 case LT:
3433 case LE:
3434 case GT:
3435 case GE:
3436 return CCFPEmode;
3438 default:
3439 gcc_unreachable ();
3443 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3444 && y == const0_rtx
3445 && (code == EQ || code == NE || code == LT || code == GE)
3446 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3447 || GET_CODE (x) == NEG))
3448 return CC_NZmode;
3450 /* A compare with a shifted operand. Because of canonicalization,
3451 the comparison will have to be swapped when we emit the assembly
3452 code. */
3453 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3454 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3455 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3456 || GET_CODE (x) == LSHIFTRT
3457 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3458 return CC_SWPmode;
3460 /* Similarly for a negated operand, but we can only do this for
3461 equalities. */
3462 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3463 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3464 && (code == EQ || code == NE)
3465 && GET_CODE (x) == NEG)
3466 return CC_Zmode;
3468 /* A compare of a mode narrower than SI mode against zero can be done
3469 by extending the value in the comparison. */
3470 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3471 && y == const0_rtx)
3472 /* Only use sign-extension if we really need it. */
3473 return ((code == GT || code == GE || code == LE || code == LT)
3474 ? CC_SESWPmode : CC_ZESWPmode);
3476 /* For everything else, return CCmode. */
3477 return CCmode;
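/* Illustrative examples: comparing (plus x y) against zero for EQ
   selects CC_NZmode; an ordered floating-point LT selects CCFPEmode
   while the unordered UNLT selects CCFPmode; and a QImode comparison
   against zero for GT selects CC_SESWPmode so that sign extension can
   be folded into the comparison.  */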
3480 static unsigned
3481 aarch64_get_condition_code (rtx x)
3483 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3484 enum rtx_code comp_code = GET_CODE (x);
3486 if (GET_MODE_CLASS (mode) != MODE_CC)
3487 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3489 switch (mode)
3491 case CCFPmode:
3492 case CCFPEmode:
3493 switch (comp_code)
3495 case GE: return AARCH64_GE;
3496 case GT: return AARCH64_GT;
3497 case LE: return AARCH64_LS;
3498 case LT: return AARCH64_MI;
3499 case NE: return AARCH64_NE;
3500 case EQ: return AARCH64_EQ;
3501 case ORDERED: return AARCH64_VC;
3502 case UNORDERED: return AARCH64_VS;
3503 case UNLT: return AARCH64_LT;
3504 case UNLE: return AARCH64_LE;
3505 case UNGT: return AARCH64_HI;
3506 case UNGE: return AARCH64_PL;
3507 default: gcc_unreachable ();
3509 break;
3511 case CCmode:
3512 switch (comp_code)
3514 case NE: return AARCH64_NE;
3515 case EQ: return AARCH64_EQ;
3516 case GE: return AARCH64_GE;
3517 case GT: return AARCH64_GT;
3518 case LE: return AARCH64_LE;
3519 case LT: return AARCH64_LT;
3520 case GEU: return AARCH64_CS;
3521 case GTU: return AARCH64_HI;
3522 case LEU: return AARCH64_LS;
3523 case LTU: return AARCH64_CC;
3524 default: gcc_unreachable ();
3526 break;
3528 case CC_SWPmode:
3529 case CC_ZESWPmode:
3530 case CC_SESWPmode:
3531 switch (comp_code)
3533 case NE: return AARCH64_NE;
3534 case EQ: return AARCH64_EQ;
3535 case GE: return AARCH64_LE;
3536 case GT: return AARCH64_LT;
3537 case LE: return AARCH64_GE;
3538 case LT: return AARCH64_GT;
3539 case GEU: return AARCH64_LS;
3540 case GTU: return AARCH64_CC;
3541 case LEU: return AARCH64_CS;
3542 case LTU: return AARCH64_HI;
3543 default: gcc_unreachable ();
3545 break;
3547 case CC_NZmode:
3548 switch (comp_code)
3550 case NE: return AARCH64_NE;
3551 case EQ: return AARCH64_EQ;
3552 case GE: return AARCH64_PL;
3553 case LT: return AARCH64_MI;
3554 default: gcc_unreachable ();
3556 break;
3558 case CC_Zmode:
3559 switch (comp_code)
3561 case NE: return AARCH64_NE;
3562 case EQ: return AARCH64_EQ;
3563 default: gcc_unreachable ();
3565 break;
3567 default:
3568 gcc_unreachable ();
3569 break;
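/* Return the number of bits set in VALUE.  Kernighan's method: each
   iteration of the loop clears the lowest set bit, so e.g. 0x29
   (binary 101001) takes three iterations and yields 3.  */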
3573 static unsigned
3574 bit_count (unsigned HOST_WIDE_INT value)
3576 unsigned count = 0;
3578 while (value)
3580 count++;
3581 value &= value - 1;
3584 return count;
3587 void
3588 aarch64_print_operand (FILE *f, rtx x, char code)
3590 switch (code)
3592 /* An integer or symbol address without a preceding # sign. */
3593 case 'c':
3594 switch (GET_CODE (x))
3596 case CONST_INT:
3597 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3598 break;
3600 case SYMBOL_REF:
3601 output_addr_const (f, x);
3602 break;
3604 case CONST:
3605 if (GET_CODE (XEXP (x, 0)) == PLUS
3606 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3608 output_addr_const (f, x);
3609 break;
3611 /* Fall through. */
3613 default:
3614 output_operand_lossage ("Unsupported operand for code '%c'", code);
3616 break;
3618 case 'e':
3619 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3621 int n;
3623 if (GET_CODE (x) != CONST_INT
3624 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3626 output_operand_lossage ("invalid operand for '%%%c'", code);
3627 return;
3630 switch (n)
3632 case 3:
3633 fputc ('b', f);
3634 break;
3635 case 4:
3636 fputc ('h', f);
3637 break;
3638 case 5:
3639 fputc ('w', f);
3640 break;
3641 default:
3642 output_operand_lossage ("invalid operand for '%%%c'", code);
3643 return;
3646 break;
3648 case 'p':
3650 int n;
3652 /* Print N such that 2^N == X. */
3653 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3655 output_operand_lossage ("invalid operand for '%%%c'", code);
3656 return;
3659 asm_fprintf (f, "%d", n);
3661 break;
3663 case 'P':
3664 /* Print the number of non-zero bits in X (a const_int). */
3665 if (GET_CODE (x) != CONST_INT)
3667 output_operand_lossage ("invalid operand for '%%%c'", code);
3668 return;
3671 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3672 break;
3674 case 'H':
3675 /* Print the higher numbered register of a pair (TImode) of regs. */
3676 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3678 output_operand_lossage ("invalid operand for '%%%c'", code);
3679 return;
3682 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3683 break;
3685 case 'm':
3686 /* Print a condition (eq, ne, etc). */
3688 /* CONST_TRUE_RTX means always -- that's the default. */
3689 if (x == const_true_rtx)
3690 return;
3692 if (!COMPARISON_P (x))
3694 output_operand_lossage ("invalid operand for '%%%c'", code);
3695 return;
3698 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3699 break;
3701 case 'M':
3702 /* Print the inverse of a condition (eq <-> ne, etc). */
3704 /* CONST_TRUE_RTX means never -- that's the default. */
3705 if (x == const_true_rtx)
3707 fputs ("nv", f);
3708 return;
3711 if (!COMPARISON_P (x))
3713 output_operand_lossage ("invalid operand for '%%%c'", code);
3714 return;
3717 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3718 (aarch64_get_condition_code (x))], f);
3719 break;
3721 case 'b':
3722 case 'h':
3723 case 's':
3724 case 'd':
3725 case 'q':
3726 /* Print a scalar FP/SIMD register name. */
3727 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3729 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3730 return;
3732 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3733 break;
3735 case 'S':
3736 case 'T':
3737 case 'U':
3738 case 'V':
3739 /* Print the first FP/SIMD register name in a list. */
3740 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3742 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3743 return;
3745 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3746 break;
3748 case 'X':
3749 /* Print bottom 16 bits of integer constant in hex. */
3750 if (GET_CODE (x) != CONST_INT)
3752 output_operand_lossage ("invalid operand for '%%%c'", code);
3753 return;
3755 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3756 break;
3758 case 'w':
3759 case 'x':
3760 /* Print a general register name or the zero register (32-bit or
3761 64-bit). */
3762 if (x == const0_rtx
3763 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3765 asm_fprintf (f, "%czr", code);
3766 break;
3769 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3771 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3772 break;
3775 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3777 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3778 break;
3781 /* Fall through */
3783 case 0:
3784 /* Print a normal operand. If it's a general register, then we
3785 assume DImode. */
3786 if (x == NULL)
3788 output_operand_lossage ("missing operand");
3789 return;
3792 switch (GET_CODE (x))
3794 case REG:
3795 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3796 break;
3798 case MEM:
3799 aarch64_memory_reference_mode = GET_MODE (x);
3800 output_address (XEXP (x, 0));
3801 break;
3803 case LABEL_REF:
3804 case SYMBOL_REF:
3805 output_addr_const (asm_out_file, x);
3806 break;
3808 case CONST_INT:
3809 asm_fprintf (f, "%wd", INTVAL (x));
3810 break;
3812 case CONST_VECTOR:
3813 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3815 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3816 HOST_WIDE_INT_MIN,
3817 HOST_WIDE_INT_MAX));
3818 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3820 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3822 fputc ('0', f);
3824 else
3825 gcc_unreachable ();
3826 break;
3828 case CONST_DOUBLE:
3829 /* CONST_DOUBLE can represent a double-width integer.
3830 In this case, the mode of x is VOIDmode. */
3831 if (GET_MODE (x) == VOIDmode)
3832 ; /* Do Nothing. */
3833 else if (aarch64_float_const_zero_rtx_p (x))
3835 fputc ('0', f);
3836 break;
3838 else if (aarch64_float_const_representable_p (x))
3840 #define buf_size 20
3841 char float_buf[buf_size] = {'\0'};
3842 REAL_VALUE_TYPE r;
3843 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3844 real_to_decimal_for_mode (float_buf, &r,
3845 buf_size, buf_size,
3846 1, GET_MODE (x));
3847 asm_fprintf (asm_out_file, "%s", float_buf);
3848 break;
3849 #undef buf_size
3851 output_operand_lossage ("invalid constant");
3852 return;
3853 default:
3854 output_operand_lossage ("invalid operand");
3855 return;
3857 break;
3859 case 'A':
3860 if (GET_CODE (x) == HIGH)
3861 x = XEXP (x, 0);
3863 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3865 case SYMBOL_SMALL_GOT:
3866 asm_fprintf (asm_out_file, ":got:");
3867 break;
3869 case SYMBOL_SMALL_TLSGD:
3870 asm_fprintf (asm_out_file, ":tlsgd:");
3871 break;
3873 case SYMBOL_SMALL_TLSDESC:
3874 asm_fprintf (asm_out_file, ":tlsdesc:");
3875 break;
3877 case SYMBOL_SMALL_GOTTPREL:
3878 asm_fprintf (asm_out_file, ":gottprel:");
3879 break;
3881 case SYMBOL_SMALL_TPREL:
3882 asm_fprintf (asm_out_file, ":tprel:");
3883 break;
3885 case SYMBOL_TINY_GOT:
3886 gcc_unreachable ();
3887 break;
3889 default:
3890 break;
3892 output_addr_const (asm_out_file, x);
3893 break;
3895 case 'L':
3896 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3898 case SYMBOL_SMALL_GOT:
3899 asm_fprintf (asm_out_file, ":lo12:");
3900 break;
3902 case SYMBOL_SMALL_TLSGD:
3903 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3904 break;
3906 case SYMBOL_SMALL_TLSDESC:
3907 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3908 break;
3910 case SYMBOL_SMALL_GOTTPREL:
3911 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3912 break;
3914 case SYMBOL_SMALL_TPREL:
3915 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3916 break;
3918 case SYMBOL_TINY_GOT:
3919 asm_fprintf (asm_out_file, ":got:");
3920 break;
3922 default:
3923 break;
3925 output_addr_const (asm_out_file, x);
3926 break;
3928 case 'G':
3930 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3932 case SYMBOL_SMALL_TPREL:
3933 asm_fprintf (asm_out_file, ":tprel_hi12:");
3934 break;
3935 default:
3936 break;
3938 output_addr_const (asm_out_file, x);
3939 break;
3941 default:
3942 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3943 return;
3947 void
3948 aarch64_print_operand_address (FILE *f, rtx x)
3950 struct aarch64_address_info addr;
3952 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3953 MEM, true))
3954 switch (addr.type)
3956 case ADDRESS_REG_IMM:
3957 if (addr.offset == const0_rtx)
3958 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3959 else
3960 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3961 INTVAL (addr.offset));
3962 return;
3964 case ADDRESS_REG_REG:
3965 if (addr.shift == 0)
3966 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3967 reg_names [REGNO (addr.offset)]);
3968 else
3969 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3970 reg_names [REGNO (addr.offset)], addr.shift);
3971 return;
3973 case ADDRESS_REG_UXTW:
3974 if (addr.shift == 0)
3975 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3976 REGNO (addr.offset) - R0_REGNUM);
3977 else
3978 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3979 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3980 return;
3982 case ADDRESS_REG_SXTW:
3983 if (addr.shift == 0)
3984 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3985 REGNO (addr.offset) - R0_REGNUM);
3986 else
3987 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3988 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3989 return;
3991 case ADDRESS_REG_WB:
3992 switch (GET_CODE (x))
3994 case PRE_INC:
3995 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3996 GET_MODE_SIZE (aarch64_memory_reference_mode));
3997 return;
3998 case POST_INC:
3999 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4000 GET_MODE_SIZE (aarch64_memory_reference_mode));
4001 return;
4002 case PRE_DEC:
4003 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4004 GET_MODE_SIZE (aarch64_memory_reference_mode));
4005 return;
4006 case POST_DEC:
4007 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4008 GET_MODE_SIZE (aarch64_memory_reference_mode));
4009 return;
4010 case PRE_MODIFY:
4011 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4012 INTVAL (addr.offset));
4013 return;
4014 case POST_MODIFY:
4015 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4016 INTVAL (addr.offset));
4017 return;
4018 default:
4019 break;
4021 break;
4023 case ADDRESS_LO_SUM:
4024 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4025 output_addr_const (f, addr.offset);
4026 asm_fprintf (f, "]");
4027 return;
4029 case ADDRESS_SYMBOLIC:
4030 break;
4033 output_addr_const (f, x);
4036 bool
4037 aarch64_label_mentioned_p (rtx x)
4039 const char *fmt;
4040 int i;
4042 if (GET_CODE (x) == LABEL_REF)
4043 return true;
4045 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4046 referencing instruction, but they are constant offsets, not
4047 symbols. */
4048 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4049 return false;
4051 fmt = GET_RTX_FORMAT (GET_CODE (x));
4052 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4054 if (fmt[i] == 'E')
4056 int j;
4058 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4059 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4060 return 1;
4062 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4063 return 1;
4066 return 0;
4069 /* Implement REGNO_REG_CLASS. */
4071 enum reg_class
4072 aarch64_regno_regclass (unsigned regno)
4074 if (GP_REGNUM_P (regno))
4075 return CORE_REGS;
4077 if (regno == SP_REGNUM)
4078 return STACK_REG;
4080 if (regno == FRAME_POINTER_REGNUM
4081 || regno == ARG_POINTER_REGNUM)
4082 return POINTER_REGS;
4084 if (FP_REGNUM_P (regno))
4085 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4087 return NO_REGS;
4090 /* Try a machine-dependent way of reloading an illegitimate address
4091 operand. If we find one, push the reload and return the new rtx. */
4094 aarch64_legitimize_reload_address (rtx *x_p,
4095 enum machine_mode mode,
4096 int opnum, int type,
4097 int ind_levels ATTRIBUTE_UNUSED)
4099 rtx x = *x_p;
4101 /* Do not allow mem (plus (reg, const)) if vector mode. */
4102 if (aarch64_vector_mode_p (mode)
4103 && GET_CODE (x) == PLUS
4104 && REG_P (XEXP (x, 0))
4105 && CONST_INT_P (XEXP (x, 1)))
4107 rtx orig_rtx = x;
4108 x = copy_rtx (x);
4109 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4110 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4111 opnum, (enum reload_type) type);
4112 return x;
4115 /* We must recognize output that we have already generated ourselves. */
4116 if (GET_CODE (x) == PLUS
4117 && GET_CODE (XEXP (x, 0)) == PLUS
4118 && REG_P (XEXP (XEXP (x, 0), 0))
4119 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4120 && CONST_INT_P (XEXP (x, 1)))
4122 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4123 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4124 opnum, (enum reload_type) type);
4125 return x;
4128 /* We wish to handle large displacements off a base register by splitting
4129 the addend across an add and the mem insn. This can cut the number of
4130 extra insns needed from 3 to 1. It is only useful for load/store of a
4131 single register with a 12-bit offset field.
4132 if (GET_CODE (x) == PLUS
4133 && REG_P (XEXP (x, 0))
4134 && CONST_INT_P (XEXP (x, 1))
4135 && HARD_REGISTER_P (XEXP (x, 0))
4136 && mode != TImode
4137 && mode != TFmode
4138 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4140 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4141 HOST_WIDE_INT low = val & 0xfff;
4142 HOST_WIDE_INT high = val - low;
4143 HOST_WIDE_INT offs;
4144 rtx cst;
4145 enum machine_mode xmode = GET_MODE (x);
4147 /* In ILP32, xmode can be either DImode or SImode. */
4148 gcc_assert (xmode == DImode || xmode == SImode);
4150 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4151 BLKmode alignment. */
4152 if (GET_MODE_SIZE (mode) == 0)
4153 return NULL_RTX;
4155 offs = low % GET_MODE_SIZE (mode);
4157 /* Align misaligned offset by adjusting high part to compensate. */
4158 if (offs != 0)
4160 if (aarch64_uimm12_shift (high + offs))
4162 /* Align down. */
4163 low = low - offs;
4164 high = high + offs;
4166 else
4168 /* Align up. */
4169 offs = GET_MODE_SIZE (mode) - offs;
4170 low = low + offs;
4171 high = high + (low & 0x1000) - offs;
4172 low &= 0xfff;
4176 /* Check for overflow. */
4177 if (high + low != val)
4178 return NULL_RTX;
4180 cst = GEN_INT (high);
4181 if (!aarch64_uimm12_shift (high))
4182 cst = force_const_mem (xmode, cst);
4184 /* Reload high part into base reg, leaving the low part
4185 in the mem instruction.
4186 Note that replacing this gen_rtx_PLUS with plus_constant is
4187 wrong in this case because we rely on the
4188 (plus (plus reg c1) c2) structure being preserved so that
4189 XEXP (*p, 0) in push_reload below uses the correct term. */
4190 x = gen_rtx_PLUS (xmode,
4191 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4192 GEN_INT (low));
4194 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4195 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4196 opnum, (enum reload_type) type);
4197 return x;
4200 return NULL_RTX;
4204 static reg_class_t
4205 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4206 reg_class_t rclass,
4207 enum machine_mode mode,
4208 secondary_reload_info *sri)
4210 /* Without the TARGET_SIMD instructions we cannot move a Q register
4211 to a Q register directly. We need a scratch. */
4212 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4213 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4214 && reg_class_subset_p (rclass, FP_REGS))
4216 if (mode == TFmode)
4217 sri->icode = CODE_FOR_aarch64_reload_movtf;
4218 else if (mode == TImode)
4219 sri->icode = CODE_FOR_aarch64_reload_movti;
4220 return NO_REGS;
4223 /* A TFmode or TImode memory access should be handled via FP_REGS,
4224 because AArch64 has richer addressing modes for LDR/STR instructions
4225 than for LDP/STP instructions. */
4226 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4227 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4228 return FP_REGS;
4230 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4231 return CORE_REGS;
4233 return NO_REGS;
4236 static bool
4237 aarch64_can_eliminate (const int from, const int to)
4239 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4240 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4242 if (frame_pointer_needed)
4244 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4245 return true;
4246 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4247 return false;
4248 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4249 && !cfun->calls_alloca)
4250 return true;
4251 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4252 return true;
4254 return false;
4257 return true;
4260 HOST_WIDE_INT
4261 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4263 HOST_WIDE_INT frame_size;
4264 HOST_WIDE_INT offset;
4266 aarch64_layout_frame ();
4267 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4268 + crtl->outgoing_args_size
4269 + cfun->machine->saved_varargs_size);
4271 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4272 offset = frame_size;
4274 if (to == HARD_FRAME_POINTER_REGNUM)
4276 if (from == ARG_POINTER_REGNUM)
4277 return offset - crtl->outgoing_args_size;
4279 if (from == FRAME_POINTER_REGNUM)
4280 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4283 if (to == STACK_POINTER_REGNUM)
4285 if (from == FRAME_POINTER_REGNUM)
4287 HOST_WIDE_INT elim = crtl->outgoing_args_size
4288 + cfun->machine->frame.saved_regs_size
4289 + get_frame_size ();
4290 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4291 return elim;
4295 return offset;
4299 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4300 previous frame. */
4303 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4305 if (count != 0)
4306 return const0_rtx;
4307 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4311 static void
4312 aarch64_asm_trampoline_template (FILE *f)
4314 if (TARGET_ILP32)
4316 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4317 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4319 else
4321 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4322 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4324 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4325 assemble_aligned_integer (4, const0_rtx);
4326 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4327 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
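/* Schematically, with the default register assignments (x17 for IP1_REGNUM,
   x18 for STATIC_CHAIN_REGNUM), the LP64 trampoline emitted above is:

	ldr	x17, .+16	// target address, from bytes 16..23
	ldr	x18, .+20	// static chain value, from bytes 24..31
	br	x17
	.word	0		// pads the code out to 16 bytes
	.dword	0		// overwritten by aarch64_trampoline_init
	.dword	0		// overwritten by aarch64_trampoline_init  */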
4330 static void
4331 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4333 rtx fnaddr, mem, a_tramp;
4334 const int tramp_code_sz = 16;
4336 /* We don't need to copy the trailing D-words; we fill those in below. */
4337 emit_block_move (m_tramp, assemble_trampoline_template (),
4338 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4339 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4340 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4341 if (GET_MODE (fnaddr) != ptr_mode)
4342 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4343 emit_move_insn (mem, fnaddr);
4345 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4346 emit_move_insn (mem, chain_value);
4348 /* XXX We should really define a "clear_cache" pattern and use
4349 gen_clear_cache(). */
4350 a_tramp = XEXP (m_tramp, 0);
4351 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4352 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4353 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4354 ptr_mode);
4357 static unsigned char
4358 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
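/* For example, TImode in CORE_REGS needs (16 + 7) / 8 == 2 registers,
   while V4SImode in FP_REGS is a vector mode (when SIMD is enabled) and
   fits in a single 128-bit register: (16 + 15) / 16 == 1.  */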
4360 switch (regclass)
4362 case CORE_REGS:
4363 case POINTER_REGS:
4364 case GENERAL_REGS:
4365 case ALL_REGS:
4366 case FP_REGS:
4367 case FP_LO_REGS:
4368 return
4369 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4370 (GET_MODE_SIZE (mode) + 7) / 8;
4371 case STACK_REG:
4372 return 1;
4374 case NO_REGS:
4375 return 0;
4377 default:
4378 break;
4380 gcc_unreachable ();
4383 static reg_class_t
4384 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4386 if (regclass == POINTER_REGS)
4387 return GENERAL_REGS;
4389 if (regclass == STACK_REG)
4391 if (REG_P(x)
4392 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4393 return regclass;
4395 return NO_REGS;
4398 /* If it's an integer immediate that MOVI can't handle, then
4399 FP_REGS is not an option, so we return NO_REGS instead. */
4400 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4401 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4402 return NO_REGS;
4404 /* Register elimination can result in a request for
4405 SP+constant->FP_REGS. We cannot support such operations, which
4406 use SP as the source and an FP_REG as the destination, so reject
4407 them outright. */
4408 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4410 rtx lhs = XEXP (x, 0);
4412 /* Look through a possible SUBREG introduced by ILP32. */
4413 if (GET_CODE (lhs) == SUBREG)
4414 lhs = SUBREG_REG (lhs);
4416 gcc_assert (REG_P (lhs));
4417 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4418 POINTER_REGS));
4419 return NO_REGS;
4422 return regclass;
4425 void
4426 aarch64_asm_output_labelref (FILE* f, const char *name)
4428 asm_fprintf (f, "%U%s", name);
4431 static void
4432 aarch64_elf_asm_constructor (rtx symbol, int priority)
4434 if (priority == DEFAULT_INIT_PRIORITY)
4435 default_ctor_section_asm_out_constructor (symbol, priority);
4436 else
4438 section *s;
4439 char buf[18];
4440 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4441 s = get_section (buf, SECTION_WRITE, NULL);
4442 switch_to_section (s);
4443 assemble_align (POINTER_SIZE);
4444 assemble_aligned_integer (POINTER_BYTES, symbol);
4448 static void
4449 aarch64_elf_asm_destructor (rtx symbol, int priority)
4451 if (priority == DEFAULT_INIT_PRIORITY)
4452 default_dtor_section_asm_out_destructor (symbol, priority);
4453 else
4455 section *s;
4456 char buf[18];
4457 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4458 s = get_section (buf, SECTION_WRITE, NULL);
4459 switch_to_section (s);
4460 assemble_align (POINTER_SIZE);
4461 assemble_aligned_integer (POINTER_BYTES, symbol);
4465 const char*
4466 aarch64_output_casesi (rtx *operands)
4468 char buf[100];
4469 char label[100];
4470 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4471 int index;
4472 static const char *const patterns[4][2] =
4475 "ldrb\t%w3, [%0,%w1,uxtw]",
4476 "add\t%3, %4, %w3, sxtb #2"
4479 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4480 "add\t%3, %4, %w3, sxth #2"
4483 "ldr\t%w3, [%0,%w1,uxtw #2]",
4484 "add\t%3, %4, %w3, sxtw #2"
4486 /* We assume that DImode is only generated when not optimizing and
4487 that we don't really need 64-bit address offsets. That would
4488 imply an object file with 8GB of code in a single function! */
4490 "ldr\t%w3, [%0,%w1,uxtw #2]",
4491 "add\t%3, %4, %w3, sxtw #2"
4495 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4497 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4499 gcc_assert (index >= 0 && index <= 3);
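/* The sequence emitted below is roughly (shown for a 2-byte dispatch
   table; register numbers simply mirror the operand numbers):

	ldrh	w3, [x0, w1, uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   .Lrtx<N>:

   where operand 0 addresses the table, operand 1 is the index and
   operands 3 and 4 are scratch registers.  */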
4501 /* Need to implement table size reduction by changing the code below. */
4502 output_asm_insn (patterns[index][0], operands);
4503 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4504 snprintf (buf, sizeof (buf),
4505 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4506 output_asm_insn (buf, operands);
4507 output_asm_insn (patterns[index][1], operands);
4508 output_asm_insn ("br\t%3", operands);
4509 assemble_label (asm_out_file, label);
4510 return "";
4514 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4515 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4516 operator. */
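/* For example, aarch64_uxt_size (1, 0x1fe) is 8, since 0xff << 1 == 0x1fe
   (a UXTB operand scaled by 2), and aarch64_uxt_size (0, 0xffff) is 16
   (UXTH).  A shift/mask pair that matches no such pattern yields 0.  */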
4519 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4521 if (shift >= 0 && shift <= 3)
4523 int size;
4524 for (size = 8; size <= 32; size *= 2)
4526 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4527 if (mask == bits << shift)
4528 return size;
4531 return 0;
4534 static bool
4535 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4536 const_rtx x ATTRIBUTE_UNUSED)
4538 /* We can't use blocks for constants when we're using a per-function
4539 constant pool. */
4540 return false;
4543 static section *
4544 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4545 rtx x ATTRIBUTE_UNUSED,
4546 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4548 /* Force all constant pool entries into the current function section. */
4549 return function_section (current_function_decl);
4553 /* Costs. */
4555 /* Helper function for rtx cost calculation. Strip a shift expression
4556 from X. Returns the inner operand if successful, or the original
4557 expression on failure. */
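/* For example, (ashift (reg) (const_int 3)) and (mult (reg) (const_int 8))
   both strip to (reg), whereas (ashift (reg) (reg)) is returned unchanged
   because the shift amount is not constant.  */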
4558 static rtx
4559 aarch64_strip_shift (rtx x)
4561 rtx op = x;
4563 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4564 we can convert both to ROR during final output. */
4565 if ((GET_CODE (op) == ASHIFT
4566 || GET_CODE (op) == ASHIFTRT
4567 || GET_CODE (op) == LSHIFTRT
4568 || GET_CODE (op) == ROTATERT
4569 || GET_CODE (op) == ROTATE)
4570 && CONST_INT_P (XEXP (op, 1)))
4571 return XEXP (op, 0);
4573 if (GET_CODE (op) == MULT
4574 && CONST_INT_P (XEXP (op, 1))
4575 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4576 return XEXP (op, 0);
4578 return x;
4581 /* Helper function for rtx cost calculation. Strip an extend
4582 expression from X. Returns the inner operand if successful, or the
4583 original expression on failure. We deal with a number of possible
4584 canonicalization variations here. */
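/* For example, (zero_extend:DI (reg:SI)) strips to (reg:SI), as does
   (ashift (sign_extend:DI (reg:SI)) (const_int 2)).  The AND form
   (and:DI (mult:DI (reg:DI) (const_int 4)) (const_int 0x3fc)) strips to
   (reg:DI), since the mask is 0xff scaled by 4.  */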
4585 static rtx
4586 aarch64_strip_extend (rtx x)
4588 rtx op = x;
4590 /* Zero and sign extraction of a widened value. */
4591 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4592 && XEXP (op, 2) == const0_rtx
4593 && GET_CODE (XEXP (op, 0)) == MULT
4594 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4595 XEXP (op, 1)))
4596 return XEXP (XEXP (op, 0), 0);
4598 /* It can also be represented (for zero-extend) as an AND with an
4599 immediate. */
4600 if (GET_CODE (op) == AND
4601 && GET_CODE (XEXP (op, 0)) == MULT
4602 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4603 && CONST_INT_P (XEXP (op, 1))
4604 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4605 INTVAL (XEXP (op, 1))) != 0)
4606 return XEXP (XEXP (op, 0), 0);
4608 /* Now handle extended register, as this may also have an optional
4609 left shift by 1..4. */
4610 if (GET_CODE (op) == ASHIFT
4611 && CONST_INT_P (XEXP (op, 1))
4612 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4613 op = XEXP (op, 0);
4615 if (GET_CODE (op) == ZERO_EXTEND
4616 || GET_CODE (op) == SIGN_EXTEND)
4617 op = XEXP (op, 0);
4619 if (op != x)
4620 return op;
4622 return x;
4625 /* Helper function for rtx cost calculation. Calculate the cost of
4626 a MULT, which may be part of a multiply-accumulate rtx. Return
4627 the calculated cost of the expression, recursing manually in to
4628 operands where needed. */
4630 static int
4631 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4633 rtx op0, op1;
4634 const struct cpu_cost_table *extra_cost
4635 = aarch64_tune_params->insn_extra_cost;
4636 int cost = 0;
4637 bool maybe_fma = (outer == PLUS || outer == MINUS);
4638 enum machine_mode mode = GET_MODE (x);
4640 gcc_checking_assert (code == MULT);
4642 op0 = XEXP (x, 0);
4643 op1 = XEXP (x, 1);
4645 if (VECTOR_MODE_P (mode))
4646 mode = GET_MODE_INNER (mode);
4648 /* Integer multiply/fma. */
4649 if (GET_MODE_CLASS (mode) == MODE_INT)
4651 /* The multiply will be canonicalized as a shift, so cost it as such. */
4652 if (CONST_INT_P (op1)
4653 && exact_log2 (INTVAL (op1)) > 0)
4655 if (speed)
4657 if (maybe_fma)
4658 /* ADD (shifted register). */
4659 cost += extra_cost->alu.arith_shift;
4660 else
4661 /* LSL (immediate). */
4662 cost += extra_cost->alu.shift;
4665 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4667 return cost;
4670 /* Integer multiplies or FMAs have zero/sign extending variants. */
4671 if ((GET_CODE (op0) == ZERO_EXTEND
4672 && GET_CODE (op1) == ZERO_EXTEND)
4673 || (GET_CODE (op0) == SIGN_EXTEND
4674 && GET_CODE (op1) == SIGN_EXTEND))
4676 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4677 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4679 if (speed)
4681 if (maybe_fma)
4682 /* MADD/SMADDL/UMADDL. */
4683 cost += extra_cost->mult[0].extend_add;
4684 else
4685 /* MUL/SMULL/UMULL. */
4686 cost += extra_cost->mult[0].extend;
4689 return cost;
4692 /* This is either an integer multiply or an FMA. In both cases
4693 we want to recurse and cost the operands. */
4694 cost += rtx_cost (op0, MULT, 0, speed)
4695 + rtx_cost (op1, MULT, 1, speed);
4697 if (speed)
4699 if (maybe_fma)
4700 /* MADD. */
4701 cost += extra_cost->mult[mode == DImode].add;
4702 else
4703 /* MUL. */
4704 cost += extra_cost->mult[mode == DImode].simple;
4707 return cost;
4709 else
4711 if (speed)
4713 /* Floating-point FMA can also support negations of the
4714 operands. */
4715 if (GET_CODE (op0) == NEG)
4717 maybe_fma = true;
4718 op0 = XEXP (op0, 0);
4720 if (GET_CODE (op1) == NEG)
4722 maybe_fma = true;
4723 op1 = XEXP (op1, 0);
4726 if (maybe_fma)
4727 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4728 cost += extra_cost->fp[mode == DFmode].fma;
4729 else
4730 /* FMUL. */
4731 cost += extra_cost->fp[mode == DFmode].mult;
4734 cost += rtx_cost (op0, MULT, 0, speed)
4735 + rtx_cost (op1, MULT, 1, speed);
4736 return cost;
4740 static int
4741 aarch64_address_cost (rtx x,
4742 enum machine_mode mode,
4743 addr_space_t as ATTRIBUTE_UNUSED,
4744 bool speed)
4746 enum rtx_code c = GET_CODE (x);
4747 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4748 struct aarch64_address_info info;
4749 int cost = 0;
4750 info.shift = 0;
4752 if (!aarch64_classify_address (&info, x, mode, c, false))
4754 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4756 /* This is a CONST or SYMBOL ref which will be split
4757 in a different way depending on the code model in use.
4758 Cost it through the generic infrastructure. */
4759 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4760 /* Divide through by the cost of one instruction to
4761 bring it to the same units as the address costs. */
4762 cost_symbol_ref /= COSTS_N_INSNS (1);
4763 /* The cost is then the cost of preparing the address,
4764 followed by an immediate (possibly 0) offset. */
4765 return cost_symbol_ref + addr_cost->imm_offset;
4767 else
4769 /* This is most likely a jump table from a case
4770 statement. */
4771 return addr_cost->register_offset;
4775 switch (info.type)
4777 case ADDRESS_LO_SUM:
4778 case ADDRESS_SYMBOLIC:
4779 case ADDRESS_REG_IMM:
4780 cost += addr_cost->imm_offset;
4781 break;
4783 case ADDRESS_REG_WB:
4784 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4785 cost += addr_cost->pre_modify;
4786 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4787 cost += addr_cost->post_modify;
4788 else
4789 gcc_unreachable ();
4791 break;
4793 case ADDRESS_REG_REG:
4794 cost += addr_cost->register_offset;
4795 break;
4797 case ADDRESS_REG_UXTW:
4798 case ADDRESS_REG_SXTW:
4799 cost += addr_cost->register_extend;
4800 break;
4802 default:
4803 gcc_unreachable ();
4807 if (info.shift > 0)
4809 /* For the sake of calculating the cost of the shifted register
4810 component, we can treat same sized modes in the same way. */
4811 switch (GET_MODE_BITSIZE (mode))
4813 case 16:
4814 cost += addr_cost->addr_scale_costs.hi;
4815 break;
4817 case 32:
4818 cost += addr_cost->addr_scale_costs.si;
4819 break;
4821 case 64:
4822 cost += addr_cost->addr_scale_costs.di;
4823 break;
4825 /* We can't tell, or this is a 128-bit vector. */
4826 default:
4827 cost += addr_cost->addr_scale_costs.ti;
4828 break;
4832 return cost;
4835 /* Calculate the cost of calculating X, storing it in *COST. Result
4836 is true if the total cost of the operation has now been calculated. */
4837 static bool
4838 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4839 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4841 rtx op0, op1;
4842 const struct cpu_cost_table *extra_cost
4843 = aarch64_tune_params->insn_extra_cost;
4844 enum machine_mode mode = GET_MODE (x);
4846 /* By default, assume that everything has equivalent cost to the
4847 cheapest instruction. Any additional costs are applied as a delta
4848 above this default. */
4849 *cost = COSTS_N_INSNS (1);
4851 /* TODO: The cost infrastructure currently does not handle
4852 vector operations. Assume that all vector operations
4853 are equally expensive. */
4854 if (VECTOR_MODE_P (mode))
4856 if (speed)
4857 *cost += extra_cost->vect.alu;
4858 return true;
4861 switch (code)
4863 case SET:
4864 /* The cost depends entirely on the operands to SET. */
4865 *cost = 0;
4866 op0 = SET_DEST (x);
4867 op1 = SET_SRC (x);
4869 switch (GET_CODE (op0))
4871 case MEM:
4872 if (speed)
4873 *cost += extra_cost->ldst.store;
4875 *cost += rtx_cost (op1, SET, 1, speed);
4876 return true;
4878 case SUBREG:
4879 if (! REG_P (SUBREG_REG (op0)))
4880 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4882 /* Fall through. */
4883 case REG:
4884 /* const0_rtx is in general free, but we will use an
4885 instruction to set a register to 0. */
4886 if (REG_P (op1) || op1 == const0_rtx)
4888 /* The cost is 1 per register copied. */
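/* For example, a TImode register-to-register copy occupies two
   X-registers and so costs COSTS_N_INSNS (2), whereas an SImode or
   DImode copy costs COSTS_N_INSNS (1).  */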
4889 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4890 / UNITS_PER_WORD;
4891 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4893 else
4894 /* Cost is just the cost of the RHS of the set. */
4895 *cost += rtx_cost (op1, SET, 1, speed);
4896 return true;
4898 case ZERO_EXTRACT:
4899 case SIGN_EXTRACT:
4900 /* Bit-field insertion. Strip any redundant widening of
4901 the RHS to meet the width of the target. */
4902 if (GET_CODE (op1) == SUBREG)
4903 op1 = SUBREG_REG (op1);
4904 if ((GET_CODE (op1) == ZERO_EXTEND
4905 || GET_CODE (op1) == SIGN_EXTEND)
4906 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4907 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4908 >= INTVAL (XEXP (op0, 1))))
4909 op1 = XEXP (op1, 0);
4911 if (CONST_INT_P (op1))
4913 /* MOV immediate is assumed to always be cheap. */
4914 *cost = COSTS_N_INSNS (1);
4916 else
4918 /* BFM. */
4919 if (speed)
4920 *cost += extra_cost->alu.bfi;
4921 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4924 return true;
4926 default:
4927 /* We can't make sense of this; assume the default cost. */
4928 *cost = COSTS_N_INSNS (1);
4929 break;
4931 return false;
4933 case CONST_INT:
4934 /* If an instruction can incorporate a constant within the
4935 instruction, the instruction's expression avoids calling
4936 rtx_cost() on the constant. If rtx_cost() is called on a
4937 constant, then it is usually because the constant must be
4938 moved into a register by one or more instructions.
4940 The exception is constant 0, which can be expressed
4941 as XZR/WZR and is therefore free. The one case where this does
4942 not hold is (set (reg) (const0_rtx)), where we must cost
4943 the move. However, we can catch that when we cost the SET, so
4944 we don't need to consider that here. */
4945 if (x == const0_rtx)
4946 *cost = 0;
4947 else
4949 /* To a first approximation, the cost of building any other
4950 constant is proportional to the number of instructions
4951 required to build that constant. This is true whether we
4952 are compiling for SPEED or otherwise. */
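/* For instance, a constant such as 0x12345678 typically needs a
   MOVZ/MOVK pair and is therefore costed as COSTS_N_INSNS (2), while
   a single-MOV immediate costs COSTS_N_INSNS (1).  */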
4953 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4954 INTVAL (x),
4955 false));
4957 return true;
4959 case CONST_DOUBLE:
4960 if (speed)
4962 /* mov[df,sf]_aarch64. */
4963 if (aarch64_float_const_representable_p (x))
4964 /* FMOV (scalar immediate). */
4965 *cost += extra_cost->fp[mode == DFmode].fpconst;
4966 else if (!aarch64_float_const_zero_rtx_p (x))
4968 /* This will be a load from memory. */
4969 if (mode == DFmode)
4970 *cost += extra_cost->ldst.loadd;
4971 else
4972 *cost += extra_cost->ldst.loadf;
4974 else
4975 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4976 or MOV v0.s[0], wzr - neither of which is modeled by the
4977 cost tables. Just use the default cost. */
4982 return true;
4984 case MEM:
4985 if (speed)
4986 *cost += extra_cost->ldst.load;
4988 return true;
4990 case NEG:
4991 op0 = XEXP (x, 0);
4993 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4995 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4996 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4998 /* CSETM. */
4999 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5000 return true;
5003 /* Cost this as SUB wzr, X. */
5004 op0 = CONST0_RTX (GET_MODE (x));
5005 op1 = XEXP (x, 0);
5006 goto cost_minus;
5009 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5011 /* Support (neg(fma...)) as a single instruction only if
5012 sign of zeros is unimportant. This matches the decision
5013 making in aarch64.md. */
5014 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5016 /* FNMADD. */
5017 *cost = rtx_cost (op0, NEG, 0, speed);
5018 return true;
5020 if (speed)
5021 /* FNEG. */
5022 *cost += extra_cost->fp[mode == DFmode].neg;
5023 return false;
5026 return false;
5028 case COMPARE:
5029 op0 = XEXP (x, 0);
5030 op1 = XEXP (x, 1);
5032 if (op1 == const0_rtx
5033 && GET_CODE (op0) == AND)
5035 x = op0;
5036 goto cost_logic;
5039 /* Comparisons can work if the order is swapped.
5040 Canonicalization puts the more complex operation first, but
5041 we want it in op1. */
5042 if (! (REG_P (op0)
5043 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5045 op0 = XEXP (x, 1);
5046 op1 = XEXP (x, 0);
5048 goto cost_minus;
5050 case MINUS:
5052 op0 = XEXP (x, 0);
5053 op1 = XEXP (x, 1);
5055 cost_minus:
5056 /* Detect valid immediates. */
5057 if ((GET_MODE_CLASS (mode) == MODE_INT
5058 || (GET_MODE_CLASS (mode) == MODE_CC
5059 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5060 && CONST_INT_P (op1)
5061 && aarch64_uimm12_shift (INTVAL (op1)))
5063 *cost += rtx_cost (op0, MINUS, 0, speed);
5065 if (speed)
5066 /* SUB(S) (immediate). */
5067 *cost += extra_cost->alu.arith;
5068 return true;
5072 rtx new_op1 = aarch64_strip_extend (op1);
5074 /* Cost this as an FMA-alike operation. */
5075 if ((GET_CODE (new_op1) == MULT
5076 || GET_CODE (new_op1) == ASHIFT)
5077 && code != COMPARE)
5079 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5080 (enum rtx_code) code,
5081 speed);
5082 *cost += rtx_cost (op0, MINUS, 0, speed);
5083 return true;
5086 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5088 if (speed)
5090 if (GET_MODE_CLASS (mode) == MODE_INT)
5091 /* SUB(S). */
5092 *cost += extra_cost->alu.arith;
5093 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5094 /* FSUB. */
5095 *cost += extra_cost->fp[mode == DFmode].addsub;
5097 return true;
5100 case PLUS:
5102 rtx new_op0;
5104 op0 = XEXP (x, 0);
5105 op1 = XEXP (x, 1);
5107 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5108 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5110 /* CSINC. */
5111 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5112 *cost += rtx_cost (op1, PLUS, 1, speed);
5113 return true;
5116 if (GET_MODE_CLASS (mode) == MODE_INT
5117 && CONST_INT_P (op1)
5118 && aarch64_uimm12_shift (INTVAL (op1)))
5120 *cost += rtx_cost (op0, PLUS, 0, speed);
5122 if (speed)
5123 /* ADD (immediate). */
5124 *cost += extra_cost->alu.arith;
5125 return true;
5128 /* Strip any extend, leave shifts behind as we will
5129 cost them through mult_cost. */
5130 new_op0 = aarch64_strip_extend (op0);
5132 if (GET_CODE (new_op0) == MULT
5133 || GET_CODE (new_op0) == ASHIFT)
5135 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5136 speed);
5137 *cost += rtx_cost (op1, PLUS, 1, speed);
5138 return true;
5141 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5142 + rtx_cost (op1, PLUS, 1, speed));
5144 if (speed)
5146 if (GET_MODE_CLASS (mode) == MODE_INT)
5147 /* ADD. */
5148 *cost += extra_cost->alu.arith;
5149 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5150 /* FADD. */
5151 *cost += extra_cost->fp[mode == DFmode].addsub;
5153 return true;
5156 case BSWAP:
5157 *cost = COSTS_N_INSNS (1);
5159 if (speed)
5160 *cost += extra_cost->alu.rev;
5162 return false;
5164 case IOR:
5165 if (aarch_rev16_p (x))
5167 *cost = COSTS_N_INSNS (1);
5169 if (speed)
5170 *cost += extra_cost->alu.rev;
5172 return true;
5174 /* Fall through. */
5175 case XOR:
5176 case AND:
5177 cost_logic:
5178 op0 = XEXP (x, 0);
5179 op1 = XEXP (x, 1);
5181 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5183 if (CONST_INT_P (op1)
5184 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5186 *cost += rtx_cost (op0, AND, 0, speed);
5188 else
5190 if (GET_CODE (op0) == NOT)
5191 op0 = XEXP (op0, 0);
5192 op0 = aarch64_strip_shift (op0);
5193 *cost += (rtx_cost (op0, AND, 0, speed)
5194 + rtx_cost (op1, AND, 1, speed));
5196 return true;
5198 return false;
5200 case ZERO_EXTEND:
5201 if ((GET_MODE (x) == DImode
5202 && GET_MODE (XEXP (x, 0)) == SImode)
5203 || GET_CODE (XEXP (x, 0)) == MEM)
5205 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5206 return true;
5208 return false;
5210 case SIGN_EXTEND:
5211 if (GET_CODE (XEXP (x, 0)) == MEM)
5213 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
5214 return true;
5216 return false;
5218 case ROTATE:
5219 if (!CONST_INT_P (XEXP (x, 1)))
5220 *cost += COSTS_N_INSNS (2);
5221 /* Fall through. */
5222 case ROTATERT:
5223 case LSHIFTRT:
5224 case ASHIFT:
5225 case ASHIFTRT:
5227 /* Shifting by a register often takes an extra cycle. */
5228 if (speed && !CONST_INT_P (XEXP (x, 1)))
5229 *cost += extra_cost->alu.arith_shift_reg;
5231 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5232 return true;
5234 case HIGH:
5235 if (!CONSTANT_P (XEXP (x, 0)))
5236 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5237 return true;
5239 case LO_SUM:
5240 if (!CONSTANT_P (XEXP (x, 1)))
5241 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5242 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5243 return true;
5245 case ZERO_EXTRACT:
5246 case SIGN_EXTRACT:
5247 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5248 return true;
5250 case MULT:
5251 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5252 /* aarch64_rtx_mult_cost always handles recursion to its
5253 operands. */
5254 return true;
5256 case MOD:
5257 case UMOD:
5258 *cost = COSTS_N_INSNS (2);
5259 if (speed)
5261 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5262 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5263 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5264 else if (GET_MODE (x) == DFmode)
5265 *cost += (extra_cost->fp[1].mult
5266 + extra_cost->fp[1].div);
5267 else if (GET_MODE (x) == SFmode)
5268 *cost += (extra_cost->fp[0].mult
5269 + extra_cost->fp[0].div);
5271 return false; /* All arguments need to be in registers. */
5273 case DIV:
5274 case UDIV:
5275 *cost = COSTS_N_INSNS (1);
5276 if (speed)
5278 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5279 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5280 else if (GET_MODE (x) == DFmode)
5281 *cost += extra_cost->fp[1].div;
5282 else if (GET_MODE (x) == SFmode)
5283 *cost += extra_cost->fp[0].div;
5285 return false; /* All arguments need to be in registers. */
5287 default:
5288 break;
5290 return false;
5293 /* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
5294 calculated for X. This cost is stored in *COST. Returns true
5295 if the total cost of X was calculated. */
5296 static bool
5297 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5298 int param, int *cost, bool speed)
5300 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5302 if (dump_file && (dump_flags & TDF_DETAILS))
5304 print_rtl_single (dump_file, x);
5305 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5306 speed ? "Hot" : "Cold",
5307 *cost, result ? "final" : "partial");
5310 return result;
5313 static int
5314 aarch64_register_move_cost (enum machine_mode mode,
5315 reg_class_t from_i, reg_class_t to_i)
5317 enum reg_class from = (enum reg_class) from_i;
5318 enum reg_class to = (enum reg_class) to_i;
5319 const struct cpu_regmove_cost *regmove_cost
5320 = aarch64_tune_params->regmove_cost;
5322 /* Moving between a GPR and the stack register costs the same as GP2GP. */
5323 if ((from == GENERAL_REGS && to == STACK_REG)
5324 || (to == GENERAL_REGS && from == STACK_REG))
5325 return regmove_cost->GP2GP;
5327 /* To/From the stack register, we move via the gprs. */
5328 if (to == STACK_REG || from == STACK_REG)
5329 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5330 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5332 if (from == GENERAL_REGS && to == GENERAL_REGS)
5333 return regmove_cost->GP2GP;
5334 else if (from == GENERAL_REGS)
5335 return regmove_cost->GP2FP;
5336 else if (to == GENERAL_REGS)
5337 return regmove_cost->FP2GP;
5339 /* When AdvSIMD instructions are disabled it is not possible to move
5340 a 128-bit value directly between Q registers. This is handled in
5341 secondary reload. A general register is used as a scratch to move
5342 the upper DI value and the lower DI value is moved directly,
5343 hence the cost is the sum of three moves (GET_MODE_SIZE is in bytes). */
5344 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5345 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5347 return regmove_cost->FP2FP;
5350 static int
5351 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5352 reg_class_t rclass ATTRIBUTE_UNUSED,
5353 bool in ATTRIBUTE_UNUSED)
5355 return aarch64_tune_params->memmov_cost;
5358 /* Return the number of instructions that can be issued per cycle. */
5359 static int
5360 aarch64_sched_issue_rate (void)
5362 return aarch64_tune_params->issue_rate;
5365 /* Vectorizer cost model target hooks. */
5367 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5368 static int
5369 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5370 tree vectype,
5371 int misalign ATTRIBUTE_UNUSED)
5373 unsigned elements;
5375 switch (type_of_cost)
5377 case scalar_stmt:
5378 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5380 case scalar_load:
5381 return aarch64_tune_params->vec_costs->scalar_load_cost;
5383 case scalar_store:
5384 return aarch64_tune_params->vec_costs->scalar_store_cost;
5386 case vector_stmt:
5387 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5389 case vector_load:
5390 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5392 case vector_store:
5393 return aarch64_tune_params->vec_costs->vec_store_cost;
5395 case vec_to_scalar:
5396 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5398 case scalar_to_vec:
5399 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5401 case unaligned_load:
5402 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5404 case unaligned_store:
5405 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5407 case cond_branch_taken:
5408 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5410 case cond_branch_not_taken:
5411 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5413 case vec_perm:
5414 case vec_promote_demote:
5415 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5417 case vec_construct:
5418 elements = TYPE_VECTOR_SUBPARTS (vectype);
5419 return elements / 2 + 1;
5421 default:
5422 gcc_unreachable ();
5426 /* Implement targetm.vectorize.add_stmt_cost. */
5427 static unsigned
5428 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5429 struct _stmt_vec_info *stmt_info, int misalign,
5430 enum vect_cost_model_location where)
5432 unsigned *cost = (unsigned *) data;
5433 unsigned retval = 0;
5435 if (flag_vect_cost_model)
5437 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5438 int stmt_cost =
5439 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5441 /* Statements in an inner loop relative to the loop being
5442 vectorized are weighted more heavily. The value here is
5443 a function (linear for now) of the loop nest level. */
5444 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5446 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5447 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5448 unsigned nest_level = loop_depth (loop);
5450 count *= nest_level;
5453 retval = (unsigned) (count * stmt_cost);
5454 cost[where] += retval;
5457 return retval;
5460 static void initialize_aarch64_code_model (void);
5462 /* Parse the architecture extension string. */
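/* The string starts at the first '+' of the option value; for example,
   with -march=armv8-a+crypto+nofp this function receives "+crypto+nofp",
   turning on the crypto feature flags and then clearing the flags in the
   "fp" entry's flags_off set.  */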
5464 static void
5465 aarch64_parse_extension (char *str)
5467 /* The extension string is parsed left to right. */
5468 const struct aarch64_option_extension *opt = NULL;
5470 /* Flag to say whether we are adding or removing an extension. */
5471 int adding_ext = -1;
5473 while (str != NULL && *str != 0)
5475 char *ext;
5476 size_t len;
5478 str++;
5479 ext = strchr (str, '+');
5481 if (ext != NULL)
5482 len = ext - str;
5483 else
5484 len = strlen (str);
5486 if (len >= 2 && strncmp (str, "no", 2) == 0)
5488 adding_ext = 0;
5489 len -= 2;
5490 str += 2;
5492 else if (len > 0)
5493 adding_ext = 1;
5495 if (len == 0)
5497 error ("missing feature modifier after %qs", "+no");
5498 return;
5501 /* Scan over the extensions table trying to find an exact match. */
5502 for (opt = all_extensions; opt->name != NULL; opt++)
5504 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5506 /* Add or remove the extension. */
5507 if (adding_ext)
5508 aarch64_isa_flags |= opt->flags_on;
5509 else
5510 aarch64_isa_flags &= ~(opt->flags_off);
5511 break;
5515 if (opt->name == NULL)
5517 /* Extension not found in list. */
5518 error ("unknown feature modifier %qs", str);
5519 return;
5522 str = ext;
5525 return;
5528 /* Parse the ARCH string. */
5530 static void
5531 aarch64_parse_arch (void)
5533 char *ext;
5534 const struct processor *arch;
5535 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5536 size_t len;
5538 strcpy (str, aarch64_arch_string);
5540 ext = strchr (str, '+');
5542 if (ext != NULL)
5543 len = ext - str;
5544 else
5545 len = strlen (str);
5547 if (len == 0)
5549 error ("missing arch name in -march=%qs", str);
5550 return;
5553 /* Loop through the list of supported ARCHs to find a match. */
5554 for (arch = all_architectures; arch->name != NULL; arch++)
5556 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5558 selected_arch = arch;
5559 aarch64_isa_flags = selected_arch->flags;
5561 if (!selected_cpu)
5562 selected_cpu = &all_cores[selected_arch->core];
5564 if (ext != NULL)
5566 /* ARCH string contains at least one extension. */
5567 aarch64_parse_extension (ext);
5570 if (strcmp (selected_arch->arch, selected_cpu->arch))
5572 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5573 selected_cpu->name, selected_arch->name);
5576 return;
5580 /* ARCH name not found in list. */
5581 error ("unknown value %qs for -march", str);
5582 return;
5585 /* Parse the CPU string. */
5587 static void
5588 aarch64_parse_cpu (void)
5590 char *ext;
5591 const struct processor *cpu;
5592 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5593 size_t len;
5595 strcpy (str, aarch64_cpu_string);
5597 ext = strchr (str, '+');
5599 if (ext != NULL)
5600 len = ext - str;
5601 else
5602 len = strlen (str);
5604 if (len == 0)
5606 error ("missing cpu name in -mcpu=%qs", str);
5607 return;
5610 /* Loop through the list of supported CPUs to find a match. */
5611 for (cpu = all_cores; cpu->name != NULL; cpu++)
5613 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5615 selected_cpu = cpu;
5616 selected_tune = cpu;
5617 aarch64_isa_flags = selected_cpu->flags;
5619 if (ext != NULL)
5621 /* CPU string contains at least one extension. */
5622 aarch64_parse_extension (ext);
5625 return;
5629 /* CPU name not found in list. */
5630 error ("unknown value %qs for -mcpu", str);
5631 return;
5634 /* Parse the TUNE string. */
5636 static void
5637 aarch64_parse_tune (void)
5639 const struct processor *cpu;
5640 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5641 strcpy (str, aarch64_tune_string);
5643 /* Loop through the list of supported CPUs to find a match. */
5644 for (cpu = all_cores; cpu->name != NULL; cpu++)
5646 if (strcmp (cpu->name, str) == 0)
5648 selected_tune = cpu;
5649 return;
5653 /* CPU name not found in list. */
5654 error ("unknown value %qs for -mtune", str);
5655 return;
5659 /* Implement TARGET_OPTION_OVERRIDE. */
5661 static void
5662 aarch64_override_options (void)
5664 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5665 If either of -march or -mtune is given, they override their
5666 respective component of -mcpu.
5668 So, first parse AARCH64_CPU_STRING, then the others. Be careful
5669 with -march: if -mcpu is not present on the command line, -march
5670 must set a sensible default CPU. */
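/* For example, -mcpu=cortex-a57 -mtune=cortex-a53 selects the architecture
   and ISA flags of cortex-a57 but the tuning tables of cortex-a53.  */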
5671 if (aarch64_cpu_string)
5673 aarch64_parse_cpu ();
5676 if (aarch64_arch_string)
5678 aarch64_parse_arch ();
5681 if (aarch64_tune_string)
5683 aarch64_parse_tune ();
5686 #ifndef HAVE_AS_MABI_OPTION
5687 /* The compiler may have been configured with 2.23.* binutils, which does
5688 not have support for ILP32. */
5689 if (TARGET_ILP32)
5690 error ("Assembler does not support -mabi=ilp32");
5691 #endif
5693 initialize_aarch64_code_model ();
5695 aarch64_build_bitmask_table ();
5697 /* This target defaults to strict volatile bitfields. */
5698 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5699 flag_strict_volatile_bitfields = 1;
5701 /* If the user did not specify a processor, choose the default
5702 one for them. This will be the CPU set during configuration using
5703 --with-cpu, otherwise it is "generic". */
5704 if (!selected_cpu)
5706 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5707 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5710 gcc_assert (selected_cpu);
5712 /* The selected cpu may be an architecture, so look up tuning by core ID. */
5713 if (!selected_tune)
5714 selected_tune = &all_cores[selected_cpu->core];
5716 aarch64_tune_flags = selected_tune->flags;
5717 aarch64_tune = selected_tune->core;
5718 aarch64_tune_params = selected_tune->tune;
5720 aarch64_override_options_after_change ();
5723 /* Implement targetm.override_options_after_change. */
5725 static void
5726 aarch64_override_options_after_change (void)
5728 if (flag_omit_frame_pointer)
5729 flag_omit_leaf_frame_pointer = false;
5730 else if (flag_omit_leaf_frame_pointer)
5731 flag_omit_frame_pointer = true;
5734 static struct machine_function *
5735 aarch64_init_machine_status (void)
5737 struct machine_function *machine;
5738 machine = ggc_alloc_cleared_machine_function ();
5739 return machine;
5742 void
5743 aarch64_init_expanders (void)
5745 init_machine_status = aarch64_init_machine_status;
5748 /* A checking mechanism for the implementation of the various code models. */
5749 static void
5750 initialize_aarch64_code_model (void)
5752 if (flag_pic)
5754 switch (aarch64_cmodel_var)
5756 case AARCH64_CMODEL_TINY:
5757 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5758 break;
5759 case AARCH64_CMODEL_SMALL:
5760 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5761 break;
5762 case AARCH64_CMODEL_LARGE:
5763 sorry ("code model %qs with -f%s", "large",
5764 flag_pic > 1 ? "PIC" : "pic");
5765 default:
5766 gcc_unreachable ();
5769 else
5770 aarch64_cmodel = aarch64_cmodel_var;
5773 /* Return true if SYMBOL_REF X binds locally. */
5775 static bool
5776 aarch64_symbol_binds_local_p (const_rtx x)
5778 return (SYMBOL_REF_DECL (x)
5779 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5780 : SYMBOL_REF_LOCAL_P (x));
5783 /* Return true if SYMBOL_REF X is thread-local. */
5784 static bool
5785 aarch64_tls_symbol_p (rtx x)
5787 if (! TARGET_HAVE_TLS)
5788 return false;
5790 if (GET_CODE (x) != SYMBOL_REF)
5791 return false;
5793 return SYMBOL_REF_TLS_MODEL (x) != 0;
5796 /* Classify a TLS symbol into one of the TLS kinds. */
5797 enum aarch64_symbol_type
5798 aarch64_classify_tls_symbol (rtx x)
5800 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5802 switch (tls_kind)
5804 case TLS_MODEL_GLOBAL_DYNAMIC:
5805 case TLS_MODEL_LOCAL_DYNAMIC:
5806 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5808 case TLS_MODEL_INITIAL_EXEC:
5809 return SYMBOL_SMALL_GOTTPREL;
5811 case TLS_MODEL_LOCAL_EXEC:
5812 return SYMBOL_SMALL_TPREL;
5814 case TLS_MODEL_EMULATED:
5815 case TLS_MODEL_NONE:
5816 return SYMBOL_FORCE_TO_MEM;
5818 default:
5819 gcc_unreachable ();
5823 /* Return the method that should be used to access SYMBOL_REF or
5824 LABEL_REF X in context CONTEXT. */
5826 enum aarch64_symbol_type
5827 aarch64_classify_symbol (rtx x,
5828 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5830 if (GET_CODE (x) == LABEL_REF)
5832 switch (aarch64_cmodel)
5834 case AARCH64_CMODEL_LARGE:
5835 return SYMBOL_FORCE_TO_MEM;
5837 case AARCH64_CMODEL_TINY_PIC:
5838 case AARCH64_CMODEL_TINY:
5839 return SYMBOL_TINY_ABSOLUTE;
5841 case AARCH64_CMODEL_SMALL_PIC:
5842 case AARCH64_CMODEL_SMALL:
5843 return SYMBOL_SMALL_ABSOLUTE;
5845 default:
5846 gcc_unreachable ();
5850 if (GET_CODE (x) == SYMBOL_REF)
5852 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5853 return SYMBOL_FORCE_TO_MEM;
5855 if (aarch64_tls_symbol_p (x))
5856 return aarch64_classify_tls_symbol (x);
5858 switch (aarch64_cmodel)
5860 case AARCH64_CMODEL_TINY:
5861 if (SYMBOL_REF_WEAK (x))
5862 return SYMBOL_FORCE_TO_MEM;
5863 return SYMBOL_TINY_ABSOLUTE;
5865 case AARCH64_CMODEL_SMALL:
5866 if (SYMBOL_REF_WEAK (x))
5867 return SYMBOL_FORCE_TO_MEM;
5868 return SYMBOL_SMALL_ABSOLUTE;
5870 case AARCH64_CMODEL_TINY_PIC:
5871 if (!aarch64_symbol_binds_local_p (x))
5872 return SYMBOL_TINY_GOT;
5873 return SYMBOL_TINY_ABSOLUTE;
5875 case AARCH64_CMODEL_SMALL_PIC:
5876 if (!aarch64_symbol_binds_local_p (x))
5877 return SYMBOL_SMALL_GOT;
5878 return SYMBOL_SMALL_ABSOLUTE;
5880 default:
5881 gcc_unreachable ();
5885 /* By default push everything into the constant pool. */
5886 return SYMBOL_FORCE_TO_MEM;
5889 bool
5890 aarch64_constant_address_p (rtx x)
5892 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5895 bool
5896 aarch64_legitimate_pic_operand_p (rtx x)
5898 if (GET_CODE (x) == SYMBOL_REF
5899 || (GET_CODE (x) == CONST
5900 && GET_CODE (XEXP (x, 0)) == PLUS
5901 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5902 return false;
5904 return true;
5907 /* Return true if X holds either a quarter-precision constant or a
5908 floating-point +0.0 constant. */
5909 static bool
5910 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5912 if (!CONST_DOUBLE_P (x))
5913 return false;
5915 /* TODO: We could handle moving 0.0 to a TFmode register,
5916 but first we would like to refactor movtf_aarch64
5917 to be more amenable to splitting moves properly and to
5918 gating correctly on TARGET_SIMD. For now, reject all
5919 constants that are not destined for SFmode or DFmode registers. */
5920 if (!(mode == SFmode || mode == DFmode))
5921 return false;
5923 if (aarch64_float_const_zero_rtx_p (x))
5924 return true;
5925 return aarch64_float_const_representable_p (x);
5928 static bool
5929 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5931 /* Do not allow vector struct mode constants. We could support
5932 0 and -1 easily, but they need support in aarch64-simd.md. */
5933 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5934 return false;
5936 /* This could probably go away because
5937 we now decompose CONST_INTs according to expand_mov_immediate. */
5938 if ((GET_CODE (x) == CONST_VECTOR
5939 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5940 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5941 return !targetm.cannot_force_const_mem (mode, x);
5943 if (GET_CODE (x) == HIGH
5944 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5945 return true;
5947 return aarch64_constant_address_p (x);
5951 aarch64_load_tp (rtx target)
5953 if (!target
5954 || GET_MODE (target) != Pmode
5955 || !register_operand (target, Pmode))
5956 target = gen_reg_rtx (Pmode);
5958 /* Can return in any reg. */
5959 emit_insn (gen_aarch64_load_tp_hard (target));
5960 return target;
5963 /* On AAPCS systems, this is the "struct __va_list". */
5964 static GTY(()) tree va_list_type;
5966 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5967 Return the type to use as __builtin_va_list.
5969 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5971 struct __va_list
5973 void *__stack;
5974 void *__gr_top;
5975 void *__vr_top;
5976 int __gr_offs;
5977 int __vr_offs;
5978 }; */
5980 static tree
5981 aarch64_build_builtin_va_list (void)
5983 tree va_list_name;
5984 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5986 /* Create the type. */
5987 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5988 /* Give it the required name. */
5989 va_list_name = build_decl (BUILTINS_LOCATION,
5990 TYPE_DECL,
5991 get_identifier ("__va_list"),
5992 va_list_type);
5993 DECL_ARTIFICIAL (va_list_name) = 1;
5994 TYPE_NAME (va_list_type) = va_list_name;
5995 TYPE_STUB_DECL (va_list_type) = va_list_name;
5997 /* Create the fields. */
5998 f_stack = build_decl (BUILTINS_LOCATION,
5999 FIELD_DECL, get_identifier ("__stack"),
6000 ptr_type_node);
6001 f_grtop = build_decl (BUILTINS_LOCATION,
6002 FIELD_DECL, get_identifier ("__gr_top"),
6003 ptr_type_node);
6004 f_vrtop = build_decl (BUILTINS_LOCATION,
6005 FIELD_DECL, get_identifier ("__vr_top"),
6006 ptr_type_node);
6007 f_groff = build_decl (BUILTINS_LOCATION,
6008 FIELD_DECL, get_identifier ("__gr_offs"),
6009 integer_type_node);
6010 f_vroff = build_decl (BUILTINS_LOCATION,
6011 FIELD_DECL, get_identifier ("__vr_offs"),
6012 integer_type_node);
6014 DECL_ARTIFICIAL (f_stack) = 1;
6015 DECL_ARTIFICIAL (f_grtop) = 1;
6016 DECL_ARTIFICIAL (f_vrtop) = 1;
6017 DECL_ARTIFICIAL (f_groff) = 1;
6018 DECL_ARTIFICIAL (f_vroff) = 1;
6020 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6021 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6022 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6023 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6024 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6026 TYPE_FIELDS (va_list_type) = f_stack;
6027 DECL_CHAIN (f_stack) = f_grtop;
6028 DECL_CHAIN (f_grtop) = f_vrtop;
6029 DECL_CHAIN (f_vrtop) = f_groff;
6030 DECL_CHAIN (f_groff) = f_vroff;
6032 /* Compute its layout. */
6033 layout_type (va_list_type);
6035 return va_list_type;
6038 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6039 static void
6040 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6042 const CUMULATIVE_ARGS *cum;
6043 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6044 tree stack, grtop, vrtop, groff, vroff;
6045 tree t;
6046 int gr_save_area_size;
6047 int vr_save_area_size;
6048 int vr_offset;
6050 cum = &crtl->args.info;
6051 gr_save_area_size
6052 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6053 vr_save_area_size
6054 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6056 if (TARGET_GENERAL_REGS_ONLY)
6058 if (cum->aapcs_nvrn > 0)
6059 sorry ("%qs and floating point or vector arguments",
6060 "-mgeneral-regs-only");
6061 vr_save_area_size = 0;
6064 f_stack = TYPE_FIELDS (va_list_type_node);
6065 f_grtop = DECL_CHAIN (f_stack);
6066 f_vrtop = DECL_CHAIN (f_grtop);
6067 f_groff = DECL_CHAIN (f_vrtop);
6068 f_vroff = DECL_CHAIN (f_groff);
6070 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6071 NULL_TREE);
6072 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6073 NULL_TREE);
6074 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6075 NULL_TREE);
6076 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6077 NULL_TREE);
6078 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6079 NULL_TREE);
6081 /* Emit code to initialize STACK, which points to the next varargs stack
6082 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6083 by named arguments. STACK is 8-byte aligned. */
6084 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6085 if (cum->aapcs_stack_size > 0)
6086 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6087 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6088 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6090 /* Emit code to initialize GRTOP, the top of the GR save area.
6091 virtual_incoming_args_rtx should have been 16 byte aligned. */
6092 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6093 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6094 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6096 /* Emit code to initialize VRTOP, the top of the VR save area.
6097 This address is gr_save_area_bytes below GRTOP, rounded
6098 down to the next 16-byte boundary. */
6099 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6100 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6101 STACK_BOUNDARY / BITS_PER_UNIT);
6103 if (vr_offset)
6104 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6105 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6108 /* Emit code to initialize GROFF, the offset from GRTOP of the
6109 next GPR argument. */
6110 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6111 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6114 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6115 of the next VR argument. */
6116 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6117 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6118 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6121 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6123 static tree
6124 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6125 gimple_seq *post_p ATTRIBUTE_UNUSED)
6127 tree addr;
6128 bool indirect_p;
6129 bool is_ha; /* is HFA or HVA. */
6130 bool dw_align; /* double-word align. */
6131 enum machine_mode ag_mode = VOIDmode;
6132 int nregs;
6133 enum machine_mode mode;
6135 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6136 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6137 HOST_WIDE_INT size, rsize, adjust, align;
6138 tree t, u, cond1, cond2;
6140 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6141 if (indirect_p)
6142 type = build_pointer_type (type);
6144 mode = TYPE_MODE (type);
6146 f_stack = TYPE_FIELDS (va_list_type_node);
6147 f_grtop = DECL_CHAIN (f_stack);
6148 f_vrtop = DECL_CHAIN (f_grtop);
6149 f_groff = DECL_CHAIN (f_vrtop);
6150 f_vroff = DECL_CHAIN (f_groff);
6152 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6153 f_stack, NULL_TREE);
6154 size = int_size_in_bytes (type);
6155 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6157 dw_align = false;
6158 adjust = 0;
6159 if (aarch64_vfp_is_call_or_return_candidate (mode,
6160 type,
6161 &ag_mode,
6162 &nregs,
6163 &is_ha))
6165 /* TYPE passed in fp/simd registers. */
6166 if (TARGET_GENERAL_REGS_ONLY)
6167 sorry ("%qs and floating point or vector arguments",
6168 "-mgeneral-regs-only");
6170 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6171 unshare_expr (valist), f_vrtop, NULL_TREE);
6172 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6173 unshare_expr (valist), f_vroff, NULL_TREE);
6175 rsize = nregs * UNITS_PER_VREG;
6177 if (is_ha)
6179 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6180 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6182 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6183 && size < UNITS_PER_VREG)
6185 adjust = UNITS_PER_VREG - size;
6188 else
6190 /* TYPE passed in general registers. */
6191 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6192 unshare_expr (valist), f_grtop, NULL_TREE);
6193 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6194 unshare_expr (valist), f_groff, NULL_TREE);
6195 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6196 nregs = rsize / UNITS_PER_WORD;
6198 if (align > 8)
6199 dw_align = true;
6201 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6202 && size < UNITS_PER_WORD)
6204 adjust = UNITS_PER_WORD - size;
6208 /* Get a local temporary for the field value. */
6209 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6211 /* Emit code to branch if off >= 0. */
6212 t = build2 (GE_EXPR, boolean_type_node, off,
6213 build_int_cst (TREE_TYPE (off), 0));
6214 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6216 if (dw_align)
6218 /* Emit: offs = (offs + 15) & -16. */
6219 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6220 build_int_cst (TREE_TYPE (off), 15));
6221 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6222 build_int_cst (TREE_TYPE (off), -16));
6223 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6225 else
6226 roundup = NULL;
6228 /* Update ap.__[g|v]r_offs */
6229 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6230 build_int_cst (TREE_TYPE (off), rsize));
6231 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6233 /* String up. */
6234 if (roundup)
6235 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6237 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6238 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6239 build_int_cst (TREE_TYPE (f_off), 0));
6240 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6242 /* String up: make sure the assignment happens before the use. */
6243 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6244 COND_EXPR_ELSE (cond1) = t;
6246 /* Prepare the trees handling the argument that is passed on the stack;
6247 the top-level node will be stored in ON_STACK. */
6248 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6249 if (align > 8)
6251 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6252 t = fold_convert (intDI_type_node, arg);
6253 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6254 build_int_cst (TREE_TYPE (t), 15));
6255 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6256 build_int_cst (TREE_TYPE (t), -16));
6257 t = fold_convert (TREE_TYPE (arg), t);
6258 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6260 else
6261 roundup = NULL;
6262 /* Advance ap.__stack */
6263 t = fold_convert (intDI_type_node, arg);
6264 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6265 build_int_cst (TREE_TYPE (t), size + 7));
6266 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6267 build_int_cst (TREE_TYPE (t), -8));
6268 t = fold_convert (TREE_TYPE (arg), t);
6269 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6270 /* String up roundup and advance. */
6271 if (roundup)
6272 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6273 /* String up with arg */
6274 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6275 /* Big-endianness related address adjustment. */
6276 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6277 && size < UNITS_PER_WORD)
6279 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6280 size_int (UNITS_PER_WORD - size));
6281 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6284 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6285 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6287 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6288 t = off;
6289 if (adjust)
6290 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6291 build_int_cst (TREE_TYPE (off), adjust));
6293 t = fold_convert (sizetype, t);
6294 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6296 if (is_ha)
6298 /* type ha; // treat as "struct {ftype field[n];}"
6299 ... [computing offs]
6300 for (i = 0; i < nregs; ++i, offs += 16)
6301 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6302 return ha; */
6303 int i;
6304 tree tmp_ha, field_t, field_ptr_t;
6306 /* Declare a local variable. */
6307 tmp_ha = create_tmp_var_raw (type, "ha");
6308 gimple_add_tmp_var (tmp_ha);
6310 /* Establish the base type. */
6311 switch (ag_mode)
6313 case SFmode:
6314 field_t = float_type_node;
6315 field_ptr_t = float_ptr_type_node;
6316 break;
6317 case DFmode:
6318 field_t = double_type_node;
6319 field_ptr_t = double_ptr_type_node;
6320 break;
6321 case TFmode:
6322 field_t = long_double_type_node;
6323 field_ptr_t = long_double_ptr_type_node;
6324 break;
6325 /* Half-precision and quad-precision floats are not fully supported yet.  Enable
6326 the following code once that support is complete; we still need to find the
6327 correct type node for __fp16 *. */
6328 #if 0
6329 case HFmode:
6330 field_t = float_type_node;
6331 field_ptr_t = float_ptr_type_node;
6332 break;
6333 #endif
6334 case V2SImode:
6335 case V4SImode:
6337 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6338 field_t = build_vector_type_for_mode (innertype, ag_mode);
6339 field_ptr_t = build_pointer_type (field_t);
6341 break;
6342 default:
6343 gcc_assert (0);
6346 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
6347 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6348 addr = t;
6349 t = fold_convert (field_ptr_t, addr);
6350 t = build2 (MODIFY_EXPR, field_t,
6351 build1 (INDIRECT_REF, field_t, tmp_ha),
6352 build1 (INDIRECT_REF, field_t, t));
6354 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6355 for (i = 1; i < nregs; ++i)
6357 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6358 u = fold_convert (field_ptr_t, addr);
6359 u = build2 (MODIFY_EXPR, field_t,
6360 build2 (MEM_REF, field_t, tmp_ha,
6361 build_int_cst (field_ptr_t,
6362 (i *
6363 int_size_in_bytes (field_t)))),
6364 build1 (INDIRECT_REF, field_t, u));
6365 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6368 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6369 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6372 COND_EXPR_ELSE (cond2) = t;
6373 addr = fold_convert (build_pointer_type (type), cond1);
6374 addr = build_va_arg_indirect_ref (addr);
6376 if (indirect_p)
6377 addr = build_va_arg_indirect_ref (addr);
6379 return addr;
6382 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6384 static void
6385 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6386 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6387 int no_rtl)
6389 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6390 CUMULATIVE_ARGS local_cum;
6391 int gr_saved, vr_saved;
6393 /* The caller has advanced CUM up to, but not beyond, the last named
6394 argument. Advance a local copy of CUM past the last "real" named
6395 argument, to find out how many registers are left over. */
6396 local_cum = *cum;
6397 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
6399 /* Find out how many registers we need to save. */
6400 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6401 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6403 if (TARGET_GENERAL_REGS_ONLY)
6405 if (local_cum.aapcs_nvrn > 0)
6406 sorry ("%qs and floating point or vector arguments",
6407 "-mgeneral-regs-only");
6408 vr_saved = 0;
6411 if (!no_rtl)
6413 if (gr_saved > 0)
6415 rtx ptr, mem;
6417 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6418 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6419 - gr_saved * UNITS_PER_WORD);
6420 mem = gen_frame_mem (BLKmode, ptr);
6421 set_mem_alias_set (mem, get_varargs_alias_set ());
6423 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6424 mem, gr_saved);
6426 if (vr_saved > 0)
6428 /* We can't use move_block_from_reg, because it will use
6429 the wrong mode, storing D regs only. */
6430 enum machine_mode mode = TImode;
6431 int off, i;
6433 /* Set OFF to the offset from virtual_incoming_args_rtx of
6434 the first vector register. The VR save area lies below
6435 the GR one, and is aligned to 16 bytes. */
6436 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6437 STACK_BOUNDARY / BITS_PER_UNIT);
6438 off -= vr_saved * UNITS_PER_VREG;
6440 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6442 rtx ptr, mem;
6444 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6445 mem = gen_frame_mem (mode, ptr);
6446 set_mem_alias_set (mem, get_varargs_alias_set ());
6447 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6448 off += UNITS_PER_VREG;
6453 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6454 any complication of having crtl->args.pretend_args_size changed. */
6455 cfun->machine->saved_varargs_size
6456 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6457 STACK_BOUNDARY / BITS_PER_UNIT)
6458 + vr_saved * UNITS_PER_VREG);
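/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  If the target has no
   floating-point/SIMD support, mark the V0-V31 registers as fixed and
   call-used so the register allocator never touches them.  */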
6461 static void
6462 aarch64_conditional_register_usage (void)
6464 int i;
6465 if (!TARGET_FLOAT)
6467 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6469 fixed_regs[i] = 1;
6470 call_used_regs[i] = 1;
6475 /* Walk down the type tree of TYPE counting consecutive base elements.
6476 If *MODEP is VOIDmode, then set it to the first valid floating point
6477 type. If a non-floating point type is found, or if a floating point
6478 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6479 otherwise return the count in the sub-tree. */
6480 static int
6481 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6483 enum machine_mode mode;
6484 HOST_WIDE_INT size;
6486 switch (TREE_CODE (type))
6488 case REAL_TYPE:
6489 mode = TYPE_MODE (type);
6490 if (mode != DFmode && mode != SFmode && mode != TFmode)
6491 return -1;
6493 if (*modep == VOIDmode)
6494 *modep = mode;
6496 if (*modep == mode)
6497 return 1;
6499 break;
6501 case COMPLEX_TYPE:
6502 mode = TYPE_MODE (TREE_TYPE (type));
6503 if (mode != DFmode && mode != SFmode && mode != TFmode)
6504 return -1;
6506 if (*modep == VOIDmode)
6507 *modep = mode;
6509 if (*modep == mode)
6510 return 2;
6512 break;
6514 case VECTOR_TYPE:
6515 /* Use V2SImode and V4SImode as representatives of all 64-bit
6516 and 128-bit vector types. */
6517 size = int_size_in_bytes (type);
6518 switch (size)
6520 case 8:
6521 mode = V2SImode;
6522 break;
6523 case 16:
6524 mode = V4SImode;
6525 break;
6526 default:
6527 return -1;
6530 if (*modep == VOIDmode)
6531 *modep = mode;
6533 /* Vector modes are considered to be opaque: two vectors are
6534 equivalent for the purposes of being homogeneous aggregates
6535 if they are the same size. */
6536 if (*modep == mode)
6537 return 1;
6539 break;
6541 case ARRAY_TYPE:
6543 int count;
6544 tree index = TYPE_DOMAIN (type);
6546 /* Can't handle incomplete types nor sizes that are not
6547 fixed. */
6548 if (!COMPLETE_TYPE_P (type)
6549 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6550 return -1;
6552 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6553 if (count == -1
6554 || !index
6555 || !TYPE_MAX_VALUE (index)
6556 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6557 || !TYPE_MIN_VALUE (index)
6558 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6559 || count < 0)
6560 return -1;
6562 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6563 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6565 /* There must be no padding. */
6566 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6567 return -1;
6569 return count;
6572 case RECORD_TYPE:
6574 int count = 0;
6575 int sub_count;
6576 tree field;
6578 /* Can't handle incomplete types nor sizes that are not
6579 fixed. */
6580 if (!COMPLETE_TYPE_P (type)
6581 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6582 return -1;
6584 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6586 if (TREE_CODE (field) != FIELD_DECL)
6587 continue;
6589 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6590 if (sub_count < 0)
6591 return -1;
6592 count += sub_count;
6595 /* There must be no padding. */
6596 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6597 return -1;
6599 return count;
6602 case UNION_TYPE:
6603 case QUAL_UNION_TYPE:
6605 /* These aren't very interesting except in a degenerate case. */
6606 int count = 0;
6607 int sub_count;
6608 tree field;
6610 /* Can't handle incomplete types nor sizes that are not
6611 fixed. */
6612 if (!COMPLETE_TYPE_P (type)
6613 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6614 return -1;
6616 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6618 if (TREE_CODE (field) != FIELD_DECL)
6619 continue;
6621 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6622 if (sub_count < 0)
6623 return -1;
6624 count = count > sub_count ? count : sub_count;
6627 /* There must be no padding. */
6628 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6629 return -1;
6631 return count;
6634 default:
6635 break;
6638 return -1;
6641 /* Return true if we use LRA instead of reload pass. */
6642 static bool
6643 aarch64_lra_p (void)
6645 return aarch64_lra_flag;
6648 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6649 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6650 array types. The C99 floating-point complex types are also considered
6651 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6652 types, which are GCC extensions and out of the scope of AAPCS64, are
6653 treated as composite types here as well.
6655 Note that MODE itself is not sufficient in determining whether a type
6656 is such a composite type or not. This is because
6657 stor-layout.c:compute_record_mode may have already changed the MODE
6658 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6659 structure with only one field may have its MODE set to the mode of the
6660 field. Also an integer mode whose size matches the size of the
6661 RECORD_TYPE type may be used to substitute the original mode
6662 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6663 solely relied on. */
6665 static bool
6666 aarch64_composite_type_p (const_tree type,
6667 enum machine_mode mode)
6669 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6670 return true;
6672 if (mode == BLKmode
6673 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6674 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6675 return true;
6677 return false;
6680 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6681 type as described in AAPCS64 \S 4.1.2.
6683 See the comment above aarch64_composite_type_p for the notes on MODE. */
6685 static bool
6686 aarch64_short_vector_p (const_tree type,
6687 enum machine_mode mode)
6689 HOST_WIDE_INT size = -1;
6691 if (type && TREE_CODE (type) == VECTOR_TYPE)
6692 size = int_size_in_bytes (type);
6693 else if (!aarch64_composite_type_p (type, mode)
6694 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6695 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6696 size = GET_MODE_SIZE (mode);
6698 return (size == 8 || size == 16);
6701 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6702 shall be passed or returned in simd/fp register(s) (providing these
6703 parameter passing registers are available).
6705 Upon successful return, *COUNT returns the number of needed registers,
6706 *BASE_MODE returns the mode of the individual register and when IS_HA
6707 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6708 floating-point aggregate or a homogeneous short-vector aggregate. */
6710 static bool
6711 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6712 const_tree type,
6713 enum machine_mode *base_mode,
6714 int *count,
6715 bool *is_ha)
6717 enum machine_mode new_mode = VOIDmode;
6718 bool composite_p = aarch64_composite_type_p (type, mode);
6720 if (is_ha != NULL) *is_ha = false;
6722 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6723 || aarch64_short_vector_p (type, mode))
6725 *count = 1;
6726 new_mode = mode;
6728 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6730 if (is_ha != NULL) *is_ha = true;
6731 *count = 2;
6732 new_mode = GET_MODE_INNER (mode);
6734 else if (type && composite_p)
6736 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6738 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6740 if (is_ha != NULL) *is_ha = true;
6741 *count = ag_count;
6743 else
6744 return false;
6746 else
6747 return false;
6749 *base_mode = new_mode;
6750 return true;
6753 /* Implement TARGET_STRUCT_VALUE_RTX. */
6755 static rtx
6756 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6757 int incoming ATTRIBUTE_UNUSED)
6759 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6762 /* Implements target hook vector_mode_supported_p. */
6763 static bool
6764 aarch64_vector_mode_supported_p (enum machine_mode mode)
6766 if (TARGET_SIMD
6767 && (mode == V4SImode || mode == V8HImode
6768 || mode == V16QImode || mode == V2DImode
6769 || mode == V2SImode || mode == V4HImode
6770 || mode == V8QImode || mode == V2SFmode
6771 || mode == V4SFmode || mode == V2DFmode))
6772 return true;
6774 return false;
6777 /* Return appropriate SIMD container
6778 for MODE within a vector of WIDTH bits. */
6779 static enum machine_mode
6780 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6782 gcc_assert (width == 64 || width == 128);
6783 if (TARGET_SIMD)
6785 if (width == 128)
6786 switch (mode)
6788 case DFmode:
6789 return V2DFmode;
6790 case SFmode:
6791 return V4SFmode;
6792 case SImode:
6793 return V4SImode;
6794 case HImode:
6795 return V8HImode;
6796 case QImode:
6797 return V16QImode;
6798 case DImode:
6799 return V2DImode;
6800 default:
6801 break;
6803 else
6804 switch (mode)
6806 case SFmode:
6807 return V2SFmode;
6808 case SImode:
6809 return V2SImode;
6810 case HImode:
6811 return V4HImode;
6812 case QImode:
6813 return V8QImode;
6814 default:
6815 break;
6818 return word_mode;
6821 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6822 static enum machine_mode
6823 aarch64_preferred_simd_mode (enum machine_mode mode)
6825 return aarch64_simd_container_mode (mode, 128);
6828 /* Return the bitmask of possible vector sizes for the vectorizer
6829 to iterate over. */
6830 static unsigned int
6831 aarch64_autovectorize_vector_sizes (void)
6833 return (16 | 8);
6836 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6837 vector types in order to conform to the AAPCS64 (see "Procedure
6838 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6839 qualify for emission with the mangled names defined in that document,
6840 a vector type must not only be of the correct mode but also be
6841 composed of AdvSIMD vector element types (e.g.
6842 __builtin_aarch64_simd_qi); these types are registered by
6843 aarch64_init_simd_builtins (). In other words, vector types defined
6844 in other ways e.g. via vector_size attribute will get default
6845 mangled names. */
6846 typedef struct
6848 enum machine_mode mode;
6849 const char *element_type_name;
6850 const char *mangled_name;
6851 } aarch64_simd_mangle_map_entry;
6853 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6854 /* 64-bit containerized types. */
6855 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6856 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6857 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6858 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6859 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6860 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6861 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6862 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6863 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6864 /* 128-bit containerized types. */
6865 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6866 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6867 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6868 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6869 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6870 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6871 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6872 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6873 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6874 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6875 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6876 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6877 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6878 { VOIDmode, NULL, NULL }
6881 /* Implement TARGET_MANGLE_TYPE. */
6883 static const char *
6884 aarch64_mangle_type (const_tree type)
6886 /* The AArch64 ABI documents say that "__va_list" has to be
6887 mangled as if it is in the "std" namespace. */
6888 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6889 return "St9__va_list";
6891 /* Check the mode of the vector type, and the name of the vector
6892 element type, against the table. */
6893 if (TREE_CODE (type) == VECTOR_TYPE)
6895 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6897 while (pos->mode != VOIDmode)
6899 tree elt_type = TREE_TYPE (type);
6901 if (pos->mode == TYPE_MODE (type)
6902 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6903 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6904 pos->element_type_name))
6905 return pos->mangled_name;
6907 pos++;
6911 /* Use the default mangling. */
6912 return NULL;
6915 /* Return the equivalent letter for size. */
6916 static char
6917 sizetochar (int size)
6919 switch (size)
6921 case 64: return 'd';
6922 case 32: return 's';
6923 case 16: return 'h';
6924 case 8 : return 'b';
6925 default: gcc_unreachable ();
6929 /* Return true iff x is a uniform vector of floating-point
6930 constants, and the constant can be represented in
6931 quarter-precision form. Note, as aarch64_float_const_representable_p
6932 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6933 static bool
6934 aarch64_vect_float_const_representable_p (rtx x)
6936 int i = 0;
6937 REAL_VALUE_TYPE r0, ri;
6938 rtx x0, xi;
6940 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6941 return false;
6943 x0 = CONST_VECTOR_ELT (x, 0);
6944 if (!CONST_DOUBLE_P (x0))
6945 return false;
6947 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6949 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6951 xi = CONST_VECTOR_ELT (x, i);
6952 if (!CONST_DOUBLE_P (xi))
6953 return false;
6955 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6956 if (!REAL_VALUES_EQUAL (r0, ri))
6957 return false;
6960 return aarch64_float_const_representable_p (x0);
6963 /* Return true if OP is a valid SIMD immediate for MODE, false otherwise.  If INFO is nonnull, fill it in with the details needed to generate the immediate. */
6964 bool
6965 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6966 struct simd_immediate_info *info)
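/* Helper for the search below: test every group of STRIDE bytes of the
   splatted constant against TEST; if they all match, record the immediate
   class CLASS, element size ELSIZE, shift amount SHIFT and MVN flag NEG,
   then stop searching.  */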
6968 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6969 matches = 1; \
6970 for (i = 0; i < idx; i += (STRIDE)) \
6971 if (!(TEST)) \
6972 matches = 0; \
6973 if (matches) \
6975 immtype = (CLASS); \
6976 elsize = (ELSIZE); \
6977 eshift = (SHIFT); \
6978 emvn = (NEG); \
6979 break; \
6982 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6983 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6984 unsigned char bytes[16];
6985 int immtype = -1, matches;
6986 unsigned int invmask = inverse ? 0xff : 0;
6987 int eshift, emvn;
6989 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6991 if (! (aarch64_simd_imm_zero_p (op, mode)
6992 || aarch64_vect_float_const_representable_p (op)))
6993 return false;
6995 if (info)
6997 info->value = CONST_VECTOR_ELT (op, 0);
6998 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6999 info->mvn = false;
7000 info->shift = 0;
7003 return true;
7006 /* Splat vector constant out into a byte vector. */
7007 for (i = 0; i < n_elts; i++)
7009 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7010 it must be laid out in the vector register in reverse order. */
7011 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7012 unsigned HOST_WIDE_INT elpart;
7013 unsigned int part, parts;
7015 if (GET_CODE (el) == CONST_INT)
7017 elpart = INTVAL (el);
7018 parts = 1;
7020 else if (GET_CODE (el) == CONST_DOUBLE)
7022 elpart = CONST_DOUBLE_LOW (el);
7023 parts = 2;
7025 else
7026 gcc_unreachable ();
7028 for (part = 0; part < parts; part++)
7030 unsigned int byte;
7031 for (byte = 0; byte < innersize; byte++)
7033 bytes[idx++] = (elpart & 0xff) ^ invmask;
7034 elpart >>= BITS_PER_UNIT;
7036 if (GET_CODE (el) == CONST_DOUBLE)
7037 elpart = CONST_DOUBLE_HIGH (el);
7041 /* Sanity check. */
7042 gcc_assert (idx == GET_MODE_SIZE (mode));
7046 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7047 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7049 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7050 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7052 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7053 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7055 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7056 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7058 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7060 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7062 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7063 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7065 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7066 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7068 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7069 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7071 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7072 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7074 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7076 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7078 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7079 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7081 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7082 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7084 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7085 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7087 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7088 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7090 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7092 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7093 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7095 while (0);
7097 if (immtype == -1)
7098 return false;
7100 if (info)
7102 info->element_width = elsize;
7103 info->mvn = emvn != 0;
7104 info->shift = eshift;
7106 unsigned HOST_WIDE_INT imm = 0;
7108 if (immtype >= 12 && immtype <= 15)
7109 info->msl = true;
7111 /* Un-invert bytes of recognized vector, if necessary. */
7112 if (invmask != 0)
7113 for (i = 0; i < idx; i++)
7114 bytes[i] ^= invmask;
7116 if (immtype == 17)
7118 /* FIXME: Broken on 32-bit H_W_I hosts. */
7119 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7121 for (i = 0; i < 8; i++)
7122 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7123 << (i * BITS_PER_UNIT);
7126 info->value = GEN_INT (imm);
7128 else
7130 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7131 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7133 /* Construct 'abcdefgh' because the assembler cannot handle
7134 generic constants. */
7135 if (info->mvn)
7136 imm = ~imm;
7137 imm = (imm >> info->shift) & 0xff;
7138 info->value = GEN_INT (imm);
7142 return true;
7143 #undef CHECK
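/* Return true if X is a CONST_VECTOR of integers whose elements are all
   equal and lie in the range [MINVAL, MAXVAL].  */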
7146 static bool
7147 aarch64_const_vec_all_same_int_p (rtx x,
7148 HOST_WIDE_INT minval,
7149 HOST_WIDE_INT maxval)
7151 HOST_WIDE_INT firstval;
7152 int count, i;
7154 if (GET_CODE (x) != CONST_VECTOR
7155 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7156 return false;
7158 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7159 if (firstval < minval || firstval > maxval)
7160 return false;
7162 count = CONST_VECTOR_NUNITS (x);
7163 for (i = 1; i < count; i++)
7164 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7165 return false;
7167 return true;
7170 /* Check if immediate shift constants are within range. */
7171 bool
7172 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7174 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7175 if (left)
7176 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7177 else
7178 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7181 /* Return true if X is a uniform vector where all elements
7182 are either the floating-point constant 0.0 or the
7183 integer constant 0. */
7184 bool
7185 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7187 return x == CONST0_RTX (mode);
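/* Return true if every byte of the 64-bit immediate X is either 0x00 or
   0xff, i.e. the form of scalar immediate accepted by MOVI with a 64-bit
   element size.  */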
7190 bool
7191 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7193 HOST_WIDE_INT imm = INTVAL (x);
7194 int i;
7196 for (i = 0; i < 8; i++)
7198 unsigned int byte = imm & 0xff;
7199 if (byte != 0xff && byte != 0)
7200 return false;
7201 imm >>= 8;
7204 return true;
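/* Return true if X is a legitimate MOV operand for MODE: a HIGH of a valid
   symbol reference, an integer that aarch64_move_imm accepts, a constant
   symbolic address in DImode, or a symbol classified as SYMBOL_TINY_ABSOLUTE
   in CONTEXT.  */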
7207 bool
7208 aarch64_mov_operand_p (rtx x,
7209 enum aarch64_symbol_context context,
7210 enum machine_mode mode)
7212 if (GET_CODE (x) == HIGH
7213 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7214 return true;
7216 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7217 return true;
7219 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7220 return true;
7222 return aarch64_classify_symbolic_expression (x, context)
7223 == SYMBOL_TINY_ABSOLUTE;
7226 /* Return a const_int vector of VAL. */
7227 rtx
7228 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7230 int nunits = GET_MODE_NUNITS (mode);
7231 rtvec v = rtvec_alloc (nunits);
7232 int i;
7234 for (i = 0; i < nunits; i++)
7235 RTVEC_ELT (v, i) = GEN_INT (val);
7237 return gen_rtx_CONST_VECTOR (mode, v);
7240 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7242 bool
7243 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7245 enum machine_mode vmode;
7247 gcc_assert (!VECTOR_MODE_P (mode));
7248 vmode = aarch64_preferred_simd_mode (mode);
7249 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7250 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7253 /* Construct and return a PARALLEL RTX vector. */
7254 rtx
7255 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7257 int nunits = GET_MODE_NUNITS (mode);
7258 rtvec v = rtvec_alloc (nunits / 2);
7259 int base = high ? nunits / 2 : 0;
7260 rtx t1;
7261 int i;
7263 for (i = 0; i < nunits / 2; i++)
7264 RTVEC_ELT (v, i) = GEN_INT (base + i);
7266 t1 = gen_rtx_PARALLEL (mode, v);
7267 return t1;
7270 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7271 HIGH (exclusive). */
7272 void
7273 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7275 HOST_WIDE_INT lane;
7276 gcc_assert (GET_CODE (operand) == CONST_INT);
7277 lane = INTVAL (operand);
7279 if (lane < low || lane >= high)
7280 error ("lane out of range");
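/* Check that OPERAND is a CONST_INT lying between LOW (inclusive) and HIGH
   (exclusive); report an error if it is out of range.  */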
7283 void
7284 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7286 gcc_assert (GET_CODE (operand) == CONST_INT);
7287 HOST_WIDE_INT lane = INTVAL (operand);
7289 if (lane < low || lane >= high)
7290 error ("constant out of range");
7293 /* Emit code to reinterpret one AdvSIMD type as another,
7294 without altering bits. */
7295 void
7296 aarch64_simd_reinterpret (rtx dest, rtx src)
7298 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7301 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7302 registers). */
7303 void
7304 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7305 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7306 rtx op1)
7308 rtx mem = gen_rtx_MEM (mode, destaddr);
7309 rtx tmp1 = gen_reg_rtx (mode);
7310 rtx tmp2 = gen_reg_rtx (mode);
7312 emit_insn (intfn (tmp1, op1, tmp2));
7314 emit_move_insn (mem, tmp1);
7315 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7316 emit_move_insn (mem, tmp2);
7319 /* Return TRUE if OP is a valid vector addressing mode. */
7320 bool
7321 aarch64_simd_mem_operand_p (rtx op)
7323 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7324 || GET_CODE (XEXP (op, 0)) == REG);
7327 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7328 not to early-clobber SRC registers in the process.
7330 We assume that the operands described by SRC and DEST represent a
7331 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7332 number of components into which the copy has been decomposed. */
7333 void
7334 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7335 rtx *src, unsigned int count)
7337 unsigned int i;
7339 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7340 || REGNO (operands[0]) < REGNO (operands[1]))
7342 for (i = 0; i < count; i++)
7344 operands[2 * i] = dest[i];
7345 operands[2 * i + 1] = src[i];
7348 else
7350 for (i = 0; i < count; i++)
7352 operands[2 * i] = dest[count - i - 1];
7353 operands[2 * i + 1] = src[count - i - 1];
7358 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7359 one of VSTRUCT modes: OI, CI or XI. */
7360 int
7361 aarch64_simd_attr_length_move (rtx insn)
7363 enum machine_mode mode;
7365 extract_insn_cached (insn);
7367 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7369 mode = GET_MODE (recog_data.operand[0]);
7370 switch (mode)
7372 case OImode:
7373 return 8;
7374 case CImode:
7375 return 12;
7376 case XImode:
7377 return 16;
7378 default:
7379 gcc_unreachable ();
7382 return 4;
7385 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7386 alignment of a vector to 128 bits. */
7387 static HOST_WIDE_INT
7388 aarch64_simd_vector_alignment (const_tree type)
7390 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7391 return MIN (align, 128);
7394 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7395 static bool
7396 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7398 if (is_packed)
7399 return false;
7401 /* We guarantee alignment for vectors up to 128 bits. */
7402 if (tree_int_cst_compare (TYPE_SIZE (type),
7403 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7404 return false;
7406 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7407 return true;
7410 /* If VALS is a vector constant that can be loaded into a register
7411 using DUP, generate instructions to do so and return an RTX to
7412 assign to the register. Otherwise return NULL_RTX. */
7413 static rtx
7414 aarch64_simd_dup_constant (rtx vals)
7416 enum machine_mode mode = GET_MODE (vals);
7417 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7418 int n_elts = GET_MODE_NUNITS (mode);
7419 bool all_same = true;
7420 rtx x;
7421 int i;
7423 if (GET_CODE (vals) != CONST_VECTOR)
7424 return NULL_RTX;
7426 for (i = 1; i < n_elts; ++i)
7428 x = CONST_VECTOR_ELT (vals, i);
7429 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7430 all_same = false;
7433 if (!all_same)
7434 return NULL_RTX;
7436 /* We can load this constant by using DUP and a constant in a
7437 single ARM register. This will be cheaper than a vector
7438 load. */
7439 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7440 return gen_rtx_VEC_DUPLICATE (mode, x);
7444 /* Generate code to load VALS, which is a PARALLEL containing only
7445 constants (for vec_init) or CONST_VECTOR, efficiently into a
7446 register. Returns an RTX to copy into the register, or NULL_RTX
7447 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7448 static rtx
7449 aarch64_simd_make_constant (rtx vals)
7451 enum machine_mode mode = GET_MODE (vals);
7452 rtx const_dup;
7453 rtx const_vec = NULL_RTX;
7454 int n_elts = GET_MODE_NUNITS (mode);
7455 int n_const = 0;
7456 int i;
7458 if (GET_CODE (vals) == CONST_VECTOR)
7459 const_vec = vals;
7460 else if (GET_CODE (vals) == PARALLEL)
7462 /* A CONST_VECTOR must contain only CONST_INTs and
7463 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7464 Only store valid constants in a CONST_VECTOR. */
7465 for (i = 0; i < n_elts; ++i)
7467 rtx x = XVECEXP (vals, 0, i);
7468 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7469 n_const++;
7471 if (n_const == n_elts)
7472 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7474 else
7475 gcc_unreachable ();
7477 if (const_vec != NULL_RTX
7478 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7479 /* Load using MOVI/MVNI. */
7480 return const_vec;
7481 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7482 /* Loaded using DUP. */
7483 return const_dup;
7484 else if (const_vec != NULL_RTX)
7485 /* Load from constant pool. We can not take advantage of single-cycle
7486 LD1 because we need a PC-relative addressing mode. */
7487 return const_vec;
7488 else
7489 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7490 We can not construct an initializer. */
7491 return NULL_RTX;
7494 void
7495 aarch64_expand_vector_init (rtx target, rtx vals)
7497 enum machine_mode mode = GET_MODE (target);
7498 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7499 int n_elts = GET_MODE_NUNITS (mode);
7500 int n_var = 0, one_var = -1;
7501 bool all_same = true;
7502 rtx x, mem;
7503 int i;
7505 x = XVECEXP (vals, 0, 0);
7506 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7507 n_var = 1, one_var = 0;
7509 for (i = 1; i < n_elts; ++i)
7511 x = XVECEXP (vals, 0, i);
7512 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7513 ++n_var, one_var = i;
7515 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7516 all_same = false;
7519 if (n_var == 0)
7521 rtx constant = aarch64_simd_make_constant (vals);
7522 if (constant != NULL_RTX)
7524 emit_move_insn (target, constant);
7525 return;
7529 /* Splat a single non-constant element if we can. */
7530 if (all_same)
7532 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7533 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7534 return;
7537 /* One field is non-constant. Load constant then overwrite varying
7538 field. This is more efficient than using the stack. */
7539 if (n_var == 1)
7541 rtx copy = copy_rtx (vals);
7542 rtx index = GEN_INT (one_var);
7543 enum insn_code icode;
7545 /* Load constant part of vector, substitute neighboring value for
7546 varying element. */
7547 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7548 aarch64_expand_vector_init (target, copy);
7550 /* Insert variable. */
7551 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7552 icode = optab_handler (vec_set_optab, mode);
7553 gcc_assert (icode != CODE_FOR_nothing);
7554 emit_insn (GEN_FCN (icode) (target, x, index));
7555 return;
7558 /* Construct the vector in memory one field at a time
7559 and load the whole vector. */
7560 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7561 for (i = 0; i < n_elts; i++)
7562 emit_move_insn (adjust_address_nv (mem, inner_mode,
7563 i * GET_MODE_SIZE (inner_mode)),
7564 XVECEXP (vals, 0, i));
7565 emit_move_insn (target, mem);
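/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Scalar shift counts are
   truncated modulo the width of the mode, but vector (and vector structure)
   shift counts are not, so return 0 for those modes.  */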
7569 static unsigned HOST_WIDE_INT
7570 aarch64_shift_truncation_mask (enum machine_mode mode)
7572 return
7573 (aarch64_vector_mode_supported_p (mode)
7574 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7577 #ifndef TLS_SECTION_ASM_FLAG
7578 #define TLS_SECTION_ASM_FLAG 'T'
7579 #endif
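/* Output the assembly directive that switches to section NAME with the
   given FLAGS, building the ELF flag string, section type and, where
   necessary, the COMDAT group operand.  */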
7581 void
7582 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7583 tree decl ATTRIBUTE_UNUSED)
7585 char flagchars[10], *f = flagchars;
7587 /* If we have already declared this section, we can use an
7588 abbreviated form to switch back to it -- unless this section is
7589 part of a COMDAT group, in which case GAS requires the full
7590 declaration every time. */
7591 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7592 && (flags & SECTION_DECLARED))
7594 fprintf (asm_out_file, "\t.section\t%s\n", name);
7595 return;
7598 if (!(flags & SECTION_DEBUG))
7599 *f++ = 'a';
7600 if (flags & SECTION_WRITE)
7601 *f++ = 'w';
7602 if (flags & SECTION_CODE)
7603 *f++ = 'x';
7604 if (flags & SECTION_SMALL)
7605 *f++ = 's';
7606 if (flags & SECTION_MERGE)
7607 *f++ = 'M';
7608 if (flags & SECTION_STRINGS)
7609 *f++ = 'S';
7610 if (flags & SECTION_TLS)
7611 *f++ = TLS_SECTION_ASM_FLAG;
7612 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7613 *f++ = 'G';
7614 *f = '\0';
7616 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7618 if (!(flags & SECTION_NOTYPE))
7620 const char *type;
7621 const char *format;
7623 if (flags & SECTION_BSS)
7624 type = "nobits";
7625 else
7626 type = "progbits";
7628 #ifdef TYPE_OPERAND_FMT
7629 format = "," TYPE_OPERAND_FMT;
7630 #else
7631 format = ",@%s";
7632 #endif
7634 fprintf (asm_out_file, format, type);
7636 if (flags & SECTION_ENTSIZE)
7637 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7638 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7640 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7641 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7642 else
7643 fprintf (asm_out_file, ",%s,comdat",
7644 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7648 putc ('\n', asm_out_file);
7651 /* Select a format to encode pointers in exception handling data. */
7652 int
7653 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7655 int type;
7656 switch (aarch64_cmodel)
7658 case AARCH64_CMODEL_TINY:
7659 case AARCH64_CMODEL_TINY_PIC:
7660 case AARCH64_CMODEL_SMALL:
7661 case AARCH64_CMODEL_SMALL_PIC:
7662 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7663 for everything. */
7664 type = DW_EH_PE_sdata4;
7665 break;
7666 default:
7667 /* No assumptions here. 8-byte relocs required. */
7668 type = DW_EH_PE_sdata8;
7669 break;
7671 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7674 /* Emit load exclusive. */
7676 static void
7677 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7678 rtx mem, rtx model_rtx)
7680 rtx (*gen) (rtx, rtx, rtx);
7682 switch (mode)
7684 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7685 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7686 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7687 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7688 default:
7689 gcc_unreachable ();
7692 emit_insn (gen (rval, mem, model_rtx));
7695 /* Emit store exclusive. */
7697 static void
7698 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7699 rtx rval, rtx mem, rtx model_rtx)
7701 rtx (*gen) (rtx, rtx, rtx, rtx);
7703 switch (mode)
7705 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7706 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7707 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7708 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7709 default:
7710 gcc_unreachable ();
7713 emit_insn (gen (bval, rval, mem, model_rtx));
7716 /* Mark the previous jump instruction as unlikely. */
7718 static void
7719 aarch64_emit_unlikely_jump (rtx insn)
7721 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7723 insn = emit_jump_insn (insn);
7724 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7727 /* Expand a compare and swap pattern. */
7729 void
7730 aarch64_expand_compare_and_swap (rtx operands[])
7732 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7733 enum machine_mode mode, cmp_mode;
7734 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7736 bval = operands[0];
7737 rval = operands[1];
7738 mem = operands[2];
7739 oldval = operands[3];
7740 newval = operands[4];
7741 is_weak = operands[5];
7742 mod_s = operands[6];
7743 mod_f = operands[7];
7744 mode = GET_MODE (mem);
7745 cmp_mode = mode;
7747 /* Normally the succ memory model must be stronger than fail, but in the
7748 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7749 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7751 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7752 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7753 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7755 switch (mode)
7757 case QImode:
7758 case HImode:
7759 /* For short modes, we're going to perform the comparison in SImode,
7760 so do the zero-extension now. */
7761 cmp_mode = SImode;
7762 rval = gen_reg_rtx (SImode);
7763 oldval = convert_modes (SImode, mode, oldval, true);
7764 /* Fall through. */
7766 case SImode:
7767 case DImode:
7768 /* Force the value into a register if needed. */
7769 if (!aarch64_plus_operand (oldval, mode))
7770 oldval = force_reg (cmp_mode, oldval);
7771 break;
7773 default:
7774 gcc_unreachable ();
7777 switch (mode)
7779 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7780 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7781 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7782 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7783 default:
7784 gcc_unreachable ();
7787 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7789 if (mode == QImode || mode == HImode)
7790 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7792 x = gen_rtx_REG (CCmode, CC_REGNUM);
7793 x = gen_rtx_EQ (SImode, x, const0_rtx);
7794 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7797 /* Split a compare and swap pattern. */
7799 void
7800 aarch64_split_compare_and_swap (rtx operands[])
7802 rtx rval, mem, oldval, newval, scratch;
7803 enum machine_mode mode;
7804 bool is_weak;
7805 rtx label1, label2, x, cond;
7807 rval = operands[0];
7808 mem = operands[1];
7809 oldval = operands[2];
7810 newval = operands[3];
7811 is_weak = (operands[4] != const0_rtx);
7812 scratch = operands[7];
7813 mode = GET_MODE (mem);
7815 label1 = NULL_RTX;
7816 if (!is_weak)
7818 label1 = gen_label_rtx ();
7819 emit_label (label1);
7821 label2 = gen_label_rtx ();
7823 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7825 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7826 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7827 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7828 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7829 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7831 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7833 if (!is_weak)
7835 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7836 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7837 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7838 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7840 else
7842 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7843 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7844 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7847 emit_label (label2);
7850 /* Split an atomic operation. */
7852 void
7853 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7854 rtx value, rtx model_rtx, rtx cond)
7856 enum machine_mode mode = GET_MODE (mem);
7857 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7858 rtx label, x;
7860 label = gen_label_rtx ();
7861 emit_label (label);
7863 if (new_out)
7864 new_out = gen_lowpart (wmode, new_out);
7865 if (old_out)
7866 old_out = gen_lowpart (wmode, old_out);
7867 else
7868 old_out = new_out;
7869 value = simplify_gen_subreg (wmode, value, mode, 0);
7871 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7873 switch (code)
7875 case SET:
7876 new_out = value;
7877 break;
7879 case NOT:
7880 x = gen_rtx_AND (wmode, old_out, value);
7881 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7882 x = gen_rtx_NOT (wmode, new_out);
7883 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7884 break;
7886 case MINUS:
7887 if (CONST_INT_P (value))
7889 value = GEN_INT (-INTVAL (value));
7890 code = PLUS;
7892 /* Fall through. */
7894 default:
7895 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7896 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7897 break;
7900 aarch64_emit_store_exclusive (mode, cond, mem,
7901 gen_lowpart (mode, new_out), model_rtx);
7903 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7904 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7905 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7906 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
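/* Print "+<extension>" for every architecture extension whose feature flags
   are all enabled in aarch64_isa_flags, followed by a newline.  */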
7909 static void
7910 aarch64_print_extension (void)
7912 const struct aarch64_option_extension *opt = NULL;
7914 for (opt = all_extensions; opt->name != NULL; opt++)
7915 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7916 asm_fprintf (asm_out_file, "+%s", opt->name);
7918 asm_fprintf (asm_out_file, "\n");
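/* Implement TARGET_ASM_FILE_START.  Emit a .arch or .cpu directive (with
   any extensions) describing the selected target, then perform the default
   file-start processing.  */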
7921 static void
7922 aarch64_start_file (void)
7924 if (selected_arch)
7926 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7927 aarch64_print_extension ();
7929 else if (selected_cpu)
7931 const char *truncated_name
7932 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7933 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7934 aarch64_print_extension ();
7936 default_file_start ();
7939 /* Target hook for c_mode_for_suffix. */
7940 static enum machine_mode
7941 aarch64_c_mode_for_suffix (char suffix)
7943 if (suffix == 'q')
7944 return TFmode;
7946 return VOIDmode;
7949 /* We can only represent floating point constants which will fit in
7950 "quarter-precision" values. These values are characterised by
7951 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7954 (-1)^s * (n/16) * 2^r
7956 Where:
7957 's' is the sign bit.
7958 'n' is an integer in the range 16 <= n <= 31.
7959 'r' is an integer in the range -3 <= r <= 4. */
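/* For illustration: 1.0 is encoded with s = 0, n = 16, r = 0, and 0.25 with
   s = 0, n = 16, r = -2; both satisfy the ranges above, whereas e.g. 0.1
   has no such representation.  */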
7961 /* Return true iff X can be represented by a quarter-precision
7962 floating point immediate operand. Note, we cannot represent 0.0. */
7963 bool
7964 aarch64_float_const_representable_p (rtx x)
7966 /* This represents our current view of how many bits
7967 make up the mantissa. */
7968 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7969 int exponent;
7970 unsigned HOST_WIDE_INT mantissa, mask;
7971 REAL_VALUE_TYPE r, m;
7972 bool fail;
7974 if (!CONST_DOUBLE_P (x))
7975 return false;
7977 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7979 /* We cannot represent infinities, NaNs or +/-zero. We won't
7980 know if we have +zero until we analyse the mantissa, but we
7981 can reject the other invalid values. */
7982 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7983 || REAL_VALUE_MINUS_ZERO (r))
7984 return false;
7986 /* Extract exponent. */
7987 r = real_value_abs (&r);
7988 exponent = REAL_EXP (&r);
7990 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7991 highest (sign) bit, with a fixed binary point at bit point_pos.
7992 The low element of W holds the low part of the mantissa, the high element the high part.
7993 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7994 bits for the mantissa, this can fail (low bits will be lost). */
7995 real_ldexp (&m, &r, point_pos - exponent);
7996 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
7998 /* If the low part of the mantissa has bits set we cannot represent
7999 the value. */
8000 if (w.elt (0) != 0)
8001 return false;
8002 /* We have rejected the lower HOST_WIDE_INT, so update our
8003 understanding of how many bits lie in the mantissa and
8004 look only at the high HOST_WIDE_INT. */
8005 mantissa = w.elt (1);
8006 point_pos -= HOST_BITS_PER_WIDE_INT;
8008 /* We can only represent values with a mantissa of the form 1.xxxx. */
8009 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8010 if ((mantissa & mask) != 0)
8011 return false;
8013 /* Having filtered unrepresentable values, we may now remove all
8014 but the highest 5 bits. */
8015 mantissa >>= point_pos - 5;
8017 /* We cannot represent the value 0.0, so reject it. This is handled
8018 elsewhere. */
8019 if (mantissa == 0)
8020 return false;
8022 /* Then, as bit 4 is always set, we can mask it off, leaving
8023 the mantissa in the range [0, 15]. */
8024 mantissa &= ~(1 << 4);
8025 gcc_assert (mantissa <= 15);
8027 /* GCC internally does not use IEEE754-like encoding (where normalized
8028 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8029 Our mantissa values are shifted 4 places to the left relative to
8030 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8031 by 5 places to correct for GCC's representation. */
8032 exponent = 5 - exponent;
8034 return (exponent >= 0 && exponent <= 7);
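/* Return the asm template for moving the SIMD immediate CONST_VECTOR of
   MODE into a WIDTH-bit vector register, using FMOV for floating-point
   immediates and MOVI/MVNI (optionally shifted) otherwise.  */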
8037 char*
8038 aarch64_output_simd_mov_immediate (rtx const_vector,
8039 enum machine_mode mode,
8040 unsigned width)
8042 bool is_valid;
8043 static char templ[40];
8044 const char *mnemonic;
8045 const char *shift_op;
8046 unsigned int lane_count = 0;
8047 char element_char;
8049 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8051 /* This will return true to show const_vector is legal for use as either
8052 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8053 also update INFO to show how the immediate should be generated. */
8054 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8055 gcc_assert (is_valid);
8057 element_char = sizetochar (info.element_width);
8058 lane_count = width / info.element_width;
8060 mode = GET_MODE_INNER (mode);
8061 if (mode == SFmode || mode == DFmode)
8063 gcc_assert (info.shift == 0 && ! info.mvn);
8064 if (aarch64_float_const_zero_rtx_p (info.value))
8065 info.value = GEN_INT (0);
8066 else
8068 #define buf_size 20
8069 REAL_VALUE_TYPE r;
8070 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8071 char float_buf[buf_size] = {'\0'};
8072 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8073 #undef buf_size
8075 if (lane_count == 1)
8076 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8077 else
8078 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8079 lane_count, element_char, float_buf);
8080 return templ;
8084 mnemonic = info.mvn ? "mvni" : "movi";
8085 shift_op = info.msl ? "msl" : "lsl";
8087 if (lane_count == 1)
8088 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8089 mnemonic, UINTVAL (info.value));
8090 else if (info.shift)
8091 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8092 ", %s %d", mnemonic, lane_count, element_char,
8093 UINTVAL (info.value), shift_op, info.shift);
8094 else
8095 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8096 mnemonic, lane_count, element_char, UINTVAL (info.value));
8097 return templ;
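/* As above, but for a scalar IMMEDIATE of MODE: duplicate it into a 64-bit
   vector and output the corresponding vector move.  */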
8100 char*
8101 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8102 enum machine_mode mode)
8104 enum machine_mode vmode;
8106 gcc_assert (!VECTOR_MODE_P (mode));
8107 vmode = aarch64_simd_container_mode (mode, 64);
8108 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8109 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8112 /* Split operands into moves from op[1] + op[2] into op[0]. */
8114 void
8115 aarch64_split_combinev16qi (rtx operands[3])
8117 unsigned int dest = REGNO (operands[0]);
8118 unsigned int src1 = REGNO (operands[1]);
8119 unsigned int src2 = REGNO (operands[2]);
8120 enum machine_mode halfmode = GET_MODE (operands[1]);
8121 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8122 rtx destlo, desthi;
8124 gcc_assert (halfmode == V16QImode);
8126 if (src1 == dest && src2 == dest + halfregs)
8128 /* No-op move. Can't split to nothing; emit something. */
8129 emit_note (NOTE_INSN_DELETED);
8130 return;
8133 /* Preserve register attributes for variable tracking. */
8134 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8135 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8136 GET_MODE_SIZE (halfmode));
8138 /* Special case of reversed high/low parts. */
8139 if (reg_overlap_mentioned_p (operands[2], destlo)
8140 && reg_overlap_mentioned_p (operands[1], desthi))
8142 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8143 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8144 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8146 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8148 /* Try to avoid unnecessary moves if part of the result
8149 is in the right place already. */
8150 if (src1 != dest)
8151 emit_move_insn (destlo, operands[1]);
8152 if (src2 != dest + halfregs)
8153 emit_move_insn (desthi, operands[2]);
8155 else
8157 if (src2 != dest + halfregs)
8158 emit_move_insn (desthi, operands[2]);
8159 if (src1 != dest)
8160 emit_move_insn (destlo, operands[1]);
8164 /* vec_perm support. */
8166 #define MAX_VECT_LEN 16
8168 struct expand_vec_perm_d
8170 rtx target, op0, op1;
8171 unsigned char perm[MAX_VECT_LEN];
8172 enum machine_mode vmode;
8173 unsigned char nelt;
8174 bool one_vector_p;
8175 bool testing_p;
8178 /* Generate a variable permutation. */
8180 static void
8181 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8183 enum machine_mode vmode = GET_MODE (target);
8184 bool one_vector_p = rtx_equal_p (op0, op1);
8186 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8187 gcc_checking_assert (GET_MODE (op0) == vmode);
8188 gcc_checking_assert (GET_MODE (op1) == vmode);
8189 gcc_checking_assert (GET_MODE (sel) == vmode);
8190 gcc_checking_assert (TARGET_SIMD);
8192 if (one_vector_p)
8194 if (vmode == V8QImode)
8196 /* Expand the argument to a V16QI mode by duplicating it. */
8197 rtx pair = gen_reg_rtx (V16QImode);
8198 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8199 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8201 else
8203 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8206 else
8208 rtx pair;
8210 if (vmode == V8QImode)
8212 pair = gen_reg_rtx (V16QImode);
8213 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8214 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8216 else
8218 pair = gen_reg_rtx (OImode);
8219 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8220 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8225 void
8226 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8228 enum machine_mode vmode = GET_MODE (target);
8229 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8230 bool one_vector_p = rtx_equal_p (op0, op1);
8231 rtx rmask[MAX_VECT_LEN], mask;
8233 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8235 /* The TBL instruction does not use a modulo index, so we must take care
8236 of that ourselves. */
8237 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8238 for (i = 0; i < nelt; ++i)
8239 rmask[i] = mask;
8240 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8241 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8243 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8246 /* Recognize patterns suitable for the TRN instructions. */
8247 static bool
8248 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8250 unsigned int i, odd, mask, nelt = d->nelt;
8251 rtx out, in0, in1, x;
8252 rtx (*gen) (rtx, rtx, rtx);
8253 enum machine_mode vmode = d->vmode;
8255 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8256 return false;
8258 /* Note that these are little-endian tests.
8259 We correct for big-endian later. */
8260 if (d->perm[0] == 0)
8261 odd = 0;
8262 else if (d->perm[0] == 1)
8263 odd = 1;
8264 else
8265 return false;
8266 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8268 for (i = 0; i < nelt; i += 2)
8270 if (d->perm[i] != i + odd)
8271 return false;
8272 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8273 return false;
8276 /* Success! */
8277 if (d->testing_p)
8278 return true;
8280 in0 = d->op0;
8281 in1 = d->op1;
8282 if (BYTES_BIG_ENDIAN)
8284 x = in0, in0 = in1, in1 = x;
8285 odd = !odd;
8287 out = d->target;
8289 if (odd)
8291 switch (vmode)
8293 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8294 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8295 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8296 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8297 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8298 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8299 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8300 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8301 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8302 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8303 default:
8304 return false;
8307 else
8309 switch (vmode)
8311 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8312 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8313 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8314 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8315 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8316 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8317 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8318 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8319 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8320 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8321 default:
8322 return false;
8326 emit_insn (gen (out, in0, in1));
8327 return true;
8330 /* Recognize patterns suitable for the UZP instructions. */
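/* For V4SImode, for instance, UZP1 selects the even-numbered elements
{ 0, 2, 4, 6 } of the concatenated inputs and UZP2 selects the odd-numbered
elements { 1, 3, 5, 7 }. */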
8331 static bool
8332 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8334 unsigned int i, odd, mask, nelt = d->nelt;
8335 rtx out, in0, in1, x;
8336 rtx (*gen) (rtx, rtx, rtx);
8337 enum machine_mode vmode = d->vmode;
8339 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8340 return false;
8342 /* Note that these are little-endian tests.
8343 We correct for big-endian later. */
8344 if (d->perm[0] == 0)
8345 odd = 0;
8346 else if (d->perm[0] == 1)
8347 odd = 1;
8348 else
8349 return false;
8350 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8352 for (i = 0; i < nelt; i++)
8354 unsigned elt = (i * 2 + odd) & mask;
8355 if (d->perm[i] != elt)
8356 return false;
8359 /* Success! */
8360 if (d->testing_p)
8361 return true;
8363 in0 = d->op0;
8364 in1 = d->op1;
8365 if (BYTES_BIG_ENDIAN)
8367 x = in0, in0 = in1, in1 = x;
8368 odd = !odd;
8370 out = d->target;
8372 if (odd)
8374 switch (vmode)
8376 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8377 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8378 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8379 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8380 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8381 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8382 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8383 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8384 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8385 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8386 default:
8387 return false;
8390 else
8392 switch (vmode)
8394 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8395 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8396 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8397 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8398 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8399 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8400 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8401 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8402 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8403 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8404 default:
8405 return false;
8409 emit_insn (gen (out, in0, in1));
8410 return true;
8413 /* Recognize patterns suitable for the ZIP instructions. */
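/* For V4SImode, for instance, ZIP1 interleaves the low halves of the two
inputs, selecting { 0, 4, 1, 5 }, and ZIP2 interleaves the high halves,
selecting { 2, 6, 3, 7 }. */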
8414 static bool
8415 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8417 unsigned int i, high, mask, nelt = d->nelt;
8418 rtx out, in0, in1, x;
8419 rtx (*gen) (rtx, rtx, rtx);
8420 enum machine_mode vmode = d->vmode;
8422 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8423 return false;
8425 /* Note that these are little-endian tests.
8426 We correct for big-endian later. */
8427 high = nelt / 2;
8428 if (d->perm[0] == high)
8429 /* Do nothing. */
8431 else if (d->perm[0] == 0)
8432 high = 0;
8433 else
8434 return false;
8435 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8437 for (i = 0; i < nelt / 2; i++)
8439 unsigned elt = (i + high) & mask;
8440 if (d->perm[i * 2] != elt)
8441 return false;
8442 elt = (elt + nelt) & mask;
8443 if (d->perm[i * 2 + 1] != elt)
8444 return false;
8447 /* Success! */
8448 if (d->testing_p)
8449 return true;
8451 in0 = d->op0;
8452 in1 = d->op1;
8453 if (BYTES_BIG_ENDIAN)
8455 x = in0, in0 = in1, in1 = x;
8456 high = !high;
8458 out = d->target;
8460 if (high)
8462 switch (vmode)
8464 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8465 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8466 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8467 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8468 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8469 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8470 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8471 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8472 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8473 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8474 default:
8475 return false;
8478 else
8480 switch (vmode)
8482 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8483 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8484 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8485 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8486 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8487 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8488 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8489 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8490 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8491 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8492 default:
8493 return false;
8497 emit_insn (gen (out, in0, in1));
8498 return true;
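/* Recognize permutations that broadcast a single element, e.g.
{ 2, 2, 2, 2 } for V4SImode, and expand them with a DUP-by-lane
instruction. */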
8501 static bool
8502 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8504 rtx (*gen) (rtx, rtx, rtx);
8505 rtx out = d->target;
8506 rtx in0;
8507 enum machine_mode vmode = d->vmode;
8508 unsigned int i, elt, nelt = d->nelt;
8509 rtx lane;
8511 /* TODO: This may not be big-endian safe. */
8512 if (BYTES_BIG_ENDIAN)
8513 return false;
8515 elt = d->perm[0];
8516 for (i = 1; i < nelt; i++)
8518 if (elt != d->perm[i])
8519 return false;
8522 /* The generic preparation in aarch64_expand_vec_perm_const_1
8523 swaps the operand order and the permute indices if it finds
8524 d->perm[0] to be in the second operand. Thus, we can always
8525 use d->op0 and need not do any extra arithmetic to get the
8526 correct lane number. */
8527 in0 = d->op0;
8528 lane = GEN_INT (elt);
8530 switch (vmode)
8532 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8533 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8534 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8535 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8536 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8537 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8538 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8539 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8540 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8541 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8542 default:
8543 return false;
8546 emit_insn (gen (out, in0, lane));
8547 return true;
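/* Fall back to a full table lookup: materialize the permutation indices
(adjusted for big-endian) as a constant vector and let
aarch64_expand_vec_perm_1 emit the TBL sequence. */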
8550 static bool
8551 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8553 rtx rperm[MAX_VECT_LEN], sel;
8554 enum machine_mode vmode = d->vmode;
8555 unsigned int i, nelt = d->nelt;
8557 if (d->testing_p)
8558 return true;
8560 /* Generic code will try constant permutation twice: once with the
8561 original mode and again with the elements lowered to QImode. So
8562 wait, and don't do the selector expansion ourselves. */
8563 if (vmode != V8QImode && vmode != V16QImode)
8564 return false;
8566 for (i = 0; i < nelt; ++i)
8568 int nunits = GET_MODE_NUNITS (vmode);
8570 /* With big-endian and two input vectors we end up with a mixed-endian
8571 mode on NEON. Reverse the index within each word but not the word
8572 itself. */
8573 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
8574 : d->perm[i]);
8576 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8577 sel = force_reg (vmode, sel);
8579 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8580 return true;
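/* Try to expand the constant permutation described by D: first with the
single-instruction recognizers (ZIP, UZP, TRN, DUP) and finally with a
generic TBL sequence. Return true on success. */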
8583 static bool
8584 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8586 /* The pattern-matching functions above are written to look for a small
8587 number (0, 1 or N/2) at the start of the sequence. If we begin with an
8588 index from the second operand instead, we can swap the operands. */
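/* For V4SImode, for example, { 5, 1, 7, 3 } starts in the second operand;
XOR-ing every index with nelt gives { 1, 5, 3, 7 } on the swapped operands,
which the TRN2 recognizer then matches. */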
8589 if (d->perm[0] >= d->nelt)
8591 unsigned i, nelt = d->nelt;
8592 rtx x;
8594 gcc_assert (nelt == (nelt & -nelt));
8595 for (i = 0; i < nelt; ++i)
8596 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
8598 x = d->op0;
8599 d->op0 = d->op1;
8600 d->op1 = x;
8603 if (TARGET_SIMD)
8605 if (aarch64_evpc_zip (d))
8606 return true;
8607 else if (aarch64_evpc_uzp (d))
8608 return true;
8609 else if (aarch64_evpc_trn (d))
8610 return true;
8611 else if (aarch64_evpc_dup (d))
8612 return true;
8613 return aarch64_evpc_tbl (d);
8615 return false;
8618 /* Expand a vec_perm_const pattern. */
8620 bool
8621 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8623 struct expand_vec_perm_d d;
8624 int i, nelt, which;
8626 d.target = target;
8627 d.op0 = op0;
8628 d.op1 = op1;
8630 d.vmode = GET_MODE (target);
8631 gcc_assert (VECTOR_MODE_P (d.vmode));
8632 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8633 d.testing_p = false;
8635 for (i = which = 0; i < nelt; ++i)
8637 rtx e = XVECEXP (sel, 0, i);
8638 int ei = INTVAL (e) & (2 * nelt - 1);
8639 which |= (ei < nelt ? 1 : 2);
8640 d.perm[i] = ei;
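/* WHICH is now a bitmask: bit 0 is set if any selector element refers to
OP0 and bit 1 if any refers to OP1. Fold single-input permutations
(including the case where both operands are the same register) onto a
single vector so the recognizers have less to match. */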
8643 switch (which)
8645 default:
8646 gcc_unreachable ();
8648 case 3:
8649 d.one_vector_p = false;
8650 if (!rtx_equal_p (op0, op1))
8651 break;
8653 /* The elements of PERM do not suggest that only the first operand
8654 is used, but both operands are identical. Allow easier matching
8655 of the permutation by folding the permutation into the single
8656 input vector. */
8657 /* Fall through. */
8658 case 2:
8659 for (i = 0; i < nelt; ++i)
8660 d.perm[i] &= nelt - 1;
8661 d.op0 = op1;
8662 d.one_vector_p = true;
8663 break;
8665 case 1:
8666 d.op1 = op0;
8667 d.one_vector_p = true;
8668 break;
8671 return aarch64_expand_vec_perm_const_1 (&d);
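/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. Check whether the constant
selector SEL is expandable for VMODE by running the expander on scratch
registers inside a discarded insn sequence. */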
8674 static bool
8675 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8676 const unsigned char *sel)
8678 struct expand_vec_perm_d d;
8679 unsigned int i, nelt, which;
8680 bool ret;
8682 d.vmode = vmode;
8683 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8684 d.testing_p = true;
8685 memcpy (d.perm, sel, nelt);
8687 /* Calculate whether all elements are in one vector. */
8688 for (i = which = 0; i < nelt; ++i)
8690 unsigned char e = d.perm[i];
8691 gcc_assert (e < 2 * nelt);
8692 which |= (e < nelt ? 1 : 2);
8695 /* If all elements are from the second vector, reindex as if from the
8696 first vector. */
8697 if (which == 2)
8698 for (i = 0; i < nelt; ++i)
8699 d.perm[i] -= nelt;
8701 /* Check whether the mask can be applied to a single vector. */
8702 d.one_vector_p = (which != 3);
8704 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8705 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8706 if (!d.one_vector_p)
8707 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8709 start_sequence ();
8710 ret = aarch64_expand_vec_perm_const_1 (&d);
8711 end_sequence ();
8713 return ret;
8716 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8717 bool
8718 aarch64_cannot_change_mode_class (enum machine_mode from,
8719 enum machine_mode to,
8720 enum reg_class rclass)
8722 /* Full-reg subregs are allowed on general regs, or on any class when
8723 the modes are the same size. */
8724 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8725 || !reg_classes_intersect_p (FP_REGS, rclass))
8726 return false;
8728 /* Limited combinations of subregs are safe on FPREGs. In particular,
8729 1. Vector mode to scalar mode, where one unit of the vector is accessed.
8730 2. Scalar to scalar, for integer modes or same-size float modes.
8731 3. Vector to vector modes.
8732 4. On little-endian only, vector-structure to vector modes. */
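/* For example, under rule 1 a DImode subreg of a V2DImode value held in an
FP register accesses exactly one vector element and is therefore allowed. */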
8733 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8735 if (aarch64_vector_mode_supported_p (from)
8736 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8737 return false;
8739 if (GET_MODE_NUNITS (from) == 1
8740 && GET_MODE_NUNITS (to) == 1
8741 && (GET_MODE_CLASS (from) == MODE_INT
8742 || from == to))
8743 return false;
8745 if (aarch64_vector_mode_supported_p (from)
8746 && aarch64_vector_mode_supported_p (to))
8747 return false;
8749 /* Within a vector structure straddling multiple vector registers
8750 we are in a mixed-endian representation. As such, we can't
8751 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
8752 switch between vectors and vector structures cheaply. */
8753 if (!BYTES_BIG_ENDIAN)
8754 if ((aarch64_vector_mode_supported_p (from)
8755 && aarch64_vect_struct_mode_p (to))
8756 || (aarch64_vector_mode_supported_p (to)
8757 && aarch64_vect_struct_mode_p (from)))
8758 return false;
8761 return true;
8764 /* Implement MODES_TIEABLE_P. */
8766 bool
8767 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
8769 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
8770 return true;
8772 /* We specifically want to allow elements of "structure" modes to
8773 be tieable to the structure. This more general condition allows
8774 other rarer situations too. */
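/* For example, this allows a V4SImode vector to be tied to the OImode
structure that contains it. */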
8775 if (TARGET_SIMD
8776 && aarch64_vector_mode_p (mode1)
8777 && aarch64_vector_mode_p (mode2))
8778 return true;
8780 return false;
8783 #undef TARGET_ADDRESS_COST
8784 #define TARGET_ADDRESS_COST aarch64_address_cost
8786 /* This hook determines whether unnamed bitfields affect the alignment
8787 of the containing structure. The hook returns true if the structure
8788 should inherit the alignment requirements of an unnamed bitfield's
8789 type. */
8790 #undef TARGET_ALIGN_ANON_BITFIELD
8791 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8793 #undef TARGET_ASM_ALIGNED_DI_OP
8794 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8796 #undef TARGET_ASM_ALIGNED_HI_OP
8797 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8799 #undef TARGET_ASM_ALIGNED_SI_OP
8800 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8802 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8803 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8804 hook_bool_const_tree_hwi_hwi_const_tree_true
8806 #undef TARGET_ASM_FILE_START
8807 #define TARGET_ASM_FILE_START aarch64_start_file
8809 #undef TARGET_ASM_OUTPUT_MI_THUNK
8810 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8812 #undef TARGET_ASM_SELECT_RTX_SECTION
8813 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8815 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8816 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8818 #undef TARGET_BUILD_BUILTIN_VA_LIST
8819 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8821 #undef TARGET_CALLEE_COPIES
8822 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8824 #undef TARGET_CAN_ELIMINATE
8825 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8827 #undef TARGET_CANNOT_FORCE_CONST_MEM
8828 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8830 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8831 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8833 /* Only the least significant bit is used for initialization guard
8834 variables. */
8835 #undef TARGET_CXX_GUARD_MASK_BIT
8836 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8838 #undef TARGET_C_MODE_FOR_SUFFIX
8839 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8841 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8842 #undef TARGET_DEFAULT_TARGET_FLAGS
8843 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8844 #endif
8846 #undef TARGET_CLASS_MAX_NREGS
8847 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8849 #undef TARGET_BUILTIN_DECL
8850 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8852 #undef TARGET_EXPAND_BUILTIN
8853 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8855 #undef TARGET_EXPAND_BUILTIN_VA_START
8856 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8858 #undef TARGET_FOLD_BUILTIN
8859 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8861 #undef TARGET_FUNCTION_ARG
8862 #define TARGET_FUNCTION_ARG aarch64_function_arg
8864 #undef TARGET_FUNCTION_ARG_ADVANCE
8865 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8867 #undef TARGET_FUNCTION_ARG_BOUNDARY
8868 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8870 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8871 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8873 #undef TARGET_FUNCTION_VALUE
8874 #define TARGET_FUNCTION_VALUE aarch64_function_value
8876 #undef TARGET_FUNCTION_VALUE_REGNO_P
8877 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8879 #undef TARGET_FRAME_POINTER_REQUIRED
8880 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8882 #undef TARGET_GIMPLE_FOLD_BUILTIN
8883 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8885 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8886 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8888 #undef TARGET_INIT_BUILTINS
8889 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8891 #undef TARGET_LEGITIMATE_ADDRESS_P
8892 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8894 #undef TARGET_LEGITIMATE_CONSTANT_P
8895 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8897 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8898 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8900 #undef TARGET_LRA_P
8901 #define TARGET_LRA_P aarch64_lra_p
8903 #undef TARGET_MANGLE_TYPE
8904 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8906 #undef TARGET_MEMORY_MOVE_COST
8907 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8909 #undef TARGET_MUST_PASS_IN_STACK
8910 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8912 /* This target hook should return true if accesses to volatile bitfields
8913 should use the narrowest mode possible. It should return false if these
8914 accesses should use the bitfield container type. */
8915 #undef TARGET_NARROW_VOLATILE_BITFIELD
8916 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8918 #undef TARGET_OPTION_OVERRIDE
8919 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8921 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8922 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8923 aarch64_override_options_after_change
8925 #undef TARGET_PASS_BY_REFERENCE
8926 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8928 #undef TARGET_PREFERRED_RELOAD_CLASS
8929 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8931 #undef TARGET_SECONDARY_RELOAD
8932 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8934 #undef TARGET_SHIFT_TRUNCATION_MASK
8935 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8937 #undef TARGET_SETUP_INCOMING_VARARGS
8938 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8940 #undef TARGET_STRUCT_VALUE_RTX
8941 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8943 #undef TARGET_REGISTER_MOVE_COST
8944 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8946 #undef TARGET_RETURN_IN_MEMORY
8947 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8949 #undef TARGET_RETURN_IN_MSB
8950 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8952 #undef TARGET_RTX_COSTS
8953 #define TARGET_RTX_COSTS aarch64_rtx_costs
8955 #undef TARGET_SCHED_ISSUE_RATE
8956 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8958 #undef TARGET_TRAMPOLINE_INIT
8959 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8961 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8962 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8964 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8965 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8967 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8968 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8970 #undef TARGET_VECTORIZE_ADD_STMT_COST
8971 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8973 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8974 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8975 aarch64_builtin_vectorization_cost
8977 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8978 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8980 #undef TARGET_VECTORIZE_BUILTINS
8981 #define TARGET_VECTORIZE_BUILTINS
8983 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8984 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8985 aarch64_builtin_vectorized_function
8987 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8988 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8989 aarch64_autovectorize_vector_sizes
8991 /* Section anchor support. */
8993 #undef TARGET_MIN_ANCHOR_OFFSET
8994 #define TARGET_MIN_ANCHOR_OFFSET -256
8996 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8997 byte offset; we can do much more for larger data types, but have no way
8998 to determine the size of the access. We assume accesses are aligned. */
8999 #undef TARGET_MAX_ANCHOR_OFFSET
9000 #define TARGET_MAX_ANCHOR_OFFSET 4095
9002 #undef TARGET_VECTOR_ALIGNMENT
9003 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9005 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9006 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9007 aarch64_simd_vector_alignment_reachable
9009 /* vec_perm support. */
9011 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9012 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9013 aarch64_vectorize_vec_perm_const_ok
9016 #undef TARGET_FIXED_CONDITION_CODE_REGS
9017 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9019 #undef TARGET_FLAGS_REGNUM
9020 #define TARGET_FLAGS_REGNUM CC_REGNUM
9022 struct gcc_target targetm = TARGET_INITIALIZER;
9024 #include "gt-aarch64.h"