[AArch64 costs 10/18] Improve costs for sign/zero extend operations
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 76ef7ce89bfde8c5c1875933b3792ec92e2fe4dc
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91 ADDRESS_SYMBOLIC:
92 A constant symbolic address, in pc-relative literal pool. */
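/* Illustrative examples of each class, in typical AArch64 assembly
   syntax (a rough sketch, not an exhaustive list of accepted forms):

     ADDRESS_REG_IMM     ldr  x0, [x1, #16]
     ADDRESS_REG_WB      ldr  x0, [x1, #16]!     (pre-indexed)
                         ldr  x0, [x1], #16      (post-indexed)
     ADDRESS_REG_REG     ldr  x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW    ldr  x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW    ldr  x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM      ldr  x0, [x1, #:lo12:sym]   (x1 set up by adrp)
     ADDRESS_SYMBOLIC    ldr  x0, .Lpool_entry       (PC-relative literal)  */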
94 enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
114 rtx value;
115 int shift;
116 int element_width;
117 bool mvn;
118 bool msl;
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
124 #ifdef HAVE_AS_TLS
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
127 #endif
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune = cortexa53;
150 /* The current tuning set. */
151 const struct tune_params *aarch64_tune_params;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags = 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags = 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
163 #else
164 #define NAMED_PARAM(NAME, VAL) (VAL)
165 #endif
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
172 __extension__
173 #endif
174 static const struct cpu_addrcost_table generic_addrcost_table =
176 #if HAVE_DESIGNATED_INITIALIZERS
177 .addr_scale_costs =
178 #endif
180 NAMED_PARAM (qi, 0),
181 NAMED_PARAM (hi, 0),
182 NAMED_PARAM (si, 0),
183 NAMED_PARAM (ti, 0),
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
193 __extension__
194 #endif
195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
197 #if HAVE_DESIGNATED_INITIALIZERS
198 .addr_scale_costs =
199 #endif
201 NAMED_PARAM (qi, 0),
202 NAMED_PARAM (hi, 1),
203 NAMED_PARAM (si, 0),
204 NAMED_PARAM (ti, 1),
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct cpu_regmove_cost generic_regmove_cost =
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
229 __extension__
230 #endif
231 static const struct cpu_vector_cost generic_vector_cost =
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
247 /* Generic costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
249 __extension__
250 #endif
251 static const struct cpu_vector_cost cortexa57_vector_cost =
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
268 __extension__
269 #endif
270 static const struct tune_params generic_tunings =
272 &cortexa57_extra_costs,
273 &generic_addrcost_table,
274 &generic_regmove_cost,
275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
280 static const struct tune_params cortexa53_tunings =
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
290 static const struct tune_params cortexa57_tunings =
292 &cortexa57_extra_costs,
293 &cortexa57_addrcost_table,
294 &generic_regmove_cost,
295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
300 /* A processor implementing AArch64. */
301 struct processor
303 const char *const name;
304 enum aarch64_processor core;
305 const char *arch;
306 const unsigned long flags;
307 const struct tune_params *const tune;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
316 #undef AARCH64_CORE
317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
327 #undef AARCH64_ARCH
328 {NULL, aarch64_none, NULL, 0, NULL}
331 /* Target specification. These are populated as commandline arguments
332 are processed, or NULL if not specified. */
333 static const struct processor *selected_arch;
334 static const struct processor *selected_cpu;
335 static const struct processor *selected_tune;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
354 {NULL, 0, 0}
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
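/* Roughly speaking, a 64-bit bitmask immediate is obtained by
   replicating a 2, 4, 8, 16, 32 or 64-bit element that contains a
   single contiguous (possibly rotated) run of set bits; zero and
   all-ones are not representable.  For example:

     0x5555555555555555   valid   (2-bit element 0b01 replicated)
     0x00ff00ff00ff00ff   valid   (16-bit element, run of 8 ones)
     0x0003fffffffffffc   valid   (single run within a 64-bit element)
     0x0000000000001234   invalid (bits are not one contiguous run)

   There are 5334 such values in total, hence AARCH64_NUM_BITMASKS.  */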
370 typedef enum aarch64_cond_code
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
376 aarch64_cc;
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
388 unsigned
389 aarch64_dbx_register_number (unsigned regno)
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
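/* As a concrete illustration, using the usual AArch64 DWARF numbering
   (AARCH64_DWARF_R0 == 0, AARCH64_DWARF_SP == 31, AARCH64_DWARF_V0 == 64):

     aarch64_dbx_register_number (R0_REGNUM)  == 0    (x0)
     aarch64_dbx_register_number (SP_REGNUM)  == 31   (sp)
     aarch64_dbx_register_number (V0_REGNUM)  == 64   (v0)
     aarch64_dbx_register_number (CC_REGNUM)  == DWARF_FRAME_REGISTERS,
       i.e. the condition flags have no DWARF equivalent.  */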
403 /* Return TRUE if MODE is any of the large INT modes. */
404 static bool
405 aarch64_vect_struct_mode_p (enum machine_mode mode)
407 return mode == OImode || mode == CImode || mode == XImode;
410 /* Return TRUE if MODE is any of the vector modes. */
411 static bool
412 aarch64_vector_mode_p (enum machine_mode mode)
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
419 static bool
420 aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
423 if (TARGET_SIMD
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
426 return true;
428 return false;
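/* For example, with TARGET_SIMD this hook accepts (V4SImode, 3), which
   lets an array of three Q-register vectors live in a single CImode
   pseudo.  A sketch of the kind of source this helps (vld3q_s32 is the
   ACLE intrinsic from <arm_neon.h>):

     int32x4x3_t t = vld3q_s32 (p);    /- one ld3, kept in registers -/

   OImode, CImode and XImode correspond to arrays of 2, 3 and 4
   Q-register vectors respectively.  */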
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436 switch (aarch64_regno_regclass (regno))
438 case FP_REGS:
439 case FP_LO_REGS:
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
441 default:
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
444 gcc_unreachable ();
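/* Worked examples, assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:

     TImode (16 bytes) in a GP register class  -> 2 registers (e.g. x0/x1)
     TFmode (16 bytes) in FP_REGS              -> 1 register  (one Q register)
     OImode (32 bytes) in FP_REGS              -> 2 registers (a Q-register pair)  */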
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
465 return 1;
467 if (FP_REGNUM_P (regno))
469 if (aarch64_vect_struct_mode_p (mode))
470 return
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
472 else
473 return 1;
476 return 0;
479 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
480 enum machine_mode
481 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
482 enum machine_mode mode)
484 /* Handle modes that fit within single registers. */
485 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
487 if (GET_MODE_SIZE (mode) >= 4)
488 return mode;
489 else
490 return SImode;
492 /* Fall back to generic for multi-reg and very large modes. */
493 else
494 return choose_hard_reg_mode (regno, nregs, false);
497 /* Return true if calls to DECL should be treated as
498 long-calls (i.e. called via a register). */
499 static bool
500 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
502 return false;
505 /* Return true if calls to symbol-ref SYM should be treated as
506 long-calls (i.e. called via a register). */
507 bool
508 aarch64_is_long_call_p (rtx sym)
510 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
513 /* Return true if the offsets to a zero/sign-extract operation
514 represent an expression that matches an extend operation. The
515 operands represent the parameters from
517 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
518 bool
519 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
520 rtx extract_imm)
522 HOST_WIDE_INT mult_val, extract_val;
524 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
525 return false;
527 mult_val = INTVAL (mult_imm);
528 extract_val = INTVAL (extract_imm);
530 if (extract_val > 8
531 && extract_val < GET_MODE_BITSIZE (mode)
532 && exact_log2 (extract_val & ~7) > 0
533 && (extract_val & 7) <= 4
534 && mult_val == (1 << (extract_val & 7)))
535 return true;
537 return false;
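/* An illustrative reading of the test above, for MODE == DImode:
   MULT_IMM == 4 and EXTRACT_IMM == 34 are accepted, because taking the
   low 34 bits of (reg * 4) is the same as zero-extending the low 32
   bits of REG and then shifting left by 2, which is the
   "add x0, x1, w2, uxtw #2" style of extended operand.  */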
540 /* Emit an insn that's a simple single-set. Both the operands must be
541 known to be valid. */
542 inline static rtx
543 emit_set_insn (rtx x, rtx y)
545 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
548 /* X and Y are two things to compare using CODE. Emit the compare insn and
549 return the rtx for register 0 in the proper mode. */
551 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
553 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
554 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
556 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
557 return cc_reg;
560 /* Build the SYMBOL_REF for __tls_get_addr. */
562 static GTY(()) rtx tls_get_addr_libfunc;
565 aarch64_tls_get_addr (void)
567 if (!tls_get_addr_libfunc)
568 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
569 return tls_get_addr_libfunc;
572 /* Return the TLS model to use for ADDR. */
574 static enum tls_model
575 tls_symbolic_operand_type (rtx addr)
577 enum tls_model tls_kind = TLS_MODEL_NONE;
578 rtx sym, addend;
580 if (GET_CODE (addr) == CONST)
582 split_const (addr, &sym, &addend);
583 if (GET_CODE (sym) == SYMBOL_REF)
584 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
586 else if (GET_CODE (addr) == SYMBOL_REF)
587 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
589 return tls_kind;
592 /* We'll allow LO_SUMs in our legitimate addresses so that
593 combine can take care of combining addresses where
594 necessary, but for generation purposes we'll generate the address
595 as:
596 RTL Absolute
597 tmp = hi (symbol_ref); adrp x1, foo
598 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
601 PIC TLS
602 adrp x1, :got:foo adrp tmp, :tlsgd:foo
603 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
604 bl __tls_get_addr
607 Load TLS symbol, depending on TLS mechanism and TLS access model.
609 Global Dynamic - Traditional TLS:
610 adrp tmp, :tlsgd:imm
611 add dest, tmp, #:tlsgd_lo12:imm
612 bl __tls_get_addr
614 Global Dynamic - TLS Descriptors:
615 adrp dest, :tlsdesc:imm
616 ldr tmp, [dest, #:tlsdesc_lo12:imm]
617 add dest, dest, #:tlsdesc_lo12:imm
618 blr tmp
619 mrs tp, tpidr_el0
620 add dest, dest, tp
622 Initial Exec:
623 mrs tp, tpidr_el0
624 adrp tmp, :gottprel:imm
625 ldr dest, [tmp, #:gottprel_lo12:imm]
626 add dest, dest, tp
628 Local Exec:
629 mrs tp, tpidr_el0
630 add t0, tp, #:tprel_hi12:imm
631 add t0, #:tprel_lo12_nc:imm
634 static void
635 aarch64_load_symref_appropriately (rtx dest, rtx imm,
636 enum aarch64_symbol_type type)
638 switch (type)
640 case SYMBOL_SMALL_ABSOLUTE:
642 /* In ILP32, the mode of dest can be either SImode or DImode. */
643 rtx tmp_reg = dest;
644 enum machine_mode mode = GET_MODE (dest);
646 gcc_assert (mode == Pmode || mode == ptr_mode);
648 if (can_create_pseudo_p ())
649 tmp_reg = gen_reg_rtx (mode);
651 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
652 emit_insn (gen_add_losym (dest, tmp_reg, imm));
653 return;
656 case SYMBOL_TINY_ABSOLUTE:
657 emit_insn (gen_rtx_SET (Pmode, dest, imm));
658 return;
660 case SYMBOL_SMALL_GOT:
662 /* In ILP32, the mode of dest can be either SImode or DImode,
663 while the got entry is always of SImode size. The mode of
664 dest depends on how dest is used: if dest is assigned to a
665 pointer (e.g. in the memory), it has SImode; it may have
666 DImode if dest is dereferenced to access the memory.
667 This is why we have to handle three different ldr_got_small
668 patterns here (two patterns for ILP32). */
669 rtx tmp_reg = dest;
670 enum machine_mode mode = GET_MODE (dest);
672 if (can_create_pseudo_p ())
673 tmp_reg = gen_reg_rtx (mode);
675 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
676 if (mode == ptr_mode)
678 if (mode == DImode)
679 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
680 else
681 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
683 else
685 gcc_assert (mode == Pmode);
686 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
689 return;
692 case SYMBOL_SMALL_TLSGD:
694 rtx insns;
695 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
697 start_sequence ();
698 emit_call_insn (gen_tlsgd_small (result, imm));
699 insns = get_insns ();
700 end_sequence ();
702 RTL_CONST_CALL_P (insns) = 1;
703 emit_libcall_block (insns, dest, result, imm);
704 return;
707 case SYMBOL_SMALL_TLSDESC:
709 enum machine_mode mode = GET_MODE (dest);
710 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
711 rtx tp;
713 gcc_assert (mode == Pmode || mode == ptr_mode);
715 /* In ILP32, the got entry is always of SImode size. Unlike
716 small GOT, the dest is fixed at reg 0. */
717 if (TARGET_ILP32)
718 emit_insn (gen_tlsdesc_small_si (imm));
719 else
720 emit_insn (gen_tlsdesc_small_di (imm));
721 tp = aarch64_load_tp (NULL);
723 if (mode != Pmode)
724 tp = gen_lowpart (mode, tp);
726 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
727 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
728 return;
731 case SYMBOL_SMALL_GOTTPREL:
733 /* In ILP32, the mode of dest can be either SImode or DImode,
734 while the got entry is always of SImode size. The mode of
735 dest depends on how dest is used: if dest is assigned to a
736 pointer (e.g. in the memory), it has SImode; it may have
737 DImode if dest is dereferenced to access the memory.
738 This is why we have to handle three different tlsie_small
739 patterns here (two patterns for ILP32). */
740 enum machine_mode mode = GET_MODE (dest);
741 rtx tmp_reg = gen_reg_rtx (mode);
742 rtx tp = aarch64_load_tp (NULL);
744 if (mode == ptr_mode)
746 if (mode == DImode)
747 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
748 else
750 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
751 tp = gen_lowpart (mode, tp);
754 else
756 gcc_assert (mode == Pmode);
757 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
760 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
761 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
762 return;
765 case SYMBOL_SMALL_TPREL:
767 rtx tp = aarch64_load_tp (NULL);
768 emit_insn (gen_tlsle_small (dest, tp, imm));
769 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
770 return;
773 case SYMBOL_TINY_GOT:
774 emit_insn (gen_ldr_got_tiny (dest, imm));
775 return;
777 default:
778 gcc_unreachable ();
782 /* Emit a move from SRC to DEST. Assume that the move expanders can
783 handle all moves if !can_create_pseudo_p (). The distinction is
784 important because, unlike emit_move_insn, the move expanders know
785 how to force Pmode objects into the constant pool even when the
786 constant pool address is not itself legitimate. */
787 static rtx
788 aarch64_emit_move (rtx dest, rtx src)
790 return (can_create_pseudo_p ()
791 ? emit_move_insn (dest, src)
792 : emit_move_insn_1 (dest, src));
795 /* Split a 128-bit move operation into two 64-bit move operations,
796 taking care to handle partial overlap of register to register
797 copies. Special cases are needed when moving between GP regs and
798 FP regs. SRC can be a register, constant or memory; DST a register
799 or memory. If either operand is memory it must not have any side
800 effects. */
801 void
802 aarch64_split_128bit_move (rtx dst, rtx src)
804 rtx dst_lo, dst_hi;
805 rtx src_lo, src_hi;
807 enum machine_mode mode = GET_MODE (dst);
809 gcc_assert (mode == TImode || mode == TFmode);
810 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
811 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
813 if (REG_P (dst) && REG_P (src))
815 int src_regno = REGNO (src);
816 int dst_regno = REGNO (dst);
818 /* Handle FP <-> GP regs. */
819 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
821 src_lo = gen_lowpart (word_mode, src);
822 src_hi = gen_highpart (word_mode, src);
824 if (mode == TImode)
826 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
827 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
829 else
831 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
832 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
834 return;
836 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
838 dst_lo = gen_lowpart (word_mode, dst);
839 dst_hi = gen_highpart (word_mode, dst);
841 if (mode == TImode)
843 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
844 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
846 else
848 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
849 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
851 return;
855 dst_lo = gen_lowpart (word_mode, dst);
856 dst_hi = gen_highpart (word_mode, dst);
857 src_lo = gen_lowpart (word_mode, src);
858 src_hi = gen_highpart_mode (word_mode, mode, src);
860 /* At most one pairing may overlap. */
861 if (reg_overlap_mentioned_p (dst_lo, src_hi))
863 aarch64_emit_move (dst_hi, src_hi);
864 aarch64_emit_move (dst_lo, src_lo);
866 else
868 aarch64_emit_move (dst_lo, src_lo);
869 aarch64_emit_move (dst_hi, src_hi);
873 bool
874 aarch64_split_128bit_move_p (rtx dst, rtx src)
876 return (! REG_P (src)
877 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
880 /* Split a complex SIMD combine. */
882 void
883 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
885 enum machine_mode src_mode = GET_MODE (src1);
886 enum machine_mode dst_mode = GET_MODE (dst);
888 gcc_assert (VECTOR_MODE_P (dst_mode));
890 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
892 rtx (*gen) (rtx, rtx, rtx);
894 switch (src_mode)
896 case V8QImode:
897 gen = gen_aarch64_simd_combinev8qi;
898 break;
899 case V4HImode:
900 gen = gen_aarch64_simd_combinev4hi;
901 break;
902 case V2SImode:
903 gen = gen_aarch64_simd_combinev2si;
904 break;
905 case V2SFmode:
906 gen = gen_aarch64_simd_combinev2sf;
907 break;
908 case DImode:
909 gen = gen_aarch64_simd_combinedi;
910 break;
911 case DFmode:
912 gen = gen_aarch64_simd_combinedf;
913 break;
914 default:
915 gcc_unreachable ();
918 emit_insn (gen (dst, src1, src2));
919 return;
923 /* Split a complex SIMD move. */
925 void
926 aarch64_split_simd_move (rtx dst, rtx src)
928 enum machine_mode src_mode = GET_MODE (src);
929 enum machine_mode dst_mode = GET_MODE (dst);
931 gcc_assert (VECTOR_MODE_P (dst_mode));
933 if (REG_P (dst) && REG_P (src))
935 rtx (*gen) (rtx, rtx);
937 gcc_assert (VECTOR_MODE_P (src_mode));
939 switch (src_mode)
941 case V16QImode:
942 gen = gen_aarch64_split_simd_movv16qi;
943 break;
944 case V8HImode:
945 gen = gen_aarch64_split_simd_movv8hi;
946 break;
947 case V4SImode:
948 gen = gen_aarch64_split_simd_movv4si;
949 break;
950 case V2DImode:
951 gen = gen_aarch64_split_simd_movv2di;
952 break;
953 case V4SFmode:
954 gen = gen_aarch64_split_simd_movv4sf;
955 break;
956 case V2DFmode:
957 gen = gen_aarch64_split_simd_movv2df;
958 break;
959 default:
960 gcc_unreachable ();
963 emit_insn (gen (dst, src));
964 return;
968 static rtx
969 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
971 if (can_create_pseudo_p ())
972 return force_reg (mode, value);
973 else
975 x = aarch64_emit_move (x, value);
976 return x;
981 static rtx
982 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
984 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
986 rtx high;
987 /* Load the full offset into a register. This
988 might be improvable in the future. */
989 high = GEN_INT (offset);
990 offset = 0;
991 high = aarch64_force_temporary (mode, temp, high);
992 reg = aarch64_force_temporary (mode, temp,
993 gen_rtx_PLUS (mode, high, reg));
995 return plus_constant (mode, reg, offset);
998 void
999 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1001 enum machine_mode mode = GET_MODE (dest);
1002 unsigned HOST_WIDE_INT mask;
1003 int i;
1004 bool first;
1005 unsigned HOST_WIDE_INT val;
1006 bool subtargets;
1007 rtx subtarget;
1008 int one_match, zero_match;
1010 gcc_assert (mode == SImode || mode == DImode);
1012 /* Check on what type of symbol it is. */
1013 if (GET_CODE (imm) == SYMBOL_REF
1014 || GET_CODE (imm) == LABEL_REF
1015 || GET_CODE (imm) == CONST)
1017 rtx mem, base, offset;
1018 enum aarch64_symbol_type sty;
1020 /* If we have (const (plus symbol offset)), separate out the offset
1021 before we start classifying the symbol. */
1022 split_const (imm, &base, &offset);
1024 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1025 switch (sty)
1027 case SYMBOL_FORCE_TO_MEM:
1028 if (offset != const0_rtx
1029 && targetm.cannot_force_const_mem (mode, imm))
1031 gcc_assert (can_create_pseudo_p ());
1032 base = aarch64_force_temporary (mode, dest, base);
1033 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1034 aarch64_emit_move (dest, base);
1035 return;
1037 mem = force_const_mem (ptr_mode, imm);
1038 gcc_assert (mem);
1039 if (mode != ptr_mode)
1040 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1041 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1042 return;
1044 case SYMBOL_SMALL_TLSGD:
1045 case SYMBOL_SMALL_TLSDESC:
1046 case SYMBOL_SMALL_GOTTPREL:
1047 case SYMBOL_SMALL_GOT:
1048 case SYMBOL_TINY_GOT:
1049 if (offset != const0_rtx)
1051 gcc_assert(can_create_pseudo_p ());
1052 base = aarch64_force_temporary (mode, dest, base);
1053 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1054 aarch64_emit_move (dest, base);
1055 return;
1057 /* FALLTHRU */
1059 case SYMBOL_SMALL_TPREL:
1060 case SYMBOL_SMALL_ABSOLUTE:
1061 case SYMBOL_TINY_ABSOLUTE:
1062 aarch64_load_symref_appropriately (dest, imm, sty);
1063 return;
1065 default:
1066 gcc_unreachable ();
1070 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1072 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1073 return;
1076 if (!CONST_INT_P (imm))
1078 if (GET_CODE (imm) == HIGH)
1079 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1080 else
1082 rtx mem = force_const_mem (mode, imm);
1083 gcc_assert (mem);
1084 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1087 return;
1090 if (mode == SImode)
1092 /* We know we can't do this in 1 insn, and we must be able to do it
1093 in two; so don't mess around looking for sequences that don't buy
1094 us anything. */
1095 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1096 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1097 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1098 return;
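  /* For example, the SImode constant 0x12345678 is built as

       mov  w0, #0x5678
       movk w0, #0x1234, lsl #16

     the second instruction being the insv_immsi pattern used above.  */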
1101 /* Remaining cases are all for DImode. */
1103 val = INTVAL (imm);
1104 subtargets = optimize && can_create_pseudo_p ();
1106 one_match = 0;
1107 zero_match = 0;
1108 mask = 0xffff;
1110 for (i = 0; i < 64; i += 16, mask <<= 16)
1112 if ((val & mask) == 0)
1113 zero_match++;
1114 else if ((val & mask) == mask)
1115 one_match++;
1118 if (one_match == 2)
1120 mask = 0xffff;
1121 for (i = 0; i < 64; i += 16, mask <<= 16)
1123 if ((val & mask) != mask)
1125 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1126 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1127 GEN_INT ((val >> i) & 0xffff)));
1128 return;
1131 gcc_unreachable ();
1134 if (zero_match == 2)
1135 goto simple_sequence;
1137 mask = 0x0ffff0000UL;
1138 for (i = 16; i < 64; i += 16, mask <<= 16)
1140 HOST_WIDE_INT comp = mask & ~(mask - 1);
1142 if (aarch64_uimm12_shift (val - (val & mask)))
1144 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1146 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1147 emit_insn (gen_adddi3 (dest, subtarget,
1148 GEN_INT (val - (val & mask))));
1149 return;
1151 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1153 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1155 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1156 GEN_INT ((val + comp) & mask)));
1157 emit_insn (gen_adddi3 (dest, subtarget,
1158 GEN_INT (val - ((val + comp) & mask))));
1159 return;
1161 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1163 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1165 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1166 GEN_INT ((val - comp) | ~mask)));
1167 emit_insn (gen_adddi3 (dest, subtarget,
1168 GEN_INT (val - ((val - comp) | ~mask))));
1169 return;
1171 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1173 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1175 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176 GEN_INT (val | ~mask)));
1177 emit_insn (gen_adddi3 (dest, subtarget,
1178 GEN_INT (val - (val | ~mask))));
1179 return;
1183 /* See if we can do it by arithmetically combining two
1184 immediates. */
1185 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1187 int j;
1188 mask = 0xffff;
1190 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1191 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1193 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1194 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1195 GEN_INT (aarch64_bitmasks[i])));
1196 emit_insn (gen_adddi3 (dest, subtarget,
1197 GEN_INT (val - aarch64_bitmasks[i])));
1198 return;
1201 for (j = 0; j < 64; j += 16, mask <<= 16)
1203 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1205 emit_insn (gen_rtx_SET (VOIDmode, dest,
1206 GEN_INT (aarch64_bitmasks[i])));
1207 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1208 GEN_INT ((val >> j) & 0xffff)));
1209 return;
1214 /* See if we can do it by logically combining two immediates. */
1215 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1217 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1219 int j;
1221 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1222 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1224 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1225 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1226 GEN_INT (aarch64_bitmasks[i])));
1227 emit_insn (gen_iordi3 (dest, subtarget,
1228 GEN_INT (aarch64_bitmasks[j])));
1229 return;
1232 else if ((val & aarch64_bitmasks[i]) == val)
1234 int j;
1236 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1237 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1240 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1241 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1242 GEN_INT (aarch64_bitmasks[j])));
1243 emit_insn (gen_anddi3 (dest, subtarget,
1244 GEN_INT (aarch64_bitmasks[i])));
1245 return;
1250 simple_sequence:
1251 first = true;
1252 mask = 0xffff;
1253 for (i = 0; i < 64; i += 16, mask <<= 16)
1255 if ((val & mask) != 0)
1257 if (first)
1259 emit_insn (gen_rtx_SET (VOIDmode, dest,
1260 GEN_INT (val & mask)));
1261 first = false;
1263 else
1264 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1265 GEN_INT ((val >> i) & 0xffff)));
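/* As a rough illustration of the fall-back sequence above: the DImode
   constant 0x0001000200030004 has no zero and no 0xffff halfwords and
   is not reachable by the earlier shortcuts, so it is built as

     mov  x0, #0x4
     movk x0, #0x3, lsl #16
     movk x0, #0x2, lsl #32
     movk x0, #0x1, lsl #48  */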
1270 static bool
1271 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1273 /* Indirect calls are not currently supported. */
1274 if (decl == NULL)
1275 return false;
1277 /* Cannot tail-call to long-calls, since these are outside of the
1278 range of a branch instruction (we could handle this if we added
1279 support for indirect tail-calls). */
1280 if (aarch64_decl_is_long_call_p (decl))
1281 return false;
1283 return true;
1286 /* Implement TARGET_PASS_BY_REFERENCE. */
1288 static bool
1289 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1290 enum machine_mode mode,
1291 const_tree type,
1292 bool named ATTRIBUTE_UNUSED)
1294 HOST_WIDE_INT size;
1295 enum machine_mode dummymode;
1296 int nregs;
1298 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1299 size = (mode == BLKmode && type)
1300 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1302 /* Aggregates are passed by reference based on their size. */
1303 if (type && AGGREGATE_TYPE_P (type))
1305 size = int_size_in_bytes (type);
1308 /* Variable sized arguments are always returned by reference. */
1309 if (size < 0)
1310 return true;
1312 /* Can this be a candidate to be passed in fp/simd register(s)? */
1313 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1314 &dummymode, &nregs,
1315 NULL))
1316 return false;
1318 /* Arguments which are variable sized or larger than 2 registers are
1319 passed by reference unless they are a homogeneous floating-point
1320 aggregate. */
1321 return size > 2 * UNITS_PER_WORD;
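/* Some consequences of the rules above, assuming LP64 (a sketch, not a
   normative statement of the AAPCS64):

     struct s1 { long a, b; };          16 bytes: passed in registers
     struct s2 { long a, b, c; };       24 bytes: passed by reference
     struct s3 { double a, b, c, d; };  32 bytes, but an HFA: passed in
                                        d0-d3 when those are available,
                                        never by reference.  */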
1324 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1325 static bool
1326 aarch64_return_in_msb (const_tree valtype)
1328 enum machine_mode dummy_mode;
1329 int dummy_int;
1331 /* Never happens in little-endian mode. */
1332 if (!BYTES_BIG_ENDIAN)
1333 return false;
1335 /* Only composite types smaller than or equal to 16 bytes can
1336 be potentially returned in registers. */
1337 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1338 || int_size_in_bytes (valtype) <= 0
1339 || int_size_in_bytes (valtype) > 16)
1340 return false;
1342 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1343 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1344 is always passed/returned in the least significant bits of fp/simd
1345 register(s). */
1346 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1347 &dummy_mode, &dummy_int, NULL))
1348 return false;
1350 return true;
1353 /* Implement TARGET_FUNCTION_VALUE.
1354 Define how to find the value returned by a function. */
1356 static rtx
1357 aarch64_function_value (const_tree type, const_tree func,
1358 bool outgoing ATTRIBUTE_UNUSED)
1360 enum machine_mode mode;
1361 int unsignedp;
1362 int count;
1363 enum machine_mode ag_mode;
1365 mode = TYPE_MODE (type);
1366 if (INTEGRAL_TYPE_P (type))
1367 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1369 if (aarch64_return_in_msb (type))
1371 HOST_WIDE_INT size = int_size_in_bytes (type);
1373 if (size % UNITS_PER_WORD != 0)
1375 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1376 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1380 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1381 &ag_mode, &count, NULL))
1383 if (!aarch64_composite_type_p (type, mode))
1385 gcc_assert (count == 1 && mode == ag_mode);
1386 return gen_rtx_REG (mode, V0_REGNUM);
1388 else
1390 int i;
1391 rtx par;
1393 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1394 for (i = 0; i < count; i++)
1396 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1397 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1398 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1399 XVECEXP (par, 0, i) = tmp;
1401 return par;
1404 else
1405 return gen_rtx_REG (mode, R0_REGNUM);
1408 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1409 Return true if REGNO is the number of a hard register in which the values
1410 of called function may come back. */
1412 static bool
1413 aarch64_function_value_regno_p (const unsigned int regno)
1415 /* Maximum of 16 bytes can be returned in the general registers. Examples
1416 of 16-byte return values are: 128-bit integers and 16-byte small
1417 structures (excluding homogeneous floating-point aggregates). */
1418 if (regno == R0_REGNUM || regno == R1_REGNUM)
1419 return true;
1421 /* Up to four fp/simd registers can return a function value, e.g. a
1422 homogeneous floating-point aggregate having four members. */
1423 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1424 return !TARGET_GENERAL_REGS_ONLY;
1426 return false;
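/* Illustrative cases (assuming FP/SIMD is available):

     __int128 return value           -> x0/x1, i.e. R0_REGNUM/R1_REGNUM
     struct { float a, b, c, d; }    -> an HFA returned in s0-s3,
                                        i.e. V0_REGNUM..V3_REGNUM
     x2 or v4                        -> never carry a return value.  */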
1429 /* Implement TARGET_RETURN_IN_MEMORY.
1431 If the type T of the result of a function is such that
1432 void func (T arg)
1433 would require that arg be passed as a value in a register (or set of
1434 registers) according to the parameter passing rules, then the result
1435 is returned in the same registers as would be used for such an
1436 argument. */
1438 static bool
1439 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1441 HOST_WIDE_INT size;
1442 enum machine_mode ag_mode;
1443 int count;
1445 if (!AGGREGATE_TYPE_P (type)
1446 && TREE_CODE (type) != COMPLEX_TYPE
1447 && TREE_CODE (type) != VECTOR_TYPE)
1448 /* Simple scalar types always returned in registers. */
1449 return false;
1451 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1452 type,
1453 &ag_mode,
1454 &count,
1455 NULL))
1456 return false;
1458 /* Types larger than 2 registers returned in memory. */
1459 size = int_size_in_bytes (type);
1460 return (size < 0 || size > 2 * UNITS_PER_WORD);
1463 static bool
1464 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1465 const_tree type, int *nregs)
1467 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1468 return aarch64_vfp_is_call_or_return_candidate (mode,
1469 type,
1470 &pcum->aapcs_vfp_rmode,
1471 nregs,
1472 NULL);
1475 /* Given MODE and TYPE of a function argument, return the alignment in
1476 bits. The idea is to suppress any stronger alignment requested by
1477 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1478 This is a helper function for local use only. */
1480 static unsigned int
1481 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1483 unsigned int alignment;
1485 if (type)
1487 if (!integer_zerop (TYPE_SIZE (type)))
1489 if (TYPE_MODE (type) == mode)
1490 alignment = TYPE_ALIGN (type);
1491 else
1492 alignment = GET_MODE_ALIGNMENT (mode);
1494 else
1495 alignment = 0;
1497 else
1498 alignment = GET_MODE_ALIGNMENT (mode);
1500 return alignment;
1503 /* Layout a function argument according to the AAPCS64 rules. The rule
1504 numbers refer to the rule numbers in the AAPCS64. */
1506 static void
1507 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1508 const_tree type,
1509 bool named ATTRIBUTE_UNUSED)
1511 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1512 int ncrn, nvrn, nregs;
1513 bool allocate_ncrn, allocate_nvrn;
1515 /* We need to do this once per argument. */
1516 if (pcum->aapcs_arg_processed)
1517 return;
1519 pcum->aapcs_arg_processed = true;
1521 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1522 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1523 mode,
1524 type,
1525 &nregs);
1527 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1528 The following code thus handles passing by SIMD/FP registers first. */
1530 nvrn = pcum->aapcs_nvrn;
1532 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1533 and homogeneous short-vector aggregates (HVA). */
1534 if (allocate_nvrn)
1536 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1538 pcum->aapcs_nextnvrn = nvrn + nregs;
1539 if (!aarch64_composite_type_p (type, mode))
1541 gcc_assert (nregs == 1);
1542 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1544 else
1546 rtx par;
1547 int i;
1548 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1549 for (i = 0; i < nregs; i++)
1551 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1552 V0_REGNUM + nvrn + i);
1553 tmp = gen_rtx_EXPR_LIST
1554 (VOIDmode, tmp,
1555 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1556 XVECEXP (par, 0, i) = tmp;
1558 pcum->aapcs_reg = par;
1560 return;
1562 else
1564 /* C.3 NSRN is set to 8. */
1565 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1566 goto on_stack;
1570 ncrn = pcum->aapcs_ncrn;
1571 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1572 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1575 /* C.6 - C.9, though the sign and zero extension semantics are
1576 handled elsewhere. This is the case where the argument fits
1577 entirely in general registers. */
1578 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1580 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1582 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1584 /* C.8 if the argument has an alignment of 16 then the NGRN is
1585 rounded up to the next even number. */
1586 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1588 ++ncrn;
1589 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1591 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1592 A reg is still generated for it, but the caller should be smart
1593 enough not to use it. */
1594 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1596 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1598 else
1600 rtx par;
1601 int i;
1603 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1604 for (i = 0; i < nregs; i++)
1606 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1607 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1608 GEN_INT (i * UNITS_PER_WORD));
1609 XVECEXP (par, 0, i) = tmp;
1611 pcum->aapcs_reg = par;
1614 pcum->aapcs_nextncrn = ncrn + nregs;
1615 return;
1618 /* C.11 */
1619 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1621 /* The argument is passed on stack; record the needed number of words for
1622 this argument (we can re-use NREGS) and align the total size if
1623 necessary. */
1624 on_stack:
1625 pcum->aapcs_stack_words = nregs;
1626 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1627 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1628 16 / UNITS_PER_WORD) + 1;
1629 return;
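/* An illustration of rule C.8 above (a sketch of the AAPCS64 behaviour,
   not a normative statement): for

     void f (int a, __int128 b);

   A is allocated to w0; B has 16-byte alignment and needs two
   registers, so the NGRN is rounded up from 1 to 2 and B is passed in
   x2/x3, leaving x1 unused.  */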
1632 /* Implement TARGET_FUNCTION_ARG. */
1634 static rtx
1635 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1636 const_tree type, bool named)
1638 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1639 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1641 if (mode == VOIDmode)
1642 return NULL_RTX;
1644 aarch64_layout_arg (pcum_v, mode, type, named);
1645 return pcum->aapcs_reg;
1648 void
1649 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1650 const_tree fntype ATTRIBUTE_UNUSED,
1651 rtx libname ATTRIBUTE_UNUSED,
1652 const_tree fndecl ATTRIBUTE_UNUSED,
1653 unsigned n_named ATTRIBUTE_UNUSED)
1655 pcum->aapcs_ncrn = 0;
1656 pcum->aapcs_nvrn = 0;
1657 pcum->aapcs_nextncrn = 0;
1658 pcum->aapcs_nextnvrn = 0;
1659 pcum->pcs_variant = ARM_PCS_AAPCS64;
1660 pcum->aapcs_reg = NULL_RTX;
1661 pcum->aapcs_arg_processed = false;
1662 pcum->aapcs_stack_words = 0;
1663 pcum->aapcs_stack_size = 0;
1665 return;
1668 static void
1669 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1670 enum machine_mode mode,
1671 const_tree type,
1672 bool named)
1674 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1675 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1677 aarch64_layout_arg (pcum_v, mode, type, named);
1678 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1679 != (pcum->aapcs_stack_words != 0));
1680 pcum->aapcs_arg_processed = false;
1681 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1682 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1683 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1684 pcum->aapcs_stack_words = 0;
1685 pcum->aapcs_reg = NULL_RTX;
1689 bool
1690 aarch64_function_arg_regno_p (unsigned regno)
1692 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1693 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1696 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1697 PARM_BOUNDARY bits of alignment, but will be given anything up
1698 to STACK_BOUNDARY bits if the type requires it. This makes sure
1699 that both before and after the layout of each argument, the Next
1700 Stacked Argument Address (NSAA) will have a minimum alignment of
1701 8 bytes. */
1703 static unsigned int
1704 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1706 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1708 if (alignment < PARM_BOUNDARY)
1709 alignment = PARM_BOUNDARY;
1710 if (alignment > STACK_BOUNDARY)
1711 alignment = STACK_BOUNDARY;
1712 return alignment;
1715 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1717 Return true if an argument passed on the stack should be padded upwards,
1718 i.e. if the least-significant byte of the stack slot has useful data.
1720 Small aggregate types are placed in the lowest memory address.
1722 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1724 bool
1725 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1727 /* On little-endian targets, the least significant byte of every stack
1728 argument is passed at the lowest byte address of the stack slot. */
1729 if (!BYTES_BIG_ENDIAN)
1730 return true;
1732 /* Otherwise, integral, floating-point and pointer types are padded downward:
1733 the least significant byte of a stack argument is passed at the highest
1734 byte address of the stack slot. */
1735 if (type
1736 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1737 || POINTER_TYPE_P (type))
1738 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1739 return false;
1741 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1742 return true;
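/* For example, on a big-endian target a SImode argument that ends up
   on the stack is padded downward: its four data bytes occupy the
   highest-addressed four bytes of the eight-byte slot, so the least
   significant byte sits at the highest address.  On a little-endian
   target the data always starts at the lowest address of the slot.  */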
1745 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1747 It specifies padding for the last (may also be the only)
1748 element of a block move between registers and memory. If
1749 assuming the block is in the memory, padding upward means that
1750 the last element is padded after its highest significant byte,
1751 while in downward padding, the last element is padded at
1752 its least significant byte side.
1754 Small aggregates and small complex types are always padded
1755 upwards.
1757 We don't need to worry about homogeneous floating-point or
1758 short-vector aggregates; their move is not affected by the
1759 padding direction determined here. Regardless of endianness,
1760 each element of such an aggregate is put in the least
1761 significant bits of a fp/simd register.
1763 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1764 register has useful data, and return the opposite if the most
1765 significant byte does. */
1767 bool
1768 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1769 bool first ATTRIBUTE_UNUSED)
1772 /* Small composite types are always padded upward. */
1773 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1775 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1776 : GET_MODE_SIZE (mode));
1777 if (size < 2 * UNITS_PER_WORD)
1778 return true;
1781 /* Otherwise, use the default padding. */
1782 return !BYTES_BIG_ENDIAN;
1785 static enum machine_mode
1786 aarch64_libgcc_cmp_return_mode (void)
1788 return SImode;
1791 static bool
1792 aarch64_frame_pointer_required (void)
1794 /* If the function contains dynamic stack allocations, we need to
1795 use the frame pointer to access the static parts of the frame. */
1796 if (cfun->calls_alloca)
1797 return true;
1799 /* In aarch64_override_options_after_change
1800 flag_omit_leaf_frame_pointer turns off the frame pointer by
1801 default. Turn it back on now if we've not got a leaf
1802 function. */
1803 if (flag_omit_leaf_frame_pointer
1804 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1805 return true;
1807 return false;
1810 /* Mark the registers that need to be saved by the callee and calculate
1811 the size of the callee-saved registers area and frame record (both FP
1812 and LR may be omitted). */
1813 static void
1814 aarch64_layout_frame (void)
1816 HOST_WIDE_INT offset = 0;
1817 int regno;
1819 if (reload_completed && cfun->machine->frame.laid_out)
1820 return;
1822 /* First mark all the registers that really need to be saved... */
1823 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1824 cfun->machine->frame.reg_offset[regno] = -1;
1826 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1827 cfun->machine->frame.reg_offset[regno] = -1;
1829 /* ... that includes the eh data registers (if needed)... */
1830 if (crtl->calls_eh_return)
1831 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1832 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1834 /* ... and any callee saved register that dataflow says is live. */
1835 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1836 if (df_regs_ever_live_p (regno)
1837 && !call_used_regs[regno])
1838 cfun->machine->frame.reg_offset[regno] = 0;
1840 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1841 if (df_regs_ever_live_p (regno)
1842 && !call_used_regs[regno])
1843 cfun->machine->frame.reg_offset[regno] = 0;
1845 if (frame_pointer_needed)
1847 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1848 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1849 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1852 /* Now assign stack slots for them. */
1853 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1854 if (cfun->machine->frame.reg_offset[regno] != -1)
1856 cfun->machine->frame.reg_offset[regno] = offset;
1857 offset += UNITS_PER_WORD;
1860 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1861 if (cfun->machine->frame.reg_offset[regno] != -1)
1863 cfun->machine->frame.reg_offset[regno] = offset;
1864 offset += UNITS_PER_WORD;
1867 if (frame_pointer_needed)
1869 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1870 offset += UNITS_PER_WORD;
1873 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1875 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1876 offset += UNITS_PER_WORD;
1879 cfun->machine->frame.padding0 =
1880 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1881 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1883 cfun->machine->frame.saved_regs_size = offset;
1884 cfun->machine->frame.laid_out = true;
1887 /* Make the last instruction frame-related and note that it performs
1888 the operation described by FRAME_PATTERN. */
1890 static void
1891 aarch64_set_frame_expr (rtx frame_pattern)
1893 rtx insn;
1895 insn = get_last_insn ();
1896 RTX_FRAME_RELATED_P (insn) = 1;
1897 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1898 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1899 frame_pattern,
1900 REG_NOTES (insn));
1903 static bool
1904 aarch64_register_saved_on_entry (int regno)
1906 return cfun->machine->frame.reg_offset[regno] != -1;
1910 static void
1911 aarch64_save_or_restore_fprs (int start_offset, int increment,
1912 bool restore, rtx base_rtx)
1915 unsigned regno;
1916 unsigned regno2;
1917 rtx insn;
1918 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1919 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1922 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1924 if (aarch64_register_saved_on_entry (regno))
1926 rtx mem;
1927 mem = gen_mem_ref (DFmode,
1928 plus_constant (Pmode,
1929 base_rtx,
1930 start_offset));
1932 for (regno2 = regno + 1;
1933 regno2 <= V31_REGNUM
1934 && !aarch64_register_saved_on_entry (regno2);
1935 regno2++)
1937 /* Empty loop. */
1939 if (regno2 <= V31_REGNUM &&
1940 aarch64_register_saved_on_entry (regno2))
1942 rtx mem2;
1943 /* Next highest register to be saved. */
1944 mem2 = gen_mem_ref (DFmode,
1945 plus_constant
1946 (Pmode,
1947 base_rtx,
1948 start_offset + increment));
1949 if (restore == false)
1951 insn = emit_insn
1952 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1953 mem2, gen_rtx_REG (DFmode, regno2)));
1956 else
1958 insn = emit_insn
1959 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1960 gen_rtx_REG (DFmode, regno2), mem2));
1962 add_reg_note (insn, REG_CFA_RESTORE,
1963 gen_rtx_REG (DFmode, regno));
1964 add_reg_note (insn, REG_CFA_RESTORE,
1965 gen_rtx_REG (DFmode, regno2));
1968 /* The first part of a frame-related parallel insn
1969 is always assumed to be relevant to the frame
1970 calculations; subsequent parts are only
1971 frame-related if explicitly marked. */
1972 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1973 regno = regno2;
1974 start_offset += increment * 2;
1976 else
1978 if (restore == false)
1979 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1980 else
1982 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1983 add_reg_note (insn, REG_CFA_RESTORE,
1984 gen_rtx_REG (DImode, regno));
1986 start_offset += increment;
1988 RTX_FRAME_RELATED_P (insn) = 1;
1995 /* Offset from the stack pointer of where the saves and
1996 restores have to happen. */
1997 static void
1998 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1999 bool restore)
2001 rtx insn;
2002 rtx base_rtx = stack_pointer_rtx;
2003 HOST_WIDE_INT start_offset = offset;
2004 HOST_WIDE_INT increment = UNITS_PER_WORD;
2005 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
2006 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2007 unsigned regno;
2008 unsigned regno2;
2010 for (regno = R0_REGNUM; regno <= limit; regno++)
2012 if (aarch64_register_saved_on_entry (regno))
2014 rtx mem;
2015 mem = gen_mem_ref (Pmode,
2016 plus_constant (Pmode,
2017 base_rtx,
2018 start_offset));
2020 for (regno2 = regno + 1;
2021 regno2 <= limit
2022 && !aarch64_register_saved_on_entry (regno2);
2023 regno2++)
2025 /* Empty loop. */
2027 if (regno2 <= limit &&
2028 aarch64_register_saved_on_entry (regno2))
2030 rtx mem2;
2031 /* Next highest register to be saved. */
2032 mem2 = gen_mem_ref (Pmode,
2033 plus_constant
2034 (Pmode,
2035 base_rtx,
2036 start_offset + increment));
2037 if (restore == false)
2039 insn = emit_insn
2040 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2041 mem2, gen_rtx_REG (DImode, regno2)));
2044 else
2046 insn = emit_insn
2047 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2048 gen_rtx_REG (DImode, regno2), mem2));
2050 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2051 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2054 /* The first part of a frame-related parallel insn
2055 is always assumed to be relevant to the frame
2056 calculations; subsequent parts are only
2057 frame-related if explicitly marked. */
2058 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2059 1)) = 1;
2060 regno = regno2;
2061 start_offset += increment * 2;
2063 else
2065 if (restore == false)
2066 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2067 else
2069 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2070 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2072 start_offset += increment;
2074 RTX_FRAME_RELATED_P (insn) = 1;
2078 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2082 /* AArch64 stack frames generated by this compiler look like:
2084 +-------------------------------+
2086 | incoming stack arguments |
2088 +-------------------------------+ <-- arg_pointer_rtx
2090 | callee-allocated save area |
2091 | for register varargs |
2093 +-------------------------------+ <-- frame_pointer_rtx
2095 | local variables |
2097 +-------------------------------+
2098 | padding0 | \
2099 +-------------------------------+ |
2100 | | |
2101 | | |
2102 | callee-saved registers | | frame.saved_regs_size
2103 | | |
2104 +-------------------------------+ |
2105 | LR' | |
2106 +-------------------------------+ |
2107 | FP' | /
2108 P +-------------------------------+ <-- hard_frame_pointer_rtx
2109 | dynamic allocation |
2110 +-------------------------------+
2112 | outgoing stack arguments |
2114 +-------------------------------+ <-- stack_pointer_rtx
2116 Dynamic stack allocations such as alloca insert data at point P.
2117 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2118 hard_frame_pointer_rtx unchanged. */
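/* Illustrative example (assumed for exposition, not taken from the
   sources): a function with 16 bytes of local variables, no outgoing
   arguments and x19/x20 live across a call gets a 48-byte frame:
   FP'/LR' at [sp, 0..15], x19/x20 at [sp, 16..31] and the locals at
   [sp, 32..47], so the prologue is roughly
     stp  x29, x30, [sp, -48]!
     add  x29, sp, 0
     stp  x19, x20, [sp, 16]
   with hard_frame_pointer_rtx (x29) left pointing at the saved FP'.  */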
2120 /* Generate the prologue instructions for entry into a function.
2121 Establish the stack frame by decreasing the stack pointer with a
2122 properly calculated size and, if necessary, create a frame record
2123 filled with the values of LR and previous frame pointer. The
2124 current FP is also set up if it is in use. */
2126 void
2127 aarch64_expand_prologue (void)
2129 /* sub sp, sp, #<frame_size>
2130 stp {fp, lr}, [sp, #<frame_size> - 16]
2131 add fp, sp, #<frame_size> - hardfp_offset
2132 stp {cs_reg}, [fp, #-16] etc.
2134 sub sp, sp, <final_adjustment_if_any>  */
2136 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2137 HOST_WIDE_INT frame_size, offset;
2138 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2139 rtx insn;
2141 aarch64_layout_frame ();
2142 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2143 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2144 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2145 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2146 + crtl->outgoing_args_size);
2147 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2148 STACK_BOUNDARY / BITS_PER_UNIT);
2150 if (flag_stack_usage_info)
2151 current_function_static_stack_size = frame_size;
2153 fp_offset = (offset
2154 - original_frame_size
2155 - cfun->machine->frame.saved_regs_size);
2157 /* Store pairs and load pairs have a range of only -512 to 504.  */
2158 if (offset >= 512)
2160 /* When the frame has a large size, an initial decrease is done on
2161 the stack pointer to jump over the callee-allocated save area for
2162 register varargs, the local variable area and/or the callee-saved
2163 register area. This will allow the pre-index write-back
2164 store pair instructions to be used for setting up the stack frame
2165 efficiently. */
2166 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2167 if (offset >= 512)
2168 offset = cfun->machine->frame.saved_regs_size;
2170 frame_size -= (offset + crtl->outgoing_args_size);
2171 fp_offset = 0;
2173 if (frame_size >= 0x1000000)
2175 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2176 emit_move_insn (op0, GEN_INT (-frame_size));
2177 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2178 aarch64_set_frame_expr (gen_rtx_SET
2179 (Pmode, stack_pointer_rtx,
2180 plus_constant (Pmode,
2181 stack_pointer_rtx,
2182 -frame_size)));
2184 else if (frame_size > 0)
2186 if ((frame_size & 0xfff) != frame_size)
2188 insn = emit_insn (gen_add2_insn
2189 (stack_pointer_rtx,
2190 GEN_INT (-(frame_size
2191 & ~(HOST_WIDE_INT)0xfff))));
2192 RTX_FRAME_RELATED_P (insn) = 1;
2194 if ((frame_size & 0xfff) != 0)
2196 insn = emit_insn (gen_add2_insn
2197 (stack_pointer_rtx,
2198 GEN_INT (-(frame_size
2199 & (HOST_WIDE_INT)0xfff))));
2200 RTX_FRAME_RELATED_P (insn) = 1;
2204 else
2205 frame_size = -1;
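/* Illustration with made-up numbers: a residual frame_size of 0x12345
   cannot be encoded in one add/sub immediate, so the code above emits
   two adjustments, roughly
     sub  sp, sp, #0x12000
     sub  sp, sp, #0x345
   i.e. the 12-bit-shifted part followed by the low 12 bits.  */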
2207 if (offset > 0)
2209 /* Save the frame pointer and lr if the frame pointer is needed
2210 first. Make the frame pointer point to the location of the
2211 old frame pointer on the stack. */
2212 if (frame_pointer_needed)
2214 rtx mem_fp, mem_lr;
2216 if (fp_offset)
2218 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2219 GEN_INT (-offset)));
2220 RTX_FRAME_RELATED_P (insn) = 1;
2221 aarch64_set_frame_expr (gen_rtx_SET
2222 (Pmode, stack_pointer_rtx,
2223 gen_rtx_MINUS (Pmode,
2224 stack_pointer_rtx,
2225 GEN_INT (offset))));
2226 mem_fp = gen_frame_mem (DImode,
2227 plus_constant (Pmode,
2228 stack_pointer_rtx,
2229 fp_offset));
2230 mem_lr = gen_frame_mem (DImode,
2231 plus_constant (Pmode,
2232 stack_pointer_rtx,
2233 fp_offset
2234 + UNITS_PER_WORD));
2235 insn = emit_insn (gen_store_pairdi (mem_fp,
2236 hard_frame_pointer_rtx,
2237 mem_lr,
2238 gen_rtx_REG (DImode,
2239 LR_REGNUM)));
2241 else
2243 insn = emit_insn (gen_storewb_pairdi_di
2244 (stack_pointer_rtx, stack_pointer_rtx,
2245 hard_frame_pointer_rtx,
2246 gen_rtx_REG (DImode, LR_REGNUM),
2247 GEN_INT (-offset),
2248 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2249 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2252 /* The first part of a frame-related parallel insn is always
2253 assumed to be relevant to the frame calculations;
2254 subsequent parts are only frame-related if explicitly
2255 marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257 RTX_FRAME_RELATED_P (insn) = 1;
2259 /* Set up frame pointer to point to the location of the
2260 previous frame pointer on the stack. */
2261 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2262 stack_pointer_rtx,
2263 GEN_INT (fp_offset)));
2264 aarch64_set_frame_expr (gen_rtx_SET
2265 (Pmode, hard_frame_pointer_rtx,
2266 plus_constant (Pmode,
2267 stack_pointer_rtx,
2268 fp_offset)));
2269 RTX_FRAME_RELATED_P (insn) = 1;
2270 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2271 hard_frame_pointer_rtx));
2273 else
2275 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2276 GEN_INT (-offset)));
2277 RTX_FRAME_RELATED_P (insn) = 1;
2280 aarch64_save_or_restore_callee_save_registers
2281 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2284 /* When offset >= 512,
2285 sub sp, sp, #<outgoing_args_size> */
2286 if (frame_size > -1)
2288 if (crtl->outgoing_args_size > 0)
2290 insn = emit_insn (gen_add2_insn
2291 (stack_pointer_rtx,
2292 GEN_INT (- crtl->outgoing_args_size)));
2293 RTX_FRAME_RELATED_P (insn) = 1;
2298 /* Generate the epilogue instructions for returning from a function. */
2299 void
2300 aarch64_expand_epilogue (bool for_sibcall)
2302 HOST_WIDE_INT original_frame_size, frame_size, offset;
2303 HOST_WIDE_INT fp_offset;
2304 rtx insn;
2305 rtx cfa_reg;
2307 aarch64_layout_frame ();
2308 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2309 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2310 + crtl->outgoing_args_size);
2311 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2312 STACK_BOUNDARY / BITS_PER_UNIT);
2314 fp_offset = (offset
2315 - original_frame_size
2316 - cfun->machine->frame.saved_regs_size);
2318 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2320 /* Store pairs and load pairs have a range of only -512 to 504.  */
2321 if (offset >= 512)
2323 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2324 if (offset >= 512)
2325 offset = cfun->machine->frame.saved_regs_size;
2327 frame_size -= (offset + crtl->outgoing_args_size);
2328 fp_offset = 0;
2329 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2331 insn = emit_insn (gen_add2_insn
2332 (stack_pointer_rtx,
2333 GEN_INT (crtl->outgoing_args_size)));
2334 RTX_FRAME_RELATED_P (insn) = 1;
2337 else
2338 frame_size = -1;
2340 /* If there were outgoing arguments or we've done dynamic stack
2341 allocation, then restore the stack pointer from the frame
2342 pointer. This is at most one insn and more efficient than using
2343 GCC's internal mechanism. */
2344 if (frame_pointer_needed
2345 && (crtl->outgoing_args_size || cfun->calls_alloca))
2347 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2348 hard_frame_pointer_rtx,
2349 GEN_INT (- fp_offset)));
2350 RTX_FRAME_RELATED_P (insn) = 1;
2351 /* As SP is set to (FP - fp_offset), according to the rules in
2352 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2353 from the value of SP from now on. */
2354 cfa_reg = stack_pointer_rtx;
2357 aarch64_save_or_restore_callee_save_registers
2358 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2360 /* Restore the frame pointer and lr if the frame pointer is needed. */
2361 if (offset > 0)
2363 if (frame_pointer_needed)
2365 rtx mem_fp, mem_lr;
2367 if (fp_offset)
2369 mem_fp = gen_frame_mem (DImode,
2370 plus_constant (Pmode,
2371 stack_pointer_rtx,
2372 fp_offset));
2373 mem_lr = gen_frame_mem (DImode,
2374 plus_constant (Pmode,
2375 stack_pointer_rtx,
2376 fp_offset
2377 + UNITS_PER_WORD));
2378 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2379 mem_fp,
2380 gen_rtx_REG (DImode,
2381 LR_REGNUM),
2382 mem_lr));
2384 else
2386 insn = emit_insn (gen_loadwb_pairdi_di
2387 (stack_pointer_rtx,
2388 stack_pointer_rtx,
2389 hard_frame_pointer_rtx,
2390 gen_rtx_REG (DImode, LR_REGNUM),
2391 GEN_INT (offset),
2392 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2393 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2394 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2395 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2396 plus_constant (Pmode, cfa_reg,
2397 offset))));
2400 /* The first part of a frame-related parallel insn
2401 is always assumed to be relevant to the frame
2402 calculations; subsequent parts are only
2403 frame-related if explicitly marked. */
2404 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2405 RTX_FRAME_RELATED_P (insn) = 1;
2406 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2407 add_reg_note (insn, REG_CFA_RESTORE,
2408 gen_rtx_REG (DImode, LR_REGNUM));
2410 if (fp_offset)
2412 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2413 GEN_INT (offset)));
2414 RTX_FRAME_RELATED_P (insn) = 1;
2417 else
2419 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2420 GEN_INT (offset)));
2421 RTX_FRAME_RELATED_P (insn) = 1;
2425 /* Stack adjustment for exception handler. */
2426 if (crtl->calls_eh_return)
2428 /* We need to unwind the stack by the offset computed by
2429 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2430 based on SP. Ideally we would update the SP and define the
2431 CFA along the lines of:
2433 SP = SP + EH_RETURN_STACKADJ_RTX
2434 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2436 However the dwarf emitter only understands a constant
2437 register offset.
2439 The solution chosen here is to use the otherwise unused IP0
2440 as a temporary register to hold the current SP value. The
2441 CFA is described using IP0; then SP is modified.  */
2443 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2445 insn = emit_move_insn (ip0, stack_pointer_rtx);
2446 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2447 RTX_FRAME_RELATED_P (insn) = 1;
2449 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2451 /* Ensure the assignment to IP0 does not get optimized away. */
2452 emit_use (ip0);
2455 if (frame_size > -1)
2457 if (frame_size >= 0x1000000)
2459 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2460 emit_move_insn (op0, GEN_INT (frame_size));
2461 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2462 aarch64_set_frame_expr (gen_rtx_SET
2463 (Pmode, stack_pointer_rtx,
2464 plus_constant (Pmode,
2465 stack_pointer_rtx,
2466 frame_size)));
2468 else if (frame_size > 0)
2470 if ((frame_size & 0xfff) != 0)
2472 insn = emit_insn (gen_add2_insn
2473 (stack_pointer_rtx,
2474 GEN_INT ((frame_size
2475 & (HOST_WIDE_INT) 0xfff))));
2476 RTX_FRAME_RELATED_P (insn) = 1;
2478 if ((frame_size & 0xfff) != frame_size)
2480 insn = emit_insn (gen_add2_insn
2481 (stack_pointer_rtx,
2482 GEN_INT ((frame_size
2483 & ~ (HOST_WIDE_INT) 0xfff))));
2484 RTX_FRAME_RELATED_P (insn) = 1;
2488 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2489 plus_constant (Pmode,
2490 stack_pointer_rtx,
2491 offset)));
2494 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2495 if (!for_sibcall)
2496 emit_jump_insn (ret_rtx);
2499 /* Return the place to copy the exception unwinding return address to.
2500 This will probably be a stack slot, but could (in theory) be the
2501 return register.  */
2503 rtx aarch64_final_eh_return_addr (void)
2505 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2506 aarch64_layout_frame ();
2507 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2508 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2509 + crtl->outgoing_args_size);
2510 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2511 STACK_BOUNDARY / BITS_PER_UNIT);
2512 fp_offset = offset
2513 - original_frame_size
2514 - cfun->machine->frame.saved_regs_size;
2516 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2517 return gen_rtx_REG (DImode, LR_REGNUM);
2519 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2520 result in a store to save LR introduced by builtin_eh_return () being
2521 incorrectly deleted because the alias is not detected.
2522 So in the calculation of the address to copy the exception unwinding
2523 return address to, we distinguish two cases.
2524 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2525 we return a SP-relative location since all the addresses are SP-relative
2526 in this case. This prevents the store from being optimized away.
2527 If the fp_offset is not 0, then the addresses will be FP-relative and
2528 therefore we return a FP-relative location. */
2530 if (frame_pointer_needed)
2532 if (fp_offset)
2533 return gen_frame_mem (DImode,
2534 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2535 else
2536 return gen_frame_mem (DImode,
2537 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2540 /* If FP is not needed, we calculate the location of LR, which would be
2541 at the top of the saved registers block. */
2543 return gen_frame_mem (DImode,
2544 plus_constant (Pmode,
2545 stack_pointer_rtx,
2546 fp_offset
2547 + cfun->machine->frame.saved_regs_size
2548 - 2 * UNITS_PER_WORD));
2551 /* Possibly output code to build up a constant in a register. For
2552 the benefit of the costs infrastructure, returns the number of
2553 instructions which would be emitted. GENERATE inhibits or
2554 enables code generation. */
2556 static int
2557 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2559 int insns = 0;
2561 if (aarch64_bitmask_imm (val, DImode))
2563 if (generate)
2564 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2565 insns = 1;
2567 else
2569 int i;
2570 int ncount = 0;
2571 int zcount = 0;
2572 HOST_WIDE_INT valp = val >> 16;
2573 HOST_WIDE_INT valm;
2574 HOST_WIDE_INT tval;
2576 for (i = 16; i < 64; i += 16)
2578 valm = (valp & 0xffff);
2580 if (valm != 0)
2581 ++ zcount;
2583 if (valm != 0xffff)
2584 ++ ncount;
2586 valp >>= 16;
2589 /* zcount contains the number of additional MOVK instructions
2590 required if the constant is built up with an initial MOVZ instruction,
2591 while ncount is the number of MOVK instructions required if starting
2592 with a MOVN instruction.  Choose the sequence that requires the fewest
2593 instructions, preferring a MOVZ-based sequence when both require the
2594 same number.  */
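/* Illustration (not from the original sources): for
   val = 0xffffffffffff1234 every upper 16-bit chunk is 0xffff, so
   ncount == 0 and zcount == 3; the MOVN-based sequence wins and the
   value is built with a single move (assembled as MOVN x<regnum>,
   #0xedcb), whereas the MOVZ-based sequence would need one MOVZ plus
   three MOVKs.  */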
2595 if (ncount < zcount)
2597 if (generate)
2598 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2599 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2600 tval = 0xffff;
2601 insns++;
2603 else
2605 if (generate)
2606 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2607 GEN_INT (val & 0xffff));
2608 tval = 0;
2609 insns++;
2612 val >>= 16;
2614 for (i = 16; i < 64; i += 16)
2616 if ((val & 0xffff) != tval)
2618 if (generate)
2619 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2620 GEN_INT (i),
2621 GEN_INT (val & 0xffff)));
2622 insns++;
2624 val >>= 16;
2627 return insns;
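/* Add DELTA to the register REGNUM.  If the adjustment cannot be done
   with a single add/sub immediate, SCRATCHREG is used as a temporary:
   it holds DELTA / 4096 for an add/sub with a 12-bit left shift, or,
   for very large deltas, the full constant built by
   aarch64_build_constant.  */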
2630 static void
2631 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2633 HOST_WIDE_INT mdelta = delta;
2634 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2635 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2637 if (mdelta < 0)
2638 mdelta = -mdelta;
2640 if (mdelta >= 4096 * 4096)
2642 (void) aarch64_build_constant (scratchreg, delta, true);
2643 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2645 else if (mdelta > 0)
2647 if (mdelta >= 4096)
2649 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2650 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2651 if (delta < 0)
2652 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2653 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2654 else
2655 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2656 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2658 if (mdelta % 4096 != 0)
2660 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2661 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2662 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2667 /* Output code to add DELTA to the first argument, and then jump
2668 to FUNCTION. Used for C++ multiple inheritance. */
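/* For the simplest case (VCALL_OFFSET == 0 and a small positive DELTA)
   the generated thunk is roughly
     add  x0, x0, #delta
     b    <function>
   (an illustration only; the exact sequence depends on DELTA and
   VCALL_OFFSET).  */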
2669 static void
2670 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2671 HOST_WIDE_INT delta,
2672 HOST_WIDE_INT vcall_offset,
2673 tree function)
2675 /* The this pointer is always in x0. Note that this differs from
2676 Arm where the this pointer may be bumped to r1 if r0 is required
2677 to return a pointer to an aggregate. On AArch64 a result value
2678 pointer will be in x8. */
2679 int this_regno = R0_REGNUM;
2680 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2682 reload_completed = 1;
2683 emit_note (NOTE_INSN_PROLOGUE_END);
2685 if (vcall_offset == 0)
2686 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2687 else
2689 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2691 this_rtx = gen_rtx_REG (Pmode, this_regno);
2692 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2693 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2695 addr = this_rtx;
2696 if (delta != 0)
2698 if (delta >= -256 && delta < 256)
2699 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2700 plus_constant (Pmode, this_rtx, delta));
2701 else
2702 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2705 if (Pmode == ptr_mode)
2706 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2707 else
2708 aarch64_emit_move (temp0,
2709 gen_rtx_ZERO_EXTEND (Pmode,
2710 gen_rtx_MEM (ptr_mode, addr)));
2712 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2713 addr = plus_constant (Pmode, temp0, vcall_offset);
2714 else
2716 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2717 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2720 if (Pmode == ptr_mode)
2721 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2722 else
2723 aarch64_emit_move (temp1,
2724 gen_rtx_SIGN_EXTEND (Pmode,
2725 gen_rtx_MEM (ptr_mode, addr)));
2727 emit_insn (gen_add2_insn (this_rtx, temp1));
2730 /* Generate a tail call to the target function. */
2731 if (!TREE_USED (function))
2733 assemble_external (function);
2734 TREE_USED (function) = 1;
2736 funexp = XEXP (DECL_RTL (function), 0);
2737 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2738 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2739 SIBLING_CALL_P (insn) = 1;
2741 insn = get_insns ();
2742 shorten_branches (insn);
2743 final_start_function (insn, file, 1);
2744 final (insn, file, 1);
2745 final_end_function ();
2747 /* Stop pretending to be a post-reload pass. */
2748 reload_completed = 0;
2751 static int
2752 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2754 if (GET_CODE (*x) == SYMBOL_REF)
2755 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2757 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2758 TLS offsets, not real symbol references. */
2759 if (GET_CODE (*x) == UNSPEC
2760 && XINT (*x, 1) == UNSPEC_TLS)
2761 return -1;
2763 return 0;
2766 static bool
2767 aarch64_tls_referenced_p (rtx x)
2769 if (!TARGET_HAVE_TLS)
2770 return false;
2772 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
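/* qsort/bsearch comparison callback for the aarch64_bitmasks table
   built below.  */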
2776 static int
2777 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2779 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2780 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2782 if (*imm1 < *imm2)
2783 return -1;
2784 if (*imm1 > *imm2)
2785 return +1;
2786 return 0;
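/* Build the table of every immediate representable by the AArch64
   logical (bitmask) instructions: for each element size E in
   {2, 4, ..., 64}, a run of S set bits rotated right by R and then
   replicated across the 64-bit word.  For example (illustration only),
   E = 8, S = 3, R = 1 gives the byte 0x83, replicated to
   0x8383838383838383.  The table is sorted so that aarch64_bitmask_imm
   can use bsearch on it.  */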
2790 static void
2791 aarch64_build_bitmask_table (void)
2793 unsigned HOST_WIDE_INT mask, imm;
2794 unsigned int log_e, e, s, r;
2795 unsigned int nimms = 0;
2797 for (log_e = 1; log_e <= 6; log_e++)
2799 e = 1 << log_e;
2800 if (e == 64)
2801 mask = ~(HOST_WIDE_INT) 0;
2802 else
2803 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2804 for (s = 1; s < e; s++)
2806 for (r = 0; r < e; r++)
2808 /* Set S consecutive bits to 1 (S < 64).  */
2809 imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;
2810 /* Rotate right by R.  */
2811 if (r != 0)
2812 imm = ((imm >> r) | (imm << (e - r))) & mask;
2813 /* Replicate the pattern to fill 64 bits, depending on the element size.  */
2814 switch (log_e) {
2815 case 1: imm |= (imm << 2);
2816 case 2: imm |= (imm << 4);
2817 case 3: imm |= (imm << 8);
2818 case 4: imm |= (imm << 16);
2819 case 5: imm |= (imm << 32);
2820 case 6:
2821 break;
2822 default:
2823 gcc_unreachable ();
2825 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2826 aarch64_bitmasks[nimms++] = imm;
2831 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2832 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2833 aarch64_bitmasks_cmp);
2837 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2838 a left shift of 0 or 12 bits. */
2839 bool
2840 aarch64_uimm12_shift (HOST_WIDE_INT val)
2842 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2843 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
2848 /* Return true if val is an immediate that can be loaded into a
2849 register by a MOVZ instruction. */
2850 static bool
2851 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2853 if (GET_MODE_SIZE (mode) > 4)
2855 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2856 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2857 return 1;
2859 else
2861 /* Ignore sign extension. */
2862 val &= (HOST_WIDE_INT) 0xffffffff;
2864 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2865 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2869 /* Return true if val is a valid bitmask immediate. */
2870 bool
2871 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2873 if (GET_MODE_SIZE (mode) < 8)
2875 /* Replicate bit pattern. */
2876 val &= (HOST_WIDE_INT) 0xffffffff;
2877 val |= val << 32;
2879 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2880 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2884 /* Return true if val is an immediate that can be loaded into a
2885 register in a single instruction. */
2886 bool
2887 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2889 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2890 return 1;
2891 return aarch64_bitmask_imm (val, mode);
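/* Illustration (not from the original sources): 0xffffffffffff0123 is
   neither a MOVZ nor a bitmask immediate, but its complement
   0x000000000000fedc is MOVZ-representable, so the value is accepted
   here and can be loaded with a single MOVN.  */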
2894 static bool
2895 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2897 rtx base, offset;
2899 if (GET_CODE (x) == HIGH)
2900 return true;
2902 split_const (x, &base, &offset);
2903 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2905 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2906 != SYMBOL_FORCE_TO_MEM)
2907 return true;
2908 else
2909 /* Avoid generating a 64-bit relocation in ILP32; leave it
2910 to aarch64_expand_mov_immediate to handle properly.  */
2911 return mode != ptr_mode;
2914 return aarch64_tls_referenced_p (x);
2917 /* Return true if register REGNO is a valid index register.
2918 STRICT_P is true if REG_OK_STRICT is in effect. */
2920 bool
2921 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2923 if (!HARD_REGISTER_NUM_P (regno))
2925 if (!strict_p)
2926 return true;
2928 if (!reg_renumber)
2929 return false;
2931 regno = reg_renumber[regno];
2933 return GP_REGNUM_P (regno);
2936 /* Return true if register REGNO is a valid base register for mode MODE.
2937 STRICT_P is true if REG_OK_STRICT is in effect. */
2939 bool
2940 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2942 if (!HARD_REGISTER_NUM_P (regno))
2944 if (!strict_p)
2945 return true;
2947 if (!reg_renumber)
2948 return false;
2950 regno = reg_renumber[regno];
2953 /* The fake registers will be eliminated to either the stack or
2954 hard frame pointer, both of which are usually valid base registers.
2955 Reload deals with the cases where the eliminated form isn't valid. */
2956 return (GP_REGNUM_P (regno)
2957 || regno == SP_REGNUM
2958 || regno == FRAME_POINTER_REGNUM
2959 || regno == ARG_POINTER_REGNUM);
2962 /* Return true if X is a valid base register for mode MODE.
2963 STRICT_P is true if REG_OK_STRICT is in effect. */
2965 static bool
2966 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2968 if (!strict_p && GET_CODE (x) == SUBREG)
2969 x = SUBREG_REG (x);
2971 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2974 /* Return true if address offset is a valid index. If it is, fill in INFO
2975 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2977 static bool
2978 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2979 enum machine_mode mode, bool strict_p)
2981 enum aarch64_address_type type;
2982 rtx index;
2983 int shift;
2985 /* (reg:P) */
2986 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2987 && GET_MODE (x) == Pmode)
2989 type = ADDRESS_REG_REG;
2990 index = x;
2991 shift = 0;
2993 /* (sign_extend:DI (reg:SI)) */
2994 else if ((GET_CODE (x) == SIGN_EXTEND
2995 || GET_CODE (x) == ZERO_EXTEND)
2996 && GET_MODE (x) == DImode
2997 && GET_MODE (XEXP (x, 0)) == SImode)
2999 type = (GET_CODE (x) == SIGN_EXTEND)
3000 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3001 index = XEXP (x, 0);
3002 shift = 0;
3004 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3005 else if (GET_CODE (x) == MULT
3006 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3007 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3008 && GET_MODE (XEXP (x, 0)) == DImode
3009 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3010 && CONST_INT_P (XEXP (x, 1)))
3012 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3013 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3014 index = XEXP (XEXP (x, 0), 0);
3015 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3017 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3018 else if (GET_CODE (x) == ASHIFT
3019 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3020 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3021 && GET_MODE (XEXP (x, 0)) == DImode
3022 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3023 && CONST_INT_P (XEXP (x, 1)))
3025 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3026 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3027 index = XEXP (XEXP (x, 0), 0);
3028 shift = INTVAL (XEXP (x, 1));
3030 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3031 else if ((GET_CODE (x) == SIGN_EXTRACT
3032 || GET_CODE (x) == ZERO_EXTRACT)
3033 && GET_MODE (x) == DImode
3034 && GET_CODE (XEXP (x, 0)) == MULT
3035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3038 type = (GET_CODE (x) == SIGN_EXTRACT)
3039 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3040 index = XEXP (XEXP (x, 0), 0);
3041 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3042 if (INTVAL (XEXP (x, 1)) != 32 + shift
3043 || INTVAL (XEXP (x, 2)) != 0)
3044 shift = -1;
3046 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3047 (const_int 0xffffffff<<shift)) */
3048 else if (GET_CODE (x) == AND
3049 && GET_MODE (x) == DImode
3050 && GET_CODE (XEXP (x, 0)) == MULT
3051 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3052 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3053 && CONST_INT_P (XEXP (x, 1)))
3055 type = ADDRESS_REG_UXTW;
3056 index = XEXP (XEXP (x, 0), 0);
3057 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3058 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
3059 shift = -1;
3061 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3062 else if ((GET_CODE (x) == SIGN_EXTRACT
3063 || GET_CODE (x) == ZERO_EXTRACT)
3064 && GET_MODE (x) == DImode
3065 && GET_CODE (XEXP (x, 0)) == ASHIFT
3066 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3067 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3069 type = (GET_CODE (x) == SIGN_EXTRACT)
3070 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3071 index = XEXP (XEXP (x, 0), 0);
3072 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3073 if (INTVAL (XEXP (x, 1)) != 32 + shift
3074 || INTVAL (XEXP (x, 2)) != 0)
3075 shift = -1;
3077 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3078 (const_int 0xffffffff<<shift)) */
3079 else if (GET_CODE (x) == AND
3080 && GET_MODE (x) == DImode
3081 && GET_CODE (XEXP (x, 0)) == ASHIFT
3082 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3083 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3084 && CONST_INT_P (XEXP (x, 1)))
3086 type = ADDRESS_REG_UXTW;
3087 index = XEXP (XEXP (x, 0), 0);
3088 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3089 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
3090 shift = -1;
3092 /* (mult:P (reg:P) (const_int scale)) */
3093 else if (GET_CODE (x) == MULT
3094 && GET_MODE (x) == Pmode
3095 && GET_MODE (XEXP (x, 0)) == Pmode
3096 && CONST_INT_P (XEXP (x, 1)))
3098 type = ADDRESS_REG_REG;
3099 index = XEXP (x, 0);
3100 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3102 /* (ashift:P (reg:P) (const_int shift)) */
3103 else if (GET_CODE (x) == ASHIFT
3104 && GET_MODE (x) == Pmode
3105 && GET_MODE (XEXP (x, 0)) == Pmode
3106 && CONST_INT_P (XEXP (x, 1)))
3108 type = ADDRESS_REG_REG;
3109 index = XEXP (x, 0);
3110 shift = INTVAL (XEXP (x, 1));
3112 else
3113 return false;
3115 if (GET_CODE (index) == SUBREG)
3116 index = SUBREG_REG (index);
3118 if ((shift == 0
3119      || (shift > 0 && shift <= 3
3120          && (1 << shift) == GET_MODE_SIZE (mode)))
3121 && REG_P (index)
3122 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3124 info->type = type;
3125 info->offset = index;
3126 info->shift = shift;
3127 return true;
3130 return false;
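/* For example (hypothetical RTL), with a 4-byte access the index
   (mult:DI (sign_extend:DI (reg:SI w2)) (const_int 4)) is classified
   as ADDRESS_REG_SXTW with shift 2, i.e. the [x1, w2, sxtw 2]
   addressing form once combined with a base register such as x1.  */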
3133 static inline bool
3134 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3136 return (offset >= -64 * GET_MODE_SIZE (mode)
3137 && offset < 64 * GET_MODE_SIZE (mode)
3138 && offset % GET_MODE_SIZE (mode) == 0);
3141 static inline bool
3142 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3143 HOST_WIDE_INT offset)
3145 return offset >= -256 && offset < 256;
3148 static inline bool
3149 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3151 return (offset >= 0
3152 && offset < 4096 * GET_MODE_SIZE (mode)
3153 && offset % GET_MODE_SIZE (mode) == 0);
3156 /* Return true if X is a valid address for machine mode MODE. If it is,
3157 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3158 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3160 static bool
3161 aarch64_classify_address (struct aarch64_address_info *info,
3162 rtx x, enum machine_mode mode,
3163 RTX_CODE outer_code, bool strict_p)
3165 enum rtx_code code = GET_CODE (x);
3166 rtx op0, op1;
3167 bool allow_reg_index_p =
3168 outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
3170 /* Don't support anything other than POST_INC or REG addressing for
3171 AdvSIMD. */
3172 if (aarch64_vector_mode_p (mode)
3173 && (code != POST_INC && code != REG))
3174 return false;
3176 switch (code)
3178 case REG:
3179 case SUBREG:
3180 info->type = ADDRESS_REG_IMM;
3181 info->base = x;
3182 info->offset = const0_rtx;
3183 return aarch64_base_register_rtx_p (x, strict_p);
3185 case PLUS:
3186 op0 = XEXP (x, 0);
3187 op1 = XEXP (x, 1);
3188 if (GET_MODE_SIZE (mode) != 0
3189 && CONST_INT_P (op1)
3190 && aarch64_base_register_rtx_p (op0, strict_p))
3192 HOST_WIDE_INT offset = INTVAL (op1);
3194 info->type = ADDRESS_REG_IMM;
3195 info->base = op0;
3196 info->offset = op1;
3198 /* TImode and TFmode values are allowed in both pairs of X
3199 registers and individual Q registers. The available
3200 address modes are:
3201 X,X: 7-bit signed scaled offset
3202 Q: 9-bit signed offset
3203 We conservatively require an offset representable in either mode.  */
3205 if (mode == TImode || mode == TFmode)
3206 return (offset_7bit_signed_scaled_p (mode, offset)
3207 && offset_9bit_signed_unscaled_p (mode, offset));
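/* e.g. for TImode the accepted offsets are the multiples of 16 in
   [-256, 240]: the intersection of the 7-bit scaled range
   [-1024, 1008] with the 9-bit unscaled range [-256, 255].
   (Illustrative note, not in the original sources.)  */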
3209 if (outer_code == PARALLEL)
3210 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3211 && offset_7bit_signed_scaled_p (mode, offset));
3212 else
3213 return (offset_9bit_signed_unscaled_p (mode, offset)
3214 || offset_12bit_unsigned_scaled_p (mode, offset));
3217 if (allow_reg_index_p)
3219 /* Look for base + (scaled/extended) index register. */
3220 if (aarch64_base_register_rtx_p (op0, strict_p)
3221 && aarch64_classify_index (info, op1, mode, strict_p))
3223 info->base = op0;
3224 return true;
3226 if (aarch64_base_register_rtx_p (op1, strict_p)
3227 && aarch64_classify_index (info, op0, mode, strict_p))
3229 info->base = op1;
3230 return true;
3234 return false;
3236 case POST_INC:
3237 case POST_DEC:
3238 case PRE_INC:
3239 case PRE_DEC:
3240 info->type = ADDRESS_REG_WB;
3241 info->base = XEXP (x, 0);
3242 info->offset = NULL_RTX;
3243 return aarch64_base_register_rtx_p (info->base, strict_p);
3245 case POST_MODIFY:
3246 case PRE_MODIFY:
3247 info->type = ADDRESS_REG_WB;
3248 info->base = XEXP (x, 0);
3249 if (GET_CODE (XEXP (x, 1)) == PLUS
3250 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3251 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3252 && aarch64_base_register_rtx_p (info->base, strict_p))
3254 HOST_WIDE_INT offset;
3255 info->offset = XEXP (XEXP (x, 1), 1);
3256 offset = INTVAL (info->offset);
3258 /* TImode and TFmode values are allowed in both pairs of X
3259 registers and individual Q registers. The available
3260 address modes are:
3261 X,X: 7-bit signed scaled offset
3262 Q: 9-bit signed offset
3263 We conservatively require an offset representable in either mode.  */
3265 if (mode == TImode || mode == TFmode)
3266 return (offset_7bit_signed_scaled_p (mode, offset)
3267 && offset_9bit_signed_unscaled_p (mode, offset));
3269 if (outer_code == PARALLEL)
3270 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3271 && offset_7bit_signed_scaled_p (mode, offset));
3272 else
3273 return offset_9bit_signed_unscaled_p (mode, offset);
3275 return false;
3277 case CONST:
3278 case SYMBOL_REF:
3279 case LABEL_REF:
3280 /* Load literal: a pc-relative constant pool entry.  Only supported
3281 for SI mode or larger. */
3282 info->type = ADDRESS_SYMBOLIC;
3283 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3285 rtx sym, addend;
3287 split_const (x, &sym, &addend);
3288 return (GET_CODE (sym) == LABEL_REF
3289 || (GET_CODE (sym) == SYMBOL_REF
3290 && CONSTANT_POOL_ADDRESS_P (sym)));
3292 return false;
3294 case LO_SUM:
3295 info->type = ADDRESS_LO_SUM;
3296 info->base = XEXP (x, 0);
3297 info->offset = XEXP (x, 1);
3298 if (allow_reg_index_p
3299 && aarch64_base_register_rtx_p (info->base, strict_p))
3301 rtx sym, offs;
3302 split_const (info->offset, &sym, &offs);
3303 if (GET_CODE (sym) == SYMBOL_REF
3304 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3305 == SYMBOL_SMALL_ABSOLUTE))
3307 /* The symbol and offset must be aligned to the access size. */
3308 unsigned int align;
3309 unsigned int ref_size;
3311 if (CONSTANT_POOL_ADDRESS_P (sym))
3312 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3313 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3315 tree exp = SYMBOL_REF_DECL (sym);
3316 align = TYPE_ALIGN (TREE_TYPE (exp));
3317 align = CONSTANT_ALIGNMENT (exp, align);
3319 else if (SYMBOL_REF_DECL (sym))
3320 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3321 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3322 && SYMBOL_REF_BLOCK (sym) != NULL)
3323 align = SYMBOL_REF_BLOCK (sym)->alignment;
3324 else
3325 align = BITS_PER_UNIT;
3327 ref_size = GET_MODE_SIZE (mode);
3328 if (ref_size == 0)
3329 ref_size = GET_MODE_SIZE (DImode);
3331 return ((INTVAL (offs) & (ref_size - 1)) == 0
3332 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3335 return false;
3337 default:
3338 return false;
3342 bool
3343 aarch64_symbolic_address_p (rtx x)
3345 rtx offset;
3347 split_const (x, &x, &offset);
3348 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3351 /* Classify the base of symbolic expression X, given that X appears in
3352 context CONTEXT. */
3354 enum aarch64_symbol_type
3355 aarch64_classify_symbolic_expression (rtx x,
3356 enum aarch64_symbol_context context)
3358 rtx offset;
3360 split_const (x, &x, &offset);
3361 return aarch64_classify_symbol (x, context);
3365 /* Return TRUE if X is a legitimate address for accessing memory in
3366 mode MODE. */
3367 static bool
3368 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3370 struct aarch64_address_info addr;
3372 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3375 /* Return TRUE if X is a legitimate address for accessing memory in
3376 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3377 pair operation. */
3378 bool
3379 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3380 RTX_CODE outer_code, bool strict_p)
3382 struct aarch64_address_info addr;
3384 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3387 /* Return TRUE if rtx X is immediate constant 0.0 */
3388 bool
3389 aarch64_float_const_zero_rtx_p (rtx x)
3391 REAL_VALUE_TYPE r;
3393 if (GET_MODE (x) == VOIDmode)
3394 return false;
3396 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3397 if (REAL_VALUE_MINUS_ZERO (r))
3398 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3399 return REAL_VALUES_EQUAL (r, dconst0);
3402 /* Return the fixed registers used for condition codes. */
3404 static bool
3405 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3407 *p1 = CC_REGNUM;
3408 *p2 = INVALID_REGNUM;
3409 return true;
3412 enum machine_mode
3413 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3415 /* All floating point compares return CCFP if it is an equality
3416 comparison, and CCFPE otherwise. */
3417 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3419 switch (code)
3421 case EQ:
3422 case NE:
3423 case UNORDERED:
3424 case ORDERED:
3425 case UNLT:
3426 case UNLE:
3427 case UNGT:
3428 case UNGE:
3429 case UNEQ:
3430 case LTGT:
3431 return CCFPmode;
3433 case LT:
3434 case LE:
3435 case GT:
3436 case GE:
3437 return CCFPEmode;
3439 default:
3440 gcc_unreachable ();
3444 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3445 && y == const0_rtx
3446 && (code == EQ || code == NE || code == LT || code == GE)
3447 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3448 || GET_CODE (x) == NEG))
3449 return CC_NZmode;
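/* Example (illustrative): comparing (plus:DI x y) against const0_rtx
   with EQ selects CC_NZmode, so the addition can be emitted as ADDS
   and the branch only needs the N and Z flags.  */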
3451 /* A compare with a shifted operand. Because of canonicalization,
3452 the comparison will have to be swapped when we emit the assembly
3453 code. */
3454 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3455 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3456 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3457 || GET_CODE (x) == LSHIFTRT
3458 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3459 return CC_SWPmode;
3461 /* Similarly for a negated operand, but we can only do this for
3462 equalities. */
3463 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3464 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3465 && (code == EQ || code == NE)
3466 && GET_CODE (x) == NEG)
3467 return CC_Zmode;
3469 /* A compare of a mode narrower than SI mode against zero can be done
3470 by extending the value in the comparison. */
3471 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3472 && y == const0_rtx)
3473 /* Only use sign-extension if we really need it. */
3474 return ((code == GT || code == GE || code == LE || code == LT)
3475 ? CC_SESWPmode : CC_ZESWPmode);
3477 /* For everything else, return CCmode. */
3478 return CCmode;
3481 static unsigned
3482 aarch64_get_condition_code (rtx x)
3484 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3485 enum rtx_code comp_code = GET_CODE (x);
3487 if (GET_MODE_CLASS (mode) != MODE_CC)
3488 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3490 switch (mode)
3492 case CCFPmode:
3493 case CCFPEmode:
3494 switch (comp_code)
3496 case GE: return AARCH64_GE;
3497 case GT: return AARCH64_GT;
3498 case LE: return AARCH64_LS;
3499 case LT: return AARCH64_MI;
3500 case NE: return AARCH64_NE;
3501 case EQ: return AARCH64_EQ;
3502 case ORDERED: return AARCH64_VC;
3503 case UNORDERED: return AARCH64_VS;
3504 case UNLT: return AARCH64_LT;
3505 case UNLE: return AARCH64_LE;
3506 case UNGT: return AARCH64_HI;
3507 case UNGE: return AARCH64_PL;
3508 default: gcc_unreachable ();
3510 break;
3512 case CCmode:
3513 switch (comp_code)
3515 case NE: return AARCH64_NE;
3516 case EQ: return AARCH64_EQ;
3517 case GE: return AARCH64_GE;
3518 case GT: return AARCH64_GT;
3519 case LE: return AARCH64_LE;
3520 case LT: return AARCH64_LT;
3521 case GEU: return AARCH64_CS;
3522 case GTU: return AARCH64_HI;
3523 case LEU: return AARCH64_LS;
3524 case LTU: return AARCH64_CC;
3525 default: gcc_unreachable ();
3527 break;
3529 case CC_SWPmode:
3530 case CC_ZESWPmode:
3531 case CC_SESWPmode:
3532 switch (comp_code)
3534 case NE: return AARCH64_NE;
3535 case EQ: return AARCH64_EQ;
3536 case GE: return AARCH64_LE;
3537 case GT: return AARCH64_LT;
3538 case LE: return AARCH64_GE;
3539 case LT: return AARCH64_GT;
3540 case GEU: return AARCH64_LS;
3541 case GTU: return AARCH64_CC;
3542 case LEU: return AARCH64_CS;
3543 case LTU: return AARCH64_HI;
3544 default: gcc_unreachable ();
3546 break;
3548 case CC_NZmode:
3549 switch (comp_code)
3551 case NE: return AARCH64_NE;
3552 case EQ: return AARCH64_EQ;
3553 case GE: return AARCH64_PL;
3554 case LT: return AARCH64_MI;
3555 default: gcc_unreachable ();
3557 break;
3559 case CC_Zmode:
3560 switch (comp_code)
3562 case NE: return AARCH64_NE;
3563 case EQ: return AARCH64_EQ;
3564 default: gcc_unreachable ();
3566 break;
3568 default:
3569 gcc_unreachable ();
3570 break;
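/* Return the number of set bits in VALUE; each loop iteration clears
   the lowest set bit (Kernighan's method).  */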
3574 static unsigned
3575 bit_count (unsigned HOST_WIDE_INT value)
3577 unsigned count = 0;
3579 while (value)
3581 count++;
3582 value &= value - 1;
3585 return count;
3588 void
3589 aarch64_print_operand (FILE *f, rtx x, char code)
3591 switch (code)
3593 /* An integer or symbol address without a preceding # sign. */
3594 case 'c':
3595 switch (GET_CODE (x))
3597 case CONST_INT:
3598 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3599 break;
3601 case SYMBOL_REF:
3602 output_addr_const (f, x);
3603 break;
3605 case CONST:
3606 if (GET_CODE (XEXP (x, 0)) == PLUS
3607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3609 output_addr_const (f, x);
3610 break;
3612 /* Fall through. */
3614 default:
3615 output_operand_lossage ("Unsupported operand for code '%c'", code);
3617 break;
3619 case 'e':
3620 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3622 int n;
3624 if (GET_CODE (x) != CONST_INT
3625 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3627 output_operand_lossage ("invalid operand for '%%%c'", code);
3628 return;
3631 switch (n)
3633 case 3:
3634 fputc ('b', f);
3635 break;
3636 case 4:
3637 fputc ('h', f);
3638 break;
3639 case 5:
3640 fputc ('w', f);
3641 break;
3642 default:
3643 output_operand_lossage ("invalid operand for '%%%c'", code);
3644 return;
3647 break;
3649 case 'p':
3651 int n;
3653 /* Print N such that 2^N == X. */
3654 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3656 output_operand_lossage ("invalid operand for '%%%c'", code);
3657 return;
3660 asm_fprintf (f, "%d", n);
3662 break;
3664 case 'P':
3665 /* Print the number of non-zero bits in X (a const_int). */
3666 if (GET_CODE (x) != CONST_INT)
3668 output_operand_lossage ("invalid operand for '%%%c'", code);
3669 return;
3672 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3673 break;
3675 case 'H':
3676 /* Print the higher numbered register of a pair (TImode) of regs. */
3677 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3679 output_operand_lossage ("invalid operand for '%%%c'", code);
3680 return;
3683 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3684 break;
3686 case 'm':
3687 /* Print a condition (eq, ne, etc). */
3689 /* CONST_TRUE_RTX means always -- that's the default. */
3690 if (x == const_true_rtx)
3691 return;
3693 if (!COMPARISON_P (x))
3695 output_operand_lossage ("invalid operand for '%%%c'", code);
3696 return;
3699 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3700 break;
3702 case 'M':
3703 /* Print the inverse of a condition (eq <-> ne, etc). */
3705 /* CONST_TRUE_RTX means never -- that's the default. */
3706 if (x == const_true_rtx)
3708 fputs ("nv", f);
3709 return;
3712 if (!COMPARISON_P (x))
3714 output_operand_lossage ("invalid operand for '%%%c'", code);
3715 return;
3718 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3719 (aarch64_get_condition_code (x))], f);
3720 break;
3722 case 'b':
3723 case 'h':
3724 case 's':
3725 case 'd':
3726 case 'q':
3727 /* Print a scalar FP/SIMD register name. */
3728 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3730 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3731 return;
3733 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3734 break;
3736 case 'S':
3737 case 'T':
3738 case 'U':
3739 case 'V':
3740 /* Print the first FP/SIMD register name in a list. */
3741 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3743 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3744 return;
3746 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3747 break;
3749 case 'X':
3750 /* Print bottom 16 bits of integer constant in hex. */
3751 if (GET_CODE (x) != CONST_INT)
3753 output_operand_lossage ("invalid operand for '%%%c'", code);
3754 return;
3756 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3757 break;
3759 case 'w':
3760 case 'x':
3761 /* Print a general register name or the zero register (32-bit or
3762 64-bit). */
3763 if (x == const0_rtx
3764 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3766 asm_fprintf (f, "%czr", code);
3767 break;
3770 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3772 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3773 break;
3776 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3778 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3779 break;
3782 /* Fall through */
3784 case 0:
3785 /* Print a normal operand.  If it's a general register, then we
3786 assume DImode.  */
3787 if (x == NULL)
3789 output_operand_lossage ("missing operand");
3790 return;
3793 switch (GET_CODE (x))
3795 case REG:
3796 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3797 break;
3799 case MEM:
3800 aarch64_memory_reference_mode = GET_MODE (x);
3801 output_address (XEXP (x, 0));
3802 break;
3804 case LABEL_REF:
3805 case SYMBOL_REF:
3806 output_addr_const (asm_out_file, x);
3807 break;
3809 case CONST_INT:
3810 asm_fprintf (f, "%wd", INTVAL (x));
3811 break;
3813 case CONST_VECTOR:
3814 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3816 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3817 HOST_WIDE_INT_MIN,
3818 HOST_WIDE_INT_MAX));
3819 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3821 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3823 fputc ('0', f);
3825 else
3826 gcc_unreachable ();
3827 break;
3829 case CONST_DOUBLE:
3830 /* CONST_DOUBLE can represent a double-width integer.
3831 In this case, the mode of x is VOIDmode. */
3832 if (GET_MODE (x) == VOIDmode)
3833 ; /* Do Nothing. */
3834 else if (aarch64_float_const_zero_rtx_p (x))
3836 fputc ('0', f);
3837 break;
3839 else if (aarch64_float_const_representable_p (x))
3841 #define buf_size 20
3842 char float_buf[buf_size] = {'\0'};
3843 REAL_VALUE_TYPE r;
3844 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3845 real_to_decimal_for_mode (float_buf, &r,
3846 buf_size, buf_size,
3847 1, GET_MODE (x));
3848 asm_fprintf (asm_out_file, "%s", float_buf);
3849 break;
3850 #undef buf_size
3852 output_operand_lossage ("invalid constant");
3853 return;
3854 default:
3855 output_operand_lossage ("invalid operand");
3856 return;
3858 break;
3860 case 'A':
3861 if (GET_CODE (x) == HIGH)
3862 x = XEXP (x, 0);
3864 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3866 case SYMBOL_SMALL_GOT:
3867 asm_fprintf (asm_out_file, ":got:");
3868 break;
3870 case SYMBOL_SMALL_TLSGD:
3871 asm_fprintf (asm_out_file, ":tlsgd:");
3872 break;
3874 case SYMBOL_SMALL_TLSDESC:
3875 asm_fprintf (asm_out_file, ":tlsdesc:");
3876 break;
3878 case SYMBOL_SMALL_GOTTPREL:
3879 asm_fprintf (asm_out_file, ":gottprel:");
3880 break;
3882 case SYMBOL_SMALL_TPREL:
3883 asm_fprintf (asm_out_file, ":tprel:");
3884 break;
3886 case SYMBOL_TINY_GOT:
3887 gcc_unreachable ();
3888 break;
3890 default:
3891 break;
3893 output_addr_const (asm_out_file, x);
3894 break;
3896 case 'L':
3897 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3899 case SYMBOL_SMALL_GOT:
3900 asm_fprintf (asm_out_file, ":lo12:");
3901 break;
3903 case SYMBOL_SMALL_TLSGD:
3904 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3905 break;
3907 case SYMBOL_SMALL_TLSDESC:
3908 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3909 break;
3911 case SYMBOL_SMALL_GOTTPREL:
3912 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3913 break;
3915 case SYMBOL_SMALL_TPREL:
3916 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3917 break;
3919 case SYMBOL_TINY_GOT:
3920 asm_fprintf (asm_out_file, ":got:");
3921 break;
3923 default:
3924 break;
3926 output_addr_const (asm_out_file, x);
3927 break;
3929 case 'G':
3931 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3933 case SYMBOL_SMALL_TPREL:
3934 asm_fprintf (asm_out_file, ":tprel_hi12:");
3935 break;
3936 default:
3937 break;
3939 output_addr_const (asm_out_file, x);
3940 break;
3942 default:
3943 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3944 return;
3948 void
3949 aarch64_print_operand_address (FILE *f, rtx x)
3951 struct aarch64_address_info addr;
3953 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3954 MEM, true))
3955 switch (addr.type)
3957 case ADDRESS_REG_IMM:
3958 if (addr.offset == const0_rtx)
3959 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3960 else
3961 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3962 INTVAL (addr.offset));
3963 return;
3965 case ADDRESS_REG_REG:
3966 if (addr.shift == 0)
3967 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3968 reg_names [REGNO (addr.offset)]);
3969 else
3970 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3971 reg_names [REGNO (addr.offset)], addr.shift);
3972 return;
3974 case ADDRESS_REG_UXTW:
3975 if (addr.shift == 0)
3976 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3977 REGNO (addr.offset) - R0_REGNUM);
3978 else
3979 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3980 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3981 return;
3983 case ADDRESS_REG_SXTW:
3984 if (addr.shift == 0)
3985 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3986 REGNO (addr.offset) - R0_REGNUM);
3987 else
3988 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3989 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3990 return;
3992 case ADDRESS_REG_WB:
3993 switch (GET_CODE (x))
3995 case PRE_INC:
3996 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3997 GET_MODE_SIZE (aarch64_memory_reference_mode));
3998 return;
3999 case POST_INC:
4000 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4001 GET_MODE_SIZE (aarch64_memory_reference_mode));
4002 return;
4003 case PRE_DEC:
4004 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4005 GET_MODE_SIZE (aarch64_memory_reference_mode));
4006 return;
4007 case POST_DEC:
4008 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4009 GET_MODE_SIZE (aarch64_memory_reference_mode));
4010 return;
4011 case PRE_MODIFY:
4012 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4013 INTVAL (addr.offset));
4014 return;
4015 case POST_MODIFY:
4016 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4017 INTVAL (addr.offset));
4018 return;
4019 default:
4020 break;
4022 break;
4024 case ADDRESS_LO_SUM:
4025 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4026 output_addr_const (f, addr.offset);
4027 asm_fprintf (f, "]");
4028 return;
4030 case ADDRESS_SYMBOLIC:
4031 break;
4034 output_addr_const (f, x);
4037 bool
4038 aarch64_label_mentioned_p (rtx x)
4040 const char *fmt;
4041 int i;
4043 if (GET_CODE (x) == LABEL_REF)
4044 return true;
4046 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4047 referencing instruction, but they are constant offsets, not
4048 symbols. */
4049 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4050 return false;
4052 fmt = GET_RTX_FORMAT (GET_CODE (x));
4053 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4055 if (fmt[i] == 'E')
4057 int j;
4059 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4060 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4061 return 1;
4063 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4064 return 1;
4067 return 0;
4070 /* Implement REGNO_REG_CLASS. */
4072 enum reg_class
4073 aarch64_regno_regclass (unsigned regno)
4075 if (GP_REGNUM_P (regno))
4076 return CORE_REGS;
4078 if (regno == SP_REGNUM)
4079 return STACK_REG;
4081 if (regno == FRAME_POINTER_REGNUM
4082 || regno == ARG_POINTER_REGNUM)
4083 return POINTER_REGS;
4085 if (FP_REGNUM_P (regno))
4086 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4088 return NO_REGS;
4091 /* Try a machine-dependent way of reloading an illegitimate address
4092 operand. If we find one, push the reload and return the new rtx. */
4095 rtx aarch64_legitimize_reload_address (rtx *x_p,
4096 enum machine_mode mode,
4097 int opnum, int type,
4098 int ind_levels ATTRIBUTE_UNUSED)
4100 rtx x = *x_p;
4102 /* Do not allow mem (plus (reg, const)) if vector mode. */
4103 if (aarch64_vector_mode_p (mode)
4104 && GET_CODE (x) == PLUS
4105 && REG_P (XEXP (x, 0))
4106 && CONST_INT_P (XEXP (x, 1)))
4108 rtx orig_rtx = x;
4109 x = copy_rtx (x);
4110 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4111 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4112 opnum, (enum reload_type) type);
4113 return x;
4116 /* We must recognize output that we have already generated ourselves. */
4117 if (GET_CODE (x) == PLUS
4118 && GET_CODE (XEXP (x, 0)) == PLUS
4119 && REG_P (XEXP (XEXP (x, 0), 0))
4120 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4121 && CONST_INT_P (XEXP (x, 1)))
4123 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4124 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4125 opnum, (enum reload_type) type);
4126 return x;
4129 /* We wish to handle large displacements off a base register by splitting
4130 the addend across an add and the mem insn. This can cut the number of
4131 extra insns needed from 3 to 1. It is only useful for load/store of a
4132 single register with a 12-bit offset field.  */
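/* Worked example (hypothetical): an SImode access at
   (plus (reg x1) (const_int 0x3004)) is split into high = 0x3000,
   which is reloaded into the base register with a single add, and
   low = 0x4, which stays in the instruction's 12-bit offset field.  */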
4133 if (GET_CODE (x) == PLUS
4134 && REG_P (XEXP (x, 0))
4135 && CONST_INT_P (XEXP (x, 1))
4136 && HARD_REGISTER_P (XEXP (x, 0))
4137 && mode != TImode
4138 && mode != TFmode
4139 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4141 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4142 HOST_WIDE_INT low = val & 0xfff;
4143 HOST_WIDE_INT high = val - low;
4144 HOST_WIDE_INT offs;
4145 rtx cst;
4146 enum machine_mode xmode = GET_MODE (x);
4148 /* In ILP32, xmode can be either DImode or SImode. */
4149 gcc_assert (xmode == DImode || xmode == SImode);
4151 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4152 BLKmode alignment. */
4153 if (GET_MODE_SIZE (mode) == 0)
4154 return NULL_RTX;
4156 offs = low % GET_MODE_SIZE (mode);
4158 /* Align misaligned offset by adjusting high part to compensate. */
4159 if (offs != 0)
4161 if (aarch64_uimm12_shift (high + offs))
4163 /* Align down. */
4164 low = low - offs;
4165 high = high + offs;
4167 else
4169 /* Align up. */
4170 offs = GET_MODE_SIZE (mode) - offs;
4171 low = low + offs;
4172 high = high + (low & 0x1000) - offs;
4173 low &= 0xfff;
4177 /* Check for overflow. */
4178 if (high + low != val)
4179 return NULL_RTX;
4181 cst = GEN_INT (high);
4182 if (!aarch64_uimm12_shift (high))
4183 cst = force_const_mem (xmode, cst);
4185 /* Reload high part into base reg, leaving the low part
4186 in the mem instruction.
4187 Note that replacing this gen_rtx_PLUS with plus_constant is
4188 wrong in this case because we rely on the
4189 (plus (plus reg c1) c2) structure being preserved so that
4190 XEXP (*p, 0) in push_reload below uses the correct term. */
4191 x = gen_rtx_PLUS (xmode,
4192 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4193 GEN_INT (low));
4195 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4196 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4197 opnum, (enum reload_type) type);
4198 return x;
4201 return NULL_RTX;
4205 static reg_class_t
4206 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4207 reg_class_t rclass,
4208 enum machine_mode mode,
4209 secondary_reload_info *sri)
4211 /* Without the AdvSIMD (TARGET_SIMD) instructions we cannot move a Q
4212 register directly to another Q register. We need a scratch. */
4213 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4214 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4215 && reg_class_subset_p (rclass, FP_REGS))
4217 if (mode == TFmode)
4218 sri->icode = CODE_FOR_aarch64_reload_movtf;
4219 else if (mode == TImode)
4220 sri->icode = CODE_FOR_aarch64_reload_movti;
4221 return NO_REGS;
4224 /* A TFmode or TImode memory access should be handled via FP_REGS,
4225 because AArch64 has richer addressing modes for LDR/STR instructions
4226 than LDP/STP instructions. */
4227 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4228 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4229 return FP_REGS;
4231 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4232 return CORE_REGS;
4234 return NO_REGS;
4237 static bool
4238 aarch64_can_eliminate (const int from, const int to)
4240 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4241 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4243 if (frame_pointer_needed)
4245 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4246 return true;
4247 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4248 return false;
4249 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4250 && !cfun->calls_alloca)
4251 return true;
4252 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4253 return true;
4255 return false;
4258 return true;
4261 HOST_WIDE_INT
4262 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4264 HOST_WIDE_INT frame_size;
4265 HOST_WIDE_INT offset;
4267 aarch64_layout_frame ();
4268 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4269 + crtl->outgoing_args_size
4270 + cfun->machine->saved_varargs_size);
4272 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4273 offset = frame_size;
4275 if (to == HARD_FRAME_POINTER_REGNUM)
4277 if (from == ARG_POINTER_REGNUM)
4278 return offset - crtl->outgoing_args_size;
4280 if (from == FRAME_POINTER_REGNUM)
4281 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4284 if (to == STACK_POINTER_REGNUM)
4286 if (from == FRAME_POINTER_REGNUM)
4288 HOST_WIDE_INT elim = crtl->outgoing_args_size
4289 + cfun->machine->frame.saved_regs_size
4290 + get_frame_size ();
4291 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4292 return elim;
4296 return offset;
4300 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4301 previous frame. */
4304 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4306 if (count != 0)
4307 return const0_rtx;
4308 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4312 static void
4313 aarch64_asm_trampoline_template (FILE *f)
4315 if (TARGET_ILP32)
4317 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4318 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4320 else
4322 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4323 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4325 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4326 assemble_aligned_integer (4, const0_rtx);
4327 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4328 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
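/* Together with aarch64_trampoline_init below, the template above lays a
   trampoline out (illustratively, for LP64) as 16 bytes of code -- two
   loads from the literal words that follow, a branch through IP1 and four
   bytes of padding -- followed by two pointer-sized data words that
   aarch64_trampoline_init fills with the target function address and the
   static chain value.  */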
4331 static void
4332 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4334 rtx fnaddr, mem, a_tramp;
4335 const int tramp_code_sz = 16;
4337 /* We don't need to copy the trailing D-words, as we fill those in below. */
4338 emit_block_move (m_tramp, assemble_trampoline_template (),
4339 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4340 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4341 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4342 if (GET_MODE (fnaddr) != ptr_mode)
4343 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4344 emit_move_insn (mem, fnaddr);
4346 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4347 emit_move_insn (mem, chain_value);
4349 /* XXX We should really define a "clear_cache" pattern and use
4350 gen_clear_cache(). */
4351 a_tramp = XEXP (m_tramp, 0);
4352 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4353 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4354 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4355 ptr_mode);
4358 static unsigned char
4359 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4361 switch (regclass)
4363 case CORE_REGS:
4364 case POINTER_REGS:
4365 case GENERAL_REGS:
4366 case ALL_REGS:
4367 case FP_REGS:
4368 case FP_LO_REGS:
4369 return
4370 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4371 (GET_MODE_SIZE (mode) + 7) / 8;
4372 case STACK_REG:
4373 return 1;
4375 case NO_REGS:
4376 return 0;
4378 default:
4379 break;
4381 gcc_unreachable ();
4384 static reg_class_t
4385 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4387 if (regclass == POINTER_REGS)
4388 return GENERAL_REGS;
4390 if (regclass == STACK_REG)
4392 if (REG_P(x)
4393 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4394 return regclass;
4396 return NO_REGS;
4399 /* If it's an integer immediate that MOVI can't handle, then
4400 FP_REGS is not an option, so we return NO_REGS instead. */
4401 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4402 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4403 return NO_REGS;
4405 /* Register elimination can result in a request for
4406 SP+constant->FP_REGS. We cannot support such operations, which
4407 use SP as the source and an FP_REG as the destination, so reject
4408 them outright. */
4409 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4411 rtx lhs = XEXP (x, 0);
4413 /* Look through a possible SUBREG introduced by ILP32. */
4414 if (GET_CODE (lhs) == SUBREG)
4415 lhs = SUBREG_REG (lhs);
4417 gcc_assert (REG_P (lhs));
4418 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4419 POINTER_REGS));
4420 return NO_REGS;
4423 return regclass;
4426 void
4427 aarch64_asm_output_labelref (FILE* f, const char *name)
4429 asm_fprintf (f, "%U%s", name);
4432 static void
4433 aarch64_elf_asm_constructor (rtx symbol, int priority)
4435 if (priority == DEFAULT_INIT_PRIORITY)
4436 default_ctor_section_asm_out_constructor (symbol, priority);
4437 else
4439 section *s;
4440 char buf[18];
4441 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4442 s = get_section (buf, SECTION_WRITE, NULL);
4443 switch_to_section (s);
4444 assemble_align (POINTER_SIZE);
4445 assemble_aligned_integer (POINTER_BYTES, symbol);
4449 static void
4450 aarch64_elf_asm_destructor (rtx symbol, int priority)
4452 if (priority == DEFAULT_INIT_PRIORITY)
4453 default_dtor_section_asm_out_destructor (symbol, priority);
4454 else
4456 section *s;
4457 char buf[18];
4458 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4459 s = get_section (buf, SECTION_WRITE, NULL);
4460 switch_to_section (s);
4461 assemble_align (POINTER_SIZE);
4462 assemble_aligned_integer (POINTER_BYTES, symbol);
4466 const char*
4467 aarch64_output_casesi (rtx *operands)
4469 char buf[100];
4470 char label[100];
4471 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4472 int index;
4473 static const char *const patterns[4][2] =
4476 "ldrb\t%w3, [%0,%w1,uxtw]",
4477 "add\t%3, %4, %w3, sxtb #2"
4480 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4481 "add\t%3, %4, %w3, sxth #2"
4484 "ldr\t%w3, [%0,%w1,uxtw #2]",
4485 "add\t%3, %4, %w3, sxtw #2"
4487 /* We assume that DImode is only generated when not optimizing and
4488 that we don't really need 64-bit address offsets. That would
4489 imply an object file with 8GB of code in a single function! */
4491 "ldr\t%w3, [%0,%w1,uxtw #2]",
4492 "add\t%3, %4, %w3, sxtw #2"
4496 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4498 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4500 gcc_assert (index >= 0 && index <= 3);
4502 /* Need to implement table size reduction, by changing the code below. */
4503 output_asm_insn (patterns[index][0], operands);
4504 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4505 snprintf (buf, sizeof (buf),
4506 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4507 output_asm_insn (buf, operands);
4508 output_asm_insn (patterns[index][1], operands);
4509 output_asm_insn ("br\t%3", operands);
4510 assemble_label (asm_out_file, label);
4511 return "";
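/* For example, with a HImode dispatch table the sequence emitted above is
   roughly (register numbers illustrative):

       ldrh  w3, [x0, w1, uxtw #1]
       adr   x4, .Lrtx<N>
       add   x3, x4, w3, sxth #2
       br    x3
   .Lrtx<N>:

   where <N> is the label number of the jump-table insn.  */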
4515 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4516 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4517 operator. */
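/* For example, aarch64_uxt_size (1, 0x1fe) returns 8, since 0x1fe is 0xff
   shifted left by one (a byte-sized operand scaled by 2, as used by a
   UXTB-style extend); a mask that is not a shifted 0xff, 0xffff or
   0xffffffff yields 0.  */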
4520 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4522 if (shift >= 0 && shift <= 3)
4524 int size;
4525 for (size = 8; size <= 32; size *= 2)
4527 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4528 if (mask == bits << shift)
4529 return size;
4532 return 0;
4535 static bool
4536 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4537 const_rtx x ATTRIBUTE_UNUSED)
4539 /* We can't use blocks for constants when we're using a per-function
4540 constant pool. */
4541 return false;
4544 static section *
4545 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4546 rtx x ATTRIBUTE_UNUSED,
4547 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4549 /* Force all constant pool entries into the current function section. */
4550 return function_section (current_function_decl);
4554 /* Costs. */
4556 /* Helper function for rtx cost calculation. Strip a shift expression
4557 from X. Returns the inner operand if successful, or the original
4558 expression on failure. */
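/* For example, both (ashift (reg X) (const_int 3)) and the equivalent
   (mult (reg X) (const_int 8)) are stripped down to (reg X); a shift by a
   register, or a multiply by a non-power-of-two constant, is returned
   unchanged.  */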
4559 static rtx
4560 aarch64_strip_shift (rtx x)
4562 rtx op = x;
4564 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4565 we can convert both to ROR during final output. */
4566 if ((GET_CODE (op) == ASHIFT
4567 || GET_CODE (op) == ASHIFTRT
4568 || GET_CODE (op) == LSHIFTRT
4569 || GET_CODE (op) == ROTATERT
4570 || GET_CODE (op) == ROTATE)
4571 && CONST_INT_P (XEXP (op, 1)))
4572 return XEXP (op, 0);
4574 if (GET_CODE (op) == MULT
4575 && CONST_INT_P (XEXP (op, 1))
4576 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4577 return XEXP (op, 0);
4579 return x;
4582 /* Helper function for rtx cost calculation. Strip an extend
4583 expression from X. Returns the inner operand if successful, or the
4584 original expression on failure. We deal with a number of possible
4585 canonicalization variations here. */
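/* For example, (zero_extend:DI (reg:SI X)) and the extended-register form
   (ashift:DI (sign_extend:DI (reg:SI X)) (const_int 2)) both strip down to
   (reg:SI X); anything that is not one of the recognized extend shapes is
   returned unchanged.  */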
4586 static rtx
4587 aarch64_strip_extend (rtx x)
4589 rtx op = x;
4591 /* Zero and sign extraction of a widened value. */
4592 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4593 && XEXP (op, 2) == const0_rtx
4594 && GET_CODE (XEXP (op, 0)) == MULT
4595 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4596 XEXP (op, 1)))
4597 return XEXP (XEXP (op, 0), 0);
4599 /* It can also be represented (for zero-extend) as an AND with an
4600 immediate. */
4601 if (GET_CODE (op) == AND
4602 && GET_CODE (XEXP (op, 0)) == MULT
4603 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4604 && CONST_INT_P (XEXP (op, 1))
4605 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4606 INTVAL (XEXP (op, 1))) != 0)
4607 return XEXP (XEXP (op, 0), 0);
4609 /* Now handle extended register, as this may also have an optional
4610 left shift by 1..4. */
4611 if (GET_CODE (op) == ASHIFT
4612 && CONST_INT_P (XEXP (op, 1))
4613 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4614 op = XEXP (op, 0);
4616 if (GET_CODE (op) == ZERO_EXTEND
4617 || GET_CODE (op) == SIGN_EXTEND)
4618 op = XEXP (op, 0);
4620 if (op != x)
4621 return op;
4623 return x;
4626 /* Helper function for rtx cost calculation. Calculate the cost of
4627 a MULT, which may be part of a multiply-accumulate rtx. Return
4628 the calculated cost of the expression, recursing manually in to
4629 operands where needed. */
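/* For example, (plus:DI (mult:DI (reg A) (reg B)) (reg C)) reaches this
   function with OUTER == PLUS, so the multiply is costed as the fused MADD
   form rather than as a separate MUL plus ADD; a multiply by a power of two
   is costed as a shift instead (or as an add-with-shifted-register when
   fused).  */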
4631 static int
4632 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4634 rtx op0, op1;
4635 const struct cpu_cost_table *extra_cost
4636 = aarch64_tune_params->insn_extra_cost;
4637 int cost = 0;
4638 bool maybe_fma = (outer == PLUS || outer == MINUS);
4639 enum machine_mode mode = GET_MODE (x);
4641 gcc_checking_assert (code == MULT);
4643 op0 = XEXP (x, 0);
4644 op1 = XEXP (x, 1);
4646 if (VECTOR_MODE_P (mode))
4647 mode = GET_MODE_INNER (mode);
4649 /* Integer multiply/fma. */
4650 if (GET_MODE_CLASS (mode) == MODE_INT)
4652 /* The multiply will be canonicalized as a shift, cost it as such. */
4653 if (CONST_INT_P (op1)
4654 && exact_log2 (INTVAL (op1)) > 0)
4656 if (speed)
4658 if (maybe_fma)
4659 /* ADD (shifted register). */
4660 cost += extra_cost->alu.arith_shift;
4661 else
4662 /* LSL (immediate). */
4663 cost += extra_cost->alu.shift;
4666 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4668 return cost;
4671 /* Integer multiplies or FMAs have zero/sign extending variants. */
4672 if ((GET_CODE (op0) == ZERO_EXTEND
4673 && GET_CODE (op1) == ZERO_EXTEND)
4674 || (GET_CODE (op0) == SIGN_EXTEND
4675 && GET_CODE (op1) == SIGN_EXTEND))
4677 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4678 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4680 if (speed)
4682 if (maybe_fma)
4683 /* MADD/SMADDL/UMADDL. */
4684 cost += extra_cost->mult[0].extend_add;
4685 else
4686 /* MUL/SMULL/UMULL. */
4687 cost += extra_cost->mult[0].extend;
4690 return cost;
4693 /* This is either an integer multiply or an FMA. In both cases
4694 we want to recurse and cost the operands. */
4695 cost += rtx_cost (op0, MULT, 0, speed)
4696 + rtx_cost (op1, MULT, 1, speed);
4698 if (speed)
4700 if (maybe_fma)
4701 /* MADD. */
4702 cost += extra_cost->mult[mode == DImode].add;
4703 else
4704 /* MUL. */
4705 cost += extra_cost->mult[mode == DImode].simple;
4708 return cost;
4710 else
4712 if (speed)
4714 /* Floating-point FMA can also support negations of the
4715 operands. */
4716 if (GET_CODE (op0) == NEG)
4718 maybe_fma = true;
4719 op0 = XEXP (op0, 0);
4721 if (GET_CODE (op1) == NEG)
4723 maybe_fma = true;
4724 op1 = XEXP (op1, 0);
4727 if (maybe_fma)
4728 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4729 cost += extra_cost->fp[mode == DFmode].fma;
4730 else
4731 /* FMUL. */
4732 cost += extra_cost->fp[mode == DFmode].mult;
4735 cost += rtx_cost (op0, MULT, 0, speed)
4736 + rtx_cost (op1, MULT, 1, speed);
4737 return cost;
4741 static int
4742 aarch64_address_cost (rtx x,
4743 enum machine_mode mode,
4744 addr_space_t as ATTRIBUTE_UNUSED,
4745 bool speed)
4747 enum rtx_code c = GET_CODE (x);
4748 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4749 struct aarch64_address_info info;
4750 int cost = 0;
4751 info.shift = 0;
4753 if (!aarch64_classify_address (&info, x, mode, c, false))
4755 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4757 /* This is a CONST or SYMBOL ref which will be split
4758 in a different way depending on the code model in use.
4759 Cost it through the generic infrastructure. */
4760 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4761 /* Divide through by the cost of one instruction to
4762 bring it to the same units as the address costs. */
4763 cost_symbol_ref /= COSTS_N_INSNS (1);
4764 /* The cost is then the cost of preparing the address,
4765 followed by an immediate (possibly 0) offset. */
4766 return cost_symbol_ref + addr_cost->imm_offset;
4768 else
4770 /* This is most likely a jump table from a case
4771 statement. */
4772 return addr_cost->register_offset;
4776 switch (info.type)
4778 case ADDRESS_LO_SUM:
4779 case ADDRESS_SYMBOLIC:
4780 case ADDRESS_REG_IMM:
4781 cost += addr_cost->imm_offset;
4782 break;
4784 case ADDRESS_REG_WB:
4785 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4786 cost += addr_cost->pre_modify;
4787 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4788 cost += addr_cost->post_modify;
4789 else
4790 gcc_unreachable ();
4792 break;
4794 case ADDRESS_REG_REG:
4795 cost += addr_cost->register_offset;
4796 break;
4798 case ADDRESS_REG_UXTW:
4799 case ADDRESS_REG_SXTW:
4800 cost += addr_cost->register_extend;
4801 break;
4803 default:
4804 gcc_unreachable ();
4808 if (info.shift > 0)
4810 /* For the sake of calculating the cost of the shifted register
4811 component, we can treat same sized modes in the same way. */
4812 switch (GET_MODE_BITSIZE (mode))
4814 case 16:
4815 cost += addr_cost->addr_scale_costs.hi;
4816 break;
4818 case 32:
4819 cost += addr_cost->addr_scale_costs.si;
4820 break;
4822 case 64:
4823 cost += addr_cost->addr_scale_costs.di;
4824 break;
4826 /* We can't tell, or this is a 128-bit vector. */
4827 default:
4828 cost += addr_cost->addr_scale_costs.ti;
4829 break;
4833 return cost;
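/* For example, a scaled register-offset SImode address such as
   (plus (reg base) (mult (reg index) (const_int 4))) pays
   register_offset plus addr_scale_costs.si above, whereas a simple
   (plus (reg base) (const_int 8)) address only pays imm_offset.  */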
4836 /* Calculate the cost of calculating X, storing it in *COST. Result
4837 is true if the total cost of the operation has now been calculated. */
4838 static bool
4839 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4840 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4842 rtx op0, op1;
4843 const struct cpu_cost_table *extra_cost
4844 = aarch64_tune_params->insn_extra_cost;
4845 enum machine_mode mode = GET_MODE (x);
4847 /* By default, assume that everything has equivalent cost to the
4848 cheapest instruction. Any additional costs are applied as a delta
4849 above this default. */
4850 *cost = COSTS_N_INSNS (1);
4852 /* TODO: The cost infrastructure currently does not handle
4853 vector operations. Assume that all vector operations
4854 are equally expensive. */
4855 if (VECTOR_MODE_P (mode))
4857 if (speed)
4858 *cost += extra_cost->vect.alu;
4859 return true;
4862 switch (code)
4864 case SET:
4865 /* The cost depends entirely on the operands to SET. */
4866 *cost = 0;
4867 op0 = SET_DEST (x);
4868 op1 = SET_SRC (x);
4870 switch (GET_CODE (op0))
4872 case MEM:
4873 if (speed)
4875 rtx address = XEXP (op0, 0);
4876 if (GET_MODE_CLASS (mode) == MODE_INT)
4877 *cost += extra_cost->ldst.store;
4878 else if (mode == SFmode)
4879 *cost += extra_cost->ldst.storef;
4880 else if (mode == DFmode)
4881 *cost += extra_cost->ldst.stored;
4883 *cost +=
4884 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4885 0, speed));
4888 *cost += rtx_cost (op1, SET, 1, speed);
4889 return true;
4891 case SUBREG:
4892 if (! REG_P (SUBREG_REG (op0)))
4893 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4895 /* Fall through. */
4896 case REG:
4897 /* const0_rtx is in general free, but we will use an
4898 instruction to set a register to 0. */
4899 if (REG_P (op1) || op1 == const0_rtx)
4901 /* The cost is 1 per register copied. */
4902 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4903 / UNITS_PER_WORD;
4904 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4906 else
4907 /* Cost is just the cost of the RHS of the set. */
4908 *cost += rtx_cost (op1, SET, 1, speed);
4909 return true;
4911 case ZERO_EXTRACT:
4912 case SIGN_EXTRACT:
4913 /* Bit-field insertion. Strip any redundant widening of
4914 the RHS to meet the width of the target. */
4915 if (GET_CODE (op1) == SUBREG)
4916 op1 = SUBREG_REG (op1);
4917 if ((GET_CODE (op1) == ZERO_EXTEND
4918 || GET_CODE (op1) == SIGN_EXTEND)
4919 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4920 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4921 >= INTVAL (XEXP (op0, 1))))
4922 op1 = XEXP (op1, 0);
4924 if (CONST_INT_P (op1))
4926 /* MOV immediate is assumed to always be cheap. */
4927 *cost = COSTS_N_INSNS (1);
4929 else
4931 /* BFM. */
4932 if (speed)
4933 *cost += extra_cost->alu.bfi;
4934 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4937 return true;
4939 default:
4940 /* We can't make sense of this, assume default cost. */
4941 *cost = COSTS_N_INSNS (1);
4942 break;
4944 return false;
4946 case CONST_INT:
4947 /* If an instruction can incorporate a constant within the
4948 instruction, the instruction's expression avoids calling
4949 rtx_cost() on the constant. If rtx_cost() is called on a
4950 constant, then it is usually because the constant must be
4951 moved into a register by one or more instructions.
4953 The exception is constant 0, which can be expressed
4954 as XZR/WZR and is therefore free. However, if we have
4955 (set (reg) (const0_rtx)) we must still cost the move;
4956 we catch that case when we cost the SET, so we do not
4957 need to consider it here. */
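/* For example, a constant such as 0x12345678, which needs a MOVZ/MOVK
   pair to materialize, is costed here as roughly two instructions,
   whereas 0 itself is free (XZR/WZR).  */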
4958 if (x == const0_rtx)
4959 *cost = 0;
4960 else
4962 /* To an approximation, the cost of building any other constant
4963 is proportional to the number of instructions required to
4964 build it. This is true whether we are compiling for
4965 SPEED or otherwise. */
4966 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4967 INTVAL (x),
4968 false));
4970 return true;
4972 case CONST_DOUBLE:
4973 if (speed)
4975 /* mov[df,sf]_aarch64. */
4976 if (aarch64_float_const_representable_p (x))
4977 /* FMOV (scalar immediate). */
4978 *cost += extra_cost->fp[mode == DFmode].fpconst;
4979 else if (!aarch64_float_const_zero_rtx_p (x))
4981 /* This will be a load from memory. */
4982 if (mode == DFmode)
4983 *cost += extra_cost->ldst.loadd;
4984 else
4985 *cost += extra_cost->ldst.loadf;
4987 else
4988 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4989 or MOV v0.s[0], wzr, neither of which is modeled by the
4990 cost tables. Just use the default cost. */
4995 return true;
4997 case MEM:
4998 if (speed)
5000 /* For loads we want the base cost of a load, plus an
5001 approximation for the additional cost of the addressing
5002 mode. */
5003 rtx address = XEXP (x, 0);
5004 if (GET_MODE_CLASS (mode) == MODE_INT)
5005 *cost += extra_cost->ldst.load;
5006 else if (mode == SFmode)
5007 *cost += extra_cost->ldst.loadf;
5008 else if (mode == DFmode)
5009 *cost += extra_cost->ldst.loadd;
5011 *cost +=
5012 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5013 0, speed));
5016 return true;
5018 case NEG:
5019 op0 = XEXP (x, 0);
5021 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5023 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5024 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5026 /* CSETM. */
5027 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5028 return true;
5031 /* Cost this as SUB wzr, X. */
5032 op0 = CONST0_RTX (GET_MODE (x));
5033 op1 = XEXP (x, 0);
5034 goto cost_minus;
5037 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5039 /* Support (neg(fma...)) as a single instruction only if
5040 sign of zeros is unimportant. This matches the decision
5041 making in aarch64.md. */
5042 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5044 /* FNMADD. */
5045 *cost = rtx_cost (op0, NEG, 0, speed);
5046 return true;
5048 if (speed)
5049 /* FNEG. */
5050 *cost += extra_cost->fp[mode == DFmode].neg;
5051 return false;
5054 return false;
5056 case COMPARE:
5057 op0 = XEXP (x, 0);
5058 op1 = XEXP (x, 1);
5060 if (op1 == const0_rtx
5061 && GET_CODE (op0) == AND)
5063 x = op0;
5064 goto cost_logic;
5067 /* Comparisons can work if the order is swapped.
5068 Canonicalization puts the more complex operation first, but
5069 we want it in op1. */
5070 if (! (REG_P (op0)
5071 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5073 op0 = XEXP (x, 1);
5074 op1 = XEXP (x, 0);
5076 goto cost_minus;
5078 case MINUS:
5080 op0 = XEXP (x, 0);
5081 op1 = XEXP (x, 1);
5083 cost_minus:
5084 /* Detect valid immediates. */
5085 if ((GET_MODE_CLASS (mode) == MODE_INT
5086 || (GET_MODE_CLASS (mode) == MODE_CC
5087 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5088 && CONST_INT_P (op1)
5089 && aarch64_uimm12_shift (INTVAL (op1)))
5091 *cost += rtx_cost (op0, MINUS, 0, speed);
5093 if (speed)
5094 /* SUB(S) (immediate). */
5095 *cost += extra_cost->alu.arith;
5096 return true;
5100 rtx new_op1 = aarch64_strip_extend (op1);
5102 /* Cost this as an FMA-alike operation. */
5103 if ((GET_CODE (new_op1) == MULT
5104 || GET_CODE (new_op1) == ASHIFT)
5105 && code != COMPARE)
5107 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5108 (enum rtx_code) code,
5109 speed);
5110 *cost += rtx_cost (op0, MINUS, 0, speed);
5111 return true;
5114 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5116 if (speed)
5118 if (GET_MODE_CLASS (mode) == MODE_INT)
5119 /* SUB(S). */
5120 *cost += extra_cost->alu.arith;
5121 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5122 /* FSUB. */
5123 *cost += extra_cost->fp[mode == DFmode].addsub;
5125 return true;
5128 case PLUS:
5130 rtx new_op0;
5132 op0 = XEXP (x, 0);
5133 op1 = XEXP (x, 1);
5135 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5136 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5138 /* CSINC. */
5139 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5140 *cost += rtx_cost (op1, PLUS, 1, speed);
5141 return true;
5144 if (GET_MODE_CLASS (mode) == MODE_INT
5145 && CONST_INT_P (op1)
5146 && aarch64_uimm12_shift (INTVAL (op1)))
5148 *cost += rtx_cost (op0, PLUS, 0, speed);
5150 if (speed)
5151 /* ADD (immediate). */
5152 *cost += extra_cost->alu.arith;
5153 return true;
5156 /* Strip any extend, leave shifts behind as we will
5157 cost them through mult_cost. */
5158 new_op0 = aarch64_strip_extend (op0);
5160 if (GET_CODE (new_op0) == MULT
5161 || GET_CODE (new_op0) == ASHIFT)
5163 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5164 speed);
5165 *cost += rtx_cost (op1, PLUS, 1, speed);
5166 return true;
5169 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5170 + rtx_cost (op1, PLUS, 1, speed));
5172 if (speed)
5174 if (GET_MODE_CLASS (mode) == MODE_INT)
5175 /* ADD. */
5176 *cost += extra_cost->alu.arith;
5177 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5178 /* FADD. */
5179 *cost += extra_cost->fp[mode == DFmode].addsub;
5181 return true;
5184 case BSWAP:
5185 *cost = COSTS_N_INSNS (1);
5187 if (speed)
5188 *cost += extra_cost->alu.rev;
5190 return false;
5192 case IOR:
5193 if (aarch_rev16_p (x))
5195 *cost = COSTS_N_INSNS (1);
5197 if (speed)
5198 *cost += extra_cost->alu.rev;
5200 return true;
5202 /* Fall through. */
5203 case XOR:
5204 case AND:
5205 cost_logic:
5206 op0 = XEXP (x, 0);
5207 op1 = XEXP (x, 1);
5209 if (code == AND
5210 && GET_CODE (op0) == MULT
5211 && CONST_INT_P (XEXP (op0, 1))
5212 && CONST_INT_P (op1)
5213 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5214 INTVAL (op1)) != 0)
5216 /* This is a UBFM/SBFM. */
5217 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5218 if (speed)
5219 *cost += extra_cost->alu.bfx;
5220 return true;
5223 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5225 /* We may get the immediate for free; this is not
5226 modelled. */
5227 if (CONST_INT_P (op1)
5228 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5230 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5232 if (speed)
5233 *cost += extra_cost->alu.logical;
5235 return true;
5237 else
5239 rtx new_op0 = op0;
5241 /* Handle ORN, EON, or BIC. */
5242 if (GET_CODE (op0) == NOT)
5243 op0 = XEXP (op0, 0);
5245 new_op0 = aarch64_strip_shift (op0);
5247 /* If we had a shift on op0 then this is a logical-shift-
5248 by-register/immediate operation. Otherwise, this is just
5249 a logical operation. */
5250 if (speed)
5252 if (new_op0 != op0)
5254 /* Shift by immediate. */
5255 if (CONST_INT_P (XEXP (op0, 1)))
5256 *cost += extra_cost->alu.log_shift;
5257 else
5258 *cost += extra_cost->alu.log_shift_reg;
5260 else
5261 *cost += extra_cost->alu.logical;
5264 /* In both cases we want to cost both operands. */
5265 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5266 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5268 return true;
5271 return false;
5273 case NOT:
5274 /* MVN. */
5275 if (speed)
5276 *cost += extra_cost->alu.logical;
5278 /* The logical instruction could have the shifted register form,
5279 but the cost is the same if the shift is processed as a separate
5280 instruction, so we don't bother with it here. */
5281 return false;
5283 case ZERO_EXTEND:
5285 op0 = XEXP (x, 0);
5286 /* If a value is written in SI mode, then zero extended to DI
5287 mode, the operation will in general be free as a write to
5288 a 'w' register implicitly zeroes the upper bits of an 'x'
5289 register. However, if this is
5291 (set (reg) (zero_extend (reg)))
5293 we must cost the explicit register move. */
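/* For example, (set (reg:DI X) (zero_extend:DI (plus:SI ...))) costs only
   the SImode addition, because a 32-bit write already clears bits 63:32,
   whereas (set (reg:DI X) (zero_extend:DI (reg:SI Y))) still needs a MOV.  */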
5294 if (mode == DImode
5295 && GET_MODE (op0) == SImode
5296 && outer == SET)
5298 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5300 if (!op_cost && speed)
5301 /* MOV. */
5302 *cost += extra_cost->alu.extend;
5303 else
5304 /* Free, the cost is that of the SI mode operation. */
5305 *cost = op_cost;
5307 return true;
5309 else if (MEM_P (XEXP (x, 0)))
5311 /* All loads can zero extend to any size for free. */
5312 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5313 return true;
5316 /* UXTB/UXTH. */
5317 if (speed)
5318 *cost += extra_cost->alu.extend;
5320 return false;
5322 case SIGN_EXTEND:
5323 if (MEM_P (XEXP (x, 0)))
5325 /* LDRSH. */
5326 if (speed)
5328 rtx address = XEXP (XEXP (x, 0), 0);
5329 *cost += extra_cost->ldst.load_sign_extend;
5331 *cost +=
5332 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5333 0, speed));
5335 return true;
5338 if (speed)
5339 *cost += extra_cost->alu.extend;
5340 return false;
5342 case ROTATE:
5343 if (!CONST_INT_P (XEXP (x, 1)))
5344 *cost += COSTS_N_INSNS (2);
5345 /* Fall through. */
5346 case ROTATERT:
5347 case LSHIFTRT:
5348 case ASHIFT:
5349 case ASHIFTRT:
5351 /* Shifting by a register often takes an extra cycle. */
5352 if (speed && !CONST_INT_P (XEXP (x, 1)))
5353 *cost += extra_cost->alu.arith_shift_reg;
5355 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5356 return true;
5358 case HIGH:
5359 if (!CONSTANT_P (XEXP (x, 0)))
5360 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5361 return true;
5363 case LO_SUM:
5364 if (!CONSTANT_P (XEXP (x, 1)))
5365 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5366 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5367 return true;
5369 case ZERO_EXTRACT:
5370 case SIGN_EXTRACT:
5371 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5372 return true;
5374 case MULT:
5375 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5376 /* aarch64_rtx_mult_cost always handles recursion to its
5377 operands. */
5378 return true;
5380 case MOD:
5381 case UMOD:
5382 *cost = COSTS_N_INSNS (2);
5383 if (speed)
5385 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5386 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5387 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5388 else if (GET_MODE (x) == DFmode)
5389 *cost += (extra_cost->fp[1].mult
5390 + extra_cost->fp[1].div);
5391 else if (GET_MODE (x) == SFmode)
5392 *cost += (extra_cost->fp[0].mult
5393 + extra_cost->fp[0].div);
5395 return false; /* All arguments need to be in registers. */
5397 case DIV:
5398 case UDIV:
5399 *cost = COSTS_N_INSNS (1);
5400 if (speed)
5402 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5403 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5404 else if (GET_MODE (x) == DFmode)
5405 *cost += extra_cost->fp[1].div;
5406 else if (GET_MODE (x) == SFmode)
5407 *cost += extra_cost->fp[0].div;
5409 return false; /* All arguments need to be in registers. */
5411 default:
5412 break;
5414 return false;
5417 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5418 calculated for X. This cost is stored in *COST. Returns true
5419 if the total cost of X was calculated. */
5420 static bool
5421 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5422 int param, int *cost, bool speed)
5424 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5426 if (dump_file && (dump_flags & TDF_DETAILS))
5428 print_rtl_single (dump_file, x);
5429 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5430 speed ? "Hot" : "Cold",
5431 *cost, result ? "final" : "partial");
5434 return result;
5437 static int
5438 aarch64_register_move_cost (enum machine_mode mode,
5439 reg_class_t from_i, reg_class_t to_i)
5441 enum reg_class from = (enum reg_class) from_i;
5442 enum reg_class to = (enum reg_class) to_i;
5443 const struct cpu_regmove_cost *regmove_cost
5444 = aarch64_tune_params->regmove_cost;
5446 /* Moving between a GPR and the stack register costs the same as GP2GP. */
5447 if ((from == GENERAL_REGS && to == STACK_REG)
5448 || (to == GENERAL_REGS && from == STACK_REG))
5449 return regmove_cost->GP2GP;
5451 /* To or from the stack register, we move via the GPRs. */
5452 if (to == STACK_REG || from == STACK_REG)
5453 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5454 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5456 if (from == GENERAL_REGS && to == GENERAL_REGS)
5457 return regmove_cost->GP2GP;
5458 else if (from == GENERAL_REGS)
5459 return regmove_cost->GP2FP;
5460 else if (to == GENERAL_REGS)
5461 return regmove_cost->FP2GP;
5463 /* When AdvSIMD instructions are disabled it is not possible to move
5464 a 128-bit value directly between Q registers. This is handled in
5465 secondary reload. A general register is used as a scratch to move
5466 the upper DI value and the lower DI value is moved directly,
5467 hence the cost is the sum of three moves. */
5468 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5469 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5471 return regmove_cost->FP2FP;
5474 static int
5475 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5476 reg_class_t rclass ATTRIBUTE_UNUSED,
5477 bool in ATTRIBUTE_UNUSED)
5479 return aarch64_tune_params->memmov_cost;
5482 /* Return the number of instructions that can be issued per cycle. */
5483 static int
5484 aarch64_sched_issue_rate (void)
5486 return aarch64_tune_params->issue_rate;
5489 /* Vectorizer cost model target hooks. */
5491 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5492 static int
5493 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5494 tree vectype,
5495 int misalign ATTRIBUTE_UNUSED)
5497 unsigned elements;
5499 switch (type_of_cost)
5501 case scalar_stmt:
5502 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5504 case scalar_load:
5505 return aarch64_tune_params->vec_costs->scalar_load_cost;
5507 case scalar_store:
5508 return aarch64_tune_params->vec_costs->scalar_store_cost;
5510 case vector_stmt:
5511 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5513 case vector_load:
5514 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5516 case vector_store:
5517 return aarch64_tune_params->vec_costs->vec_store_cost;
5519 case vec_to_scalar:
5520 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5522 case scalar_to_vec:
5523 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5525 case unaligned_load:
5526 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5528 case unaligned_store:
5529 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5531 case cond_branch_taken:
5532 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5534 case cond_branch_not_taken:
5535 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5537 case vec_perm:
5538 case vec_promote_demote:
5539 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5541 case vec_construct:
5542 elements = TYPE_VECTOR_SUBPARTS (vectype);
5543 return elements / 2 + 1;
5545 default:
5546 gcc_unreachable ();
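/* For example, with the model above a vec_construct of a four-element
   vector (e.g. V4SI) is approximated as 4 / 2 + 1 = 3 statements.  */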
5550 /* Implement targetm.vectorize.add_stmt_cost. */
5551 static unsigned
5552 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5553 struct _stmt_vec_info *stmt_info, int misalign,
5554 enum vect_cost_model_location where)
5556 unsigned *cost = (unsigned *) data;
5557 unsigned retval = 0;
5559 if (flag_vect_cost_model)
5561 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5562 int stmt_cost =
5563 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5565 /* Statements in an inner loop relative to the loop being
5566 vectorized are weighted more heavily. The value here is
5567 a function (linear for now) of the loop nest level. */
5568 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5570 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5571 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5572 unsigned nest_level = loop_depth (loop);
5574 count *= nest_level;
5577 retval = (unsigned) (count * stmt_cost);
5578 cost[where] += retval;
5581 return retval;
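/* For example, a vector statement in the body of a loop at nest depth two
   relative to the loop being vectorized has its count, and hence its
   contribution to the accumulated cost, doubled by the weighting above.  */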
5584 static void initialize_aarch64_code_model (void);
5586 /* Parse the architecture extension string. */
5588 static void
5589 aarch64_parse_extension (char *str)
5591 /* The extension string is parsed left to right. */
5592 const struct aarch64_option_extension *opt = NULL;
5594 /* Flag to say whether we are adding or removing an extension. */
5595 int adding_ext = -1;
5597 while (str != NULL && *str != 0)
5599 char *ext;
5600 size_t len;
5602 str++;
5603 ext = strchr (str, '+');
5605 if (ext != NULL)
5606 len = ext - str;
5607 else
5608 len = strlen (str);
5610 if (len >= 2 && strncmp (str, "no", 2) == 0)
5612 adding_ext = 0;
5613 len -= 2;
5614 str += 2;
5616 else if (len > 0)
5617 adding_ext = 1;
5619 if (len == 0)
5621 error ("missing feature modifier after %qs", "+no");
5622 return;
5625 /* Scan over the extensions table trying to find an exact match. */
5626 for (opt = all_extensions; opt->name != NULL; opt++)
5628 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5630 /* Add or remove the extension. */
5631 if (adding_ext)
5632 aarch64_isa_flags |= opt->flags_on;
5633 else
5634 aarch64_isa_flags &= ~(opt->flags_off);
5635 break;
5639 if (opt->name == NULL)
5641 /* Extension not found in list. */
5642 error ("unknown feature modifier %qs", str);
5643 return;
5646 str = ext;
5649 return;
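/* For example, an extension string such as "+crc+nocrypto" is processed
   left to right: the flags_on bits of the "crc" entry are OR-ed into
   aarch64_isa_flags, then the flags_off bits of the "crypto" entry are
   cleared because of the "no" prefix.  */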
5652 /* Parse the ARCH string. */
5654 static void
5655 aarch64_parse_arch (void)
5657 char *ext;
5658 const struct processor *arch;
5659 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5660 size_t len;
5662 strcpy (str, aarch64_arch_string);
5664 ext = strchr (str, '+');
5666 if (ext != NULL)
5667 len = ext - str;
5668 else
5669 len = strlen (str);
5671 if (len == 0)
5673 error ("missing arch name in -march=%qs", str);
5674 return;
5677 /* Loop through the list of supported ARCHs to find a match. */
5678 for (arch = all_architectures; arch->name != NULL; arch++)
5680 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5682 selected_arch = arch;
5683 aarch64_isa_flags = selected_arch->flags;
5685 if (!selected_cpu)
5686 selected_cpu = &all_cores[selected_arch->core];
5688 if (ext != NULL)
5690 /* ARCH string contains at least one extension. */
5691 aarch64_parse_extension (ext);
5694 if (strcmp (selected_arch->arch, selected_cpu->arch))
5696 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5697 selected_cpu->name, selected_arch->name);
5700 return;
5704 /* ARCH name not found in list. */
5705 error ("unknown value %qs for -march", str);
5706 return;
5709 /* Parse the CPU string. */
5711 static void
5712 aarch64_parse_cpu (void)
5714 char *ext;
5715 const struct processor *cpu;
5716 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5717 size_t len;
5719 strcpy (str, aarch64_cpu_string);
5721 ext = strchr (str, '+');
5723 if (ext != NULL)
5724 len = ext - str;
5725 else
5726 len = strlen (str);
5728 if (len == 0)
5730 error ("missing cpu name in -mcpu=%qs", str);
5731 return;
5734 /* Loop through the list of supported CPUs to find a match. */
5735 for (cpu = all_cores; cpu->name != NULL; cpu++)
5737 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5739 selected_cpu = cpu;
5740 selected_tune = cpu;
5741 aarch64_isa_flags = selected_cpu->flags;
5743 if (ext != NULL)
5745 /* CPU string contains at least one extension. */
5746 aarch64_parse_extension (ext);
5749 return;
5753 /* CPU name not found in list. */
5754 error ("unknown value %qs for -mcpu", str);
5755 return;
5758 /* Parse the TUNE string. */
5760 static void
5761 aarch64_parse_tune (void)
5763 const struct processor *cpu;
5764 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5765 strcpy (str, aarch64_tune_string);
5767 /* Loop through the list of supported CPUs to find a match. */
5768 for (cpu = all_cores; cpu->name != NULL; cpu++)
5770 if (strcmp (cpu->name, str) == 0)
5772 selected_tune = cpu;
5773 return;
5777 /* CPU name not found in list. */
5778 error ("unknown value %qs for -mtune", str);
5779 return;
5783 /* Implement TARGET_OPTION_OVERRIDE. */
5785 static void
5786 aarch64_override_options (void)
5788 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5789 If either of -march or -mtune is given, they override their
5790 respective component of -mcpu.
5792 So, first parse AARCH64_CPU_STRING, then the others; be careful
5793 with -march because, if -mcpu is not present on the command line,
5794 -march must set a sensible default CPU. */
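/* For example (illustrative): with -mcpu=cortex-a57+crypto -mtune=cortex-a53
   the architecture and ISA flags come from the cortex-a57 entry plus the
   crypto extension, while the tuning tables come from cortex-a53.  */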
5795 if (aarch64_cpu_string)
5797 aarch64_parse_cpu ();
5800 if (aarch64_arch_string)
5802 aarch64_parse_arch ();
5805 if (aarch64_tune_string)
5807 aarch64_parse_tune ();
5810 #ifndef HAVE_AS_MABI_OPTION
5811 /* The compiler may have been configured with 2.23.* binutils, which does
5812 not have support for ILP32. */
5813 if (TARGET_ILP32)
5814 error ("Assembler does not support -mabi=ilp32");
5815 #endif
5817 initialize_aarch64_code_model ();
5819 aarch64_build_bitmask_table ();
5821 /* This target defaults to strict volatile bitfields. */
5822 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5823 flag_strict_volatile_bitfields = 1;
5825 /* If the user did not specify a processor, choose the default
5826 one for them. This will be the CPU set during configuration using
5827 --with-cpu, otherwise it is "generic". */
5828 if (!selected_cpu)
5830 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5831 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5834 gcc_assert (selected_cpu);
5836 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5837 if (!selected_tune)
5838 selected_tune = &all_cores[selected_cpu->core];
5840 aarch64_tune_flags = selected_tune->flags;
5841 aarch64_tune = selected_tune->core;
5842 aarch64_tune_params = selected_tune->tune;
5844 aarch64_override_options_after_change ();
5847 /* Implement targetm.override_options_after_change. */
5849 static void
5850 aarch64_override_options_after_change (void)
5852 if (flag_omit_frame_pointer)
5853 flag_omit_leaf_frame_pointer = false;
5854 else if (flag_omit_leaf_frame_pointer)
5855 flag_omit_frame_pointer = true;
5858 static struct machine_function *
5859 aarch64_init_machine_status (void)
5861 struct machine_function *machine;
5862 machine = ggc_alloc_cleared_machine_function ();
5863 return machine;
5866 void
5867 aarch64_init_expanders (void)
5869 init_machine_status = aarch64_init_machine_status;
5872 /* A checking mechanism for the implementation of the various code models. */
5873 static void
5874 initialize_aarch64_code_model (void)
5876 if (flag_pic)
5878 switch (aarch64_cmodel_var)
5880 case AARCH64_CMODEL_TINY:
5881 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5882 break;
5883 case AARCH64_CMODEL_SMALL:
5884 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5885 break;
5886 case AARCH64_CMODEL_LARGE:
5887 sorry ("code model %qs with -f%s", "large",
5888 flag_pic > 1 ? "PIC" : "pic");
5889 default:
5890 gcc_unreachable ();
5893 else
5894 aarch64_cmodel = aarch64_cmodel_var;
5897 /* Return true if SYMBOL_REF X binds locally. */
5899 static bool
5900 aarch64_symbol_binds_local_p (const_rtx x)
5902 return (SYMBOL_REF_DECL (x)
5903 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5904 : SYMBOL_REF_LOCAL_P (x));
5907 /* Return true if SYMBOL_REF X is thread local */
5908 static bool
5909 aarch64_tls_symbol_p (rtx x)
5911 if (! TARGET_HAVE_TLS)
5912 return false;
5914 if (GET_CODE (x) != SYMBOL_REF)
5915 return false;
5917 return SYMBOL_REF_TLS_MODEL (x) != 0;
5920 /* Classify a TLS symbol into one of the TLS kinds. */
5921 enum aarch64_symbol_type
5922 aarch64_classify_tls_symbol (rtx x)
5924 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5926 switch (tls_kind)
5928 case TLS_MODEL_GLOBAL_DYNAMIC:
5929 case TLS_MODEL_LOCAL_DYNAMIC:
5930 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5932 case TLS_MODEL_INITIAL_EXEC:
5933 return SYMBOL_SMALL_GOTTPREL;
5935 case TLS_MODEL_LOCAL_EXEC:
5936 return SYMBOL_SMALL_TPREL;
5938 case TLS_MODEL_EMULATED:
5939 case TLS_MODEL_NONE:
5940 return SYMBOL_FORCE_TO_MEM;
5942 default:
5943 gcc_unreachable ();
5947 /* Return the method that should be used to access SYMBOL_REF or
5948 LABEL_REF X in context CONTEXT. */
5950 enum aarch64_symbol_type
5951 aarch64_classify_symbol (rtx x,
5952 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5954 if (GET_CODE (x) == LABEL_REF)
5956 switch (aarch64_cmodel)
5958 case AARCH64_CMODEL_LARGE:
5959 return SYMBOL_FORCE_TO_MEM;
5961 case AARCH64_CMODEL_TINY_PIC:
5962 case AARCH64_CMODEL_TINY:
5963 return SYMBOL_TINY_ABSOLUTE;
5965 case AARCH64_CMODEL_SMALL_PIC:
5966 case AARCH64_CMODEL_SMALL:
5967 return SYMBOL_SMALL_ABSOLUTE;
5969 default:
5970 gcc_unreachable ();
5974 if (GET_CODE (x) == SYMBOL_REF)
5976 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5977 return SYMBOL_FORCE_TO_MEM;
5979 if (aarch64_tls_symbol_p (x))
5980 return aarch64_classify_tls_symbol (x);
5982 switch (aarch64_cmodel)
5984 case AARCH64_CMODEL_TINY:
5985 if (SYMBOL_REF_WEAK (x))
5986 return SYMBOL_FORCE_TO_MEM;
5987 return SYMBOL_TINY_ABSOLUTE;
5989 case AARCH64_CMODEL_SMALL:
5990 if (SYMBOL_REF_WEAK (x))
5991 return SYMBOL_FORCE_TO_MEM;
5992 return SYMBOL_SMALL_ABSOLUTE;
5994 case AARCH64_CMODEL_TINY_PIC:
5995 if (!aarch64_symbol_binds_local_p (x))
5996 return SYMBOL_TINY_GOT;
5997 return SYMBOL_TINY_ABSOLUTE;
5999 case AARCH64_CMODEL_SMALL_PIC:
6000 if (!aarch64_symbol_binds_local_p (x))
6001 return SYMBOL_SMALL_GOT;
6002 return SYMBOL_SMALL_ABSOLUTE;
6004 default:
6005 gcc_unreachable ();
6009 /* By default push everything into the constant pool. */
6010 return SYMBOL_FORCE_TO_MEM;
6013 bool
6014 aarch64_constant_address_p (rtx x)
6016 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6019 bool
6020 aarch64_legitimate_pic_operand_p (rtx x)
6022 if (GET_CODE (x) == SYMBOL_REF
6023 || (GET_CODE (x) == CONST
6024 && GET_CODE (XEXP (x, 0)) == PLUS
6025 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6026 return false;
6028 return true;
6031 /* Return true if X holds either a quarter-precision or
6032 floating-point +0.0 constant. */
6033 static bool
6034 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6036 if (!CONST_DOUBLE_P (x))
6037 return false;
6039 /* TODO: We could handle moving 0.0 to a TFmode register,
6040 but first we would like to refactor the movtf_aarch64
6041 to be more amenable to splitting moves properly and to
6042 gate correctly on TARGET_SIMD. For now, reject all
6043 constants that are not destined for SFmode or DFmode registers. */
6044 if (!(mode == SFmode || mode == DFmode))
6045 return false;
6047 if (aarch64_float_const_zero_rtx_p (x))
6048 return true;
6049 return aarch64_float_const_representable_p (x);
6052 static bool
6053 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6055 /* Do not allow vector struct mode constants. We could support
6056 0 and -1 easily, but they need support in aarch64-simd.md. */
6057 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6058 return false;
6060 /* This could probably go away because
6061 we now decompose CONST_INTs according to expand_mov_immediate. */
6062 if ((GET_CODE (x) == CONST_VECTOR
6063 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6064 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6065 return !targetm.cannot_force_const_mem (mode, x);
6067 if (GET_CODE (x) == HIGH
6068 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6069 return true;
6071 return aarch64_constant_address_p (x);
6075 aarch64_load_tp (rtx target)
6077 if (!target
6078 || GET_MODE (target) != Pmode
6079 || !register_operand (target, Pmode))
6080 target = gen_reg_rtx (Pmode);
6082 /* Can return in any reg. */
6083 emit_insn (gen_aarch64_load_tp_hard (target));
6084 return target;
6087 /* On AAPCS systems, this is the "struct __va_list". */
6088 static GTY(()) tree va_list_type;
6090 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6091 Return the type to use as __builtin_va_list.
6093 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6095 struct __va_list
6097 void *__stack;
6098 void *__gr_top;
6099 void *__vr_top;
6100 int __gr_offs;
6101 int __vr_offs;
6102 }; */
6104 static tree
6105 aarch64_build_builtin_va_list (void)
6107 tree va_list_name;
6108 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6110 /* Create the type. */
6111 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6112 /* Give it the required name. */
6113 va_list_name = build_decl (BUILTINS_LOCATION,
6114 TYPE_DECL,
6115 get_identifier ("__va_list"),
6116 va_list_type);
6117 DECL_ARTIFICIAL (va_list_name) = 1;
6118 TYPE_NAME (va_list_type) = va_list_name;
6119 TYPE_STUB_DECL (va_list_type) = va_list_name;
6121 /* Create the fields. */
6122 f_stack = build_decl (BUILTINS_LOCATION,
6123 FIELD_DECL, get_identifier ("__stack"),
6124 ptr_type_node);
6125 f_grtop = build_decl (BUILTINS_LOCATION,
6126 FIELD_DECL, get_identifier ("__gr_top"),
6127 ptr_type_node);
6128 f_vrtop = build_decl (BUILTINS_LOCATION,
6129 FIELD_DECL, get_identifier ("__vr_top"),
6130 ptr_type_node);
6131 f_groff = build_decl (BUILTINS_LOCATION,
6132 FIELD_DECL, get_identifier ("__gr_offs"),
6133 integer_type_node);
6134 f_vroff = build_decl (BUILTINS_LOCATION,
6135 FIELD_DECL, get_identifier ("__vr_offs"),
6136 integer_type_node);
6138 DECL_ARTIFICIAL (f_stack) = 1;
6139 DECL_ARTIFICIAL (f_grtop) = 1;
6140 DECL_ARTIFICIAL (f_vrtop) = 1;
6141 DECL_ARTIFICIAL (f_groff) = 1;
6142 DECL_ARTIFICIAL (f_vroff) = 1;
6144 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6145 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6146 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6147 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6148 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6150 TYPE_FIELDS (va_list_type) = f_stack;
6151 DECL_CHAIN (f_stack) = f_grtop;
6152 DECL_CHAIN (f_grtop) = f_vrtop;
6153 DECL_CHAIN (f_vrtop) = f_groff;
6154 DECL_CHAIN (f_groff) = f_vroff;
6156 /* Compute its layout. */
6157 layout_type (va_list_type);
6159 return va_list_type;
6162 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6163 static void
6164 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6166 const CUMULATIVE_ARGS *cum;
6167 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6168 tree stack, grtop, vrtop, groff, vroff;
6169 tree t;
6170 int gr_save_area_size;
6171 int vr_save_area_size;
6172 int vr_offset;
6174 cum = &crtl->args.info;
6175 gr_save_area_size
6176 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6177 vr_save_area_size
6178 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6180 if (TARGET_GENERAL_REGS_ONLY)
6182 if (cum->aapcs_nvrn > 0)
6183 sorry ("%qs and floating point or vector arguments",
6184 "-mgeneral-regs-only");
6185 vr_save_area_size = 0;
6188 f_stack = TYPE_FIELDS (va_list_type_node);
6189 f_grtop = DECL_CHAIN (f_stack);
6190 f_vrtop = DECL_CHAIN (f_grtop);
6191 f_groff = DECL_CHAIN (f_vrtop);
6192 f_vroff = DECL_CHAIN (f_groff);
6194 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6195 NULL_TREE);
6196 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6197 NULL_TREE);
6198 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6199 NULL_TREE);
6200 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6201 NULL_TREE);
6202 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6203 NULL_TREE);
6205 /* Emit code to initialize STACK, which points to the next varargs stack
6206 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6207 by named arguments. STACK is 8-byte aligned. */
6208 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6209 if (cum->aapcs_stack_size > 0)
6210 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6211 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6214 /* Emit code to initialize GRTOP, the top of the GR save area.
6215 virtual_incoming_args_rtx should have been 16 byte aligned. */
6216 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6217 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6218 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6220 /* Emit code to initialize VRTOP, the top of the VR save area.
6221 This address is gr_save_area_bytes below GRTOP, rounded
6222 down to the next 16-byte boundary. */
6223 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6224 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6225 STACK_BOUNDARY / BITS_PER_UNIT);
6227 if (vr_offset)
6228 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6229 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6230 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6232 /* Emit code to initialize GROFF, the offset from GRTOP of the
6233 next GPR argument. */
6234 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6235 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6238 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6239 of the next VR argument. */
6240 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6241 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6242 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
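/* To summarize the expansion above (illustratively, for LP64): __stack
   points just past the named stack arguments, __gr_top and __vr_top point
   to the tops of the general-register and FP/SIMD save areas, and
   __gr_offs/__vr_offs start at minus the size of the respective save area,
   increasing towards zero as register-passed anonymous arguments are
   consumed by va_arg.  */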
6245 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6247 static tree
6248 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6249 gimple_seq *post_p ATTRIBUTE_UNUSED)
6251 tree addr;
6252 bool indirect_p;
6253 bool is_ha; /* is HFA or HVA. */
6254 bool dw_align; /* double-word align. */
6255 enum machine_mode ag_mode = VOIDmode;
6256 int nregs;
6257 enum machine_mode mode;
6259 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6260 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6261 HOST_WIDE_INT size, rsize, adjust, align;
6262 tree t, u, cond1, cond2;
6264 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6265 if (indirect_p)
6266 type = build_pointer_type (type);
6268 mode = TYPE_MODE (type);
6270 f_stack = TYPE_FIELDS (va_list_type_node);
6271 f_grtop = DECL_CHAIN (f_stack);
6272 f_vrtop = DECL_CHAIN (f_grtop);
6273 f_groff = DECL_CHAIN (f_vrtop);
6274 f_vroff = DECL_CHAIN (f_groff);
6276 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6277 f_stack, NULL_TREE);
6278 size = int_size_in_bytes (type);
6279 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6281 dw_align = false;
6282 adjust = 0;
6283 if (aarch64_vfp_is_call_or_return_candidate (mode,
6284 type,
6285 &ag_mode,
6286 &nregs,
6287 &is_ha))
6289 /* TYPE passed in fp/simd registers. */
6290 if (TARGET_GENERAL_REGS_ONLY)
6291 sorry ("%qs and floating point or vector arguments",
6292 "-mgeneral-regs-only");
6294 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6295 unshare_expr (valist), f_vrtop, NULL_TREE);
6296 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6297 unshare_expr (valist), f_vroff, NULL_TREE);
6299 rsize = nregs * UNITS_PER_VREG;
6301 if (is_ha)
6303 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6304 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6306 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6307 && size < UNITS_PER_VREG)
6309 adjust = UNITS_PER_VREG - size;
6312 else
6314 /* TYPE passed in general registers. */
6315 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6316 unshare_expr (valist), f_grtop, NULL_TREE);
6317 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6318 unshare_expr (valist), f_groff, NULL_TREE);
6319 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6320 nregs = rsize / UNITS_PER_WORD;
6322 if (align > 8)
6323 dw_align = true;
6325 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6326 && size < UNITS_PER_WORD)
6328 adjust = UNITS_PER_WORD - size;
6332 /* Get a local temporary for the field value. */
6333 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6335 /* Emit code to branch if off >= 0. */
6336 t = build2 (GE_EXPR, boolean_type_node, off,
6337 build_int_cst (TREE_TYPE (off), 0));
6338 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6340 if (dw_align)
6342 /* Emit: offs = (offs + 15) & -16. */
6343 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6344 build_int_cst (TREE_TYPE (off), 15));
6345 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6346 build_int_cst (TREE_TYPE (off), -16));
6347 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6349 else
6350 roundup = NULL;
6352 /* Update ap.__[g|v]r_offs */
6353 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6354 build_int_cst (TREE_TYPE (off), rsize));
6355 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6357 /* String up. */
6358 if (roundup)
6359 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6361 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6362 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6363 build_int_cst (TREE_TYPE (f_off), 0));
6364 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6366 /* String up: make sure the assignment happens before the use. */
6367 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6368 COND_EXPR_ELSE (cond1) = t;
6370 /* Prepare the trees handling the argument that is passed on the stack;
6371 the top-level node is stored in ON_STACK. */
6372 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6373 if (align > 8)
6375 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6376 t = fold_convert (intDI_type_node, arg);
6377 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6378 build_int_cst (TREE_TYPE (t), 15));
6379 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6380 build_int_cst (TREE_TYPE (t), -16));
6381 t = fold_convert (TREE_TYPE (arg), t);
6382 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6384 else
6385 roundup = NULL;
6386 /* Advance ap.__stack */
6387 t = fold_convert (intDI_type_node, arg);
6388 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6389 build_int_cst (TREE_TYPE (t), size + 7));
6390 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6391 build_int_cst (TREE_TYPE (t), -8));
6392 t = fold_convert (TREE_TYPE (arg), t);
6393 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6394 /* String up roundup and advance. */
6395 if (roundup)
6396 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6397 /* String up with arg */
6398 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6399 /* Big-endianness related address adjustment. */
6400 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6401 && size < UNITS_PER_WORD)
6403 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6404 size_int (UNITS_PER_WORD - size));
6405 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6408 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6409 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6411 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6412 t = off;
6413 if (adjust)
6414 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6415 build_int_cst (TREE_TYPE (off), adjust));
6417 t = fold_convert (sizetype, t);
6418 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6420 if (is_ha)
6422 /* type ha; // treat as "struct {ftype field[n];}"
6423 ... [computing offs]
6424 for (i = 0; i < nregs; ++i, offs += 16)
6425 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6426 return ha; */
6427 int i;
6428 tree tmp_ha, field_t, field_ptr_t;
6430 /* Declare a local variable. */
6431 tmp_ha = create_tmp_var_raw (type, "ha");
6432 gimple_add_tmp_var (tmp_ha);
6434 /* Establish the base type. */
6435 switch (ag_mode)
6437 case SFmode:
6438 field_t = float_type_node;
6439 field_ptr_t = float_ptr_type_node;
6440 break;
6441 case DFmode:
6442 field_t = double_type_node;
6443 field_ptr_t = double_ptr_type_node;
6444 break;
6445 case TFmode:
6446 field_t = long_double_type_node;
6447 field_ptr_t = long_double_ptr_type_node;
6448 break;
6449 /* Half-precision and quad-precision floats are not fully supported yet.
6450 Enable the following code once that support is complete; the correct
6451 type node for __fp16 * still needs to be found. */
6452 #if 0
6453 case HFmode:
6454 field_t = float_type_node;
6455 field_ptr_t = float_ptr_type_node;
6456 break;
6457 #endif
6458 case V2SImode:
6459 case V4SImode:
6461 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6462 field_t = build_vector_type_for_mode (innertype, ag_mode);
6463 field_ptr_t = build_pointer_type (field_t);
6465 break;
6466 default:
6467 gcc_assert (0);
6470 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
6471 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6472 addr = t;
6473 t = fold_convert (field_ptr_t, addr);
6474 t = build2 (MODIFY_EXPR, field_t,
6475 build1 (INDIRECT_REF, field_t, tmp_ha),
6476 build1 (INDIRECT_REF, field_t, t));
6478 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6479 for (i = 1; i < nregs; ++i)
6481 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6482 u = fold_convert (field_ptr_t, addr);
6483 u = build2 (MODIFY_EXPR, field_t,
6484 build2 (MEM_REF, field_t, tmp_ha,
6485 build_int_cst (field_ptr_t,
6486 (i *
6487 int_size_in_bytes (field_t)))),
6488 build1 (INDIRECT_REF, field_t, u));
6489 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6492 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6493 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6496 COND_EXPR_ELSE (cond2) = t;
6497 addr = fold_convert (build_pointer_type (type), cond1);
6498 addr = build_va_arg_indirect_ref (addr);
6500 if (indirect_p)
6501 addr = build_va_arg_indirect_ref (addr);
6503 return addr;
6506 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6508 static void
6509 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6510 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6511 int no_rtl)
6513 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6514 CUMULATIVE_ARGS local_cum;
6515 int gr_saved, vr_saved;
6517 /* The caller has advanced CUM up to, but not beyond, the last named
6518 argument. Advance a local copy of CUM past the last "real" named
6519 argument, to find out how many registers are left over. */
6520 local_cum = *cum;
6521 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6523 /* Find out how many registers we need to save. */
6524 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6525 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6527 if (TARGET_GENERAL_REGS_ONLY)
6529 if (local_cum.aapcs_nvrn > 0)
6530 sorry ("%qs and floating point or vector arguments",
6531 "-mgeneral-regs-only");
6532 vr_saved = 0;
6535 if (!no_rtl)
6537 if (gr_saved > 0)
6539 rtx ptr, mem;
6541 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6542 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6543 - gr_saved * UNITS_PER_WORD);
6544 mem = gen_frame_mem (BLKmode, ptr);
6545 set_mem_alias_set (mem, get_varargs_alias_set ());
6547 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6548 mem, gr_saved);
6550 if (vr_saved > 0)
6552 /* We can't use move_block_from_reg, because it will use
6553 the wrong mode, storing D regs only. */
6554 enum machine_mode mode = TImode;
6555 int off, i;
6557 /* Set OFF to the offset from virtual_incoming_args_rtx of
6558 the first vector register. The VR save area lies below
6559 the GR one, and is aligned to 16 bytes. */
6560 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6561 STACK_BOUNDARY / BITS_PER_UNIT);
6562 off -= vr_saved * UNITS_PER_VREG;
6564 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6566 rtx ptr, mem;
6568 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6569 mem = gen_frame_mem (mode, ptr);
6570 set_mem_alias_set (mem, get_varargs_alias_set ());
6571 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6572 off += UNITS_PER_VREG;
6577 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6578 any complication of having crtl->args.pretend_args_size changed. */
6579 cfun->machine->saved_varargs_size
6580 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6581 STACK_BOUNDARY / BITS_PER_UNIT)
6582 + vr_saved * UNITS_PER_VREG);
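/* For example (illustrative values only): with gr_saved == 3 and
   vr_saved == 2, saved_varargs_size would be
   AARCH64_ROUND_UP (3 * 8, 16) + 2 * 16 = 32 + 32 = 64 bytes.  */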
6585 static void
6586 aarch64_conditional_register_usage (void)
6588 int i;
6589 if (!TARGET_FLOAT)
6591 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6593 fixed_regs[i] = 1;
6594 call_used_regs[i] = 1;
6599 /* Walk down the type tree of TYPE counting consecutive base elements.
6600 If *MODEP is VOIDmode, then set it to the first valid floating point
6601 type. If a non-floating point type is found, or if a floating point
6602 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6603 otherwise return the count in the sub-tree. */
6604 static int
6605 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6607 enum machine_mode mode;
6608 HOST_WIDE_INT size;
6610 switch (TREE_CODE (type))
6612 case REAL_TYPE:
6613 mode = TYPE_MODE (type);
6614 if (mode != DFmode && mode != SFmode && mode != TFmode)
6615 return -1;
6617 if (*modep == VOIDmode)
6618 *modep = mode;
6620 if (*modep == mode)
6621 return 1;
6623 break;
6625 case COMPLEX_TYPE:
6626 mode = TYPE_MODE (TREE_TYPE (type));
6627 if (mode != DFmode && mode != SFmode && mode != TFmode)
6628 return -1;
6630 if (*modep == VOIDmode)
6631 *modep = mode;
6633 if (*modep == mode)
6634 return 2;
6636 break;
6638 case VECTOR_TYPE:
6639 /* Use V2SImode and V4SImode as representatives of all 64-bit
6640 and 128-bit vector types. */
6641 size = int_size_in_bytes (type);
6642 switch (size)
6644 case 8:
6645 mode = V2SImode;
6646 break;
6647 case 16:
6648 mode = V4SImode;
6649 break;
6650 default:
6651 return -1;
6654 if (*modep == VOIDmode)
6655 *modep = mode;
6657 /* Vector modes are considered to be opaque: two vectors are
6658 equivalent for the purposes of being homogeneous aggregates
6659 if they are the same size. */
6660 if (*modep == mode)
6661 return 1;
6663 break;
6665 case ARRAY_TYPE:
6667 int count;
6668 tree index = TYPE_DOMAIN (type);
6670 /* Can't handle incomplete types nor sizes that are not
6671 fixed. */
6672 if (!COMPLETE_TYPE_P (type)
6673 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6674 return -1;
6676 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6677 if (count == -1
6678 || !index
6679 || !TYPE_MAX_VALUE (index)
6680 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6681 || !TYPE_MIN_VALUE (index)
6682 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6683 || count < 0)
6684 return -1;
6686 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6687 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6689 /* There must be no padding. */
6690 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6691 return -1;
6693 return count;
6696 case RECORD_TYPE:
6698 int count = 0;
6699 int sub_count;
6700 tree field;
6702 /* Can't handle incomplete types nor sizes that are not
6703 fixed. */
6704 if (!COMPLETE_TYPE_P (type)
6705 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6706 return -1;
6708 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6710 if (TREE_CODE (field) != FIELD_DECL)
6711 continue;
6713 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6714 if (sub_count < 0)
6715 return -1;
6716 count += sub_count;
6719 /* There must be no padding. */
6720 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6721 return -1;
6723 return count;
6726 case UNION_TYPE:
6727 case QUAL_UNION_TYPE:
6729 /* These aren't very interesting except in a degenerate case. */
6730 int count = 0;
6731 int sub_count;
6732 tree field;
6734 /* Can't handle incomplete types nor sizes that are not
6735 fixed. */
6736 if (!COMPLETE_TYPE_P (type)
6737 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6738 return -1;
6740 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6742 if (TREE_CODE (field) != FIELD_DECL)
6743 continue;
6745 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6746 if (sub_count < 0)
6747 return -1;
6748 count = count > sub_count ? count : sub_count;
6751 /* There must be no padding. */
6752 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6753 return -1;
6755 return count;
6758 default:
6759 break;
6762 return -1;
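/* Worked examples (illustrative only):
     struct { double x, y; }        -> two DFmode REAL_TYPE fields, count 2;
     double[3]                      -> ARRAY_TYPE of DFmode, count 3;
     _Complex float                 -> COMPLEX_TYPE of SFmode, count 2;
     struct { float f; double d; }  -> the second field's mode differs from
                                       *MODEP, so the result is -1.  */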
6765 /* Return true if we use LRA instead of reload pass. */
6766 static bool
6767 aarch64_lra_p (void)
6769 return aarch64_lra_flag;
6772 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6773 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6774 array types. The C99 floating-point complex types are also considered
6775 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6776 types, which are GCC extensions and out of the scope of AAPCS64, are
6777 treated as composite types here as well.
6779 Note that MODE itself is not sufficient in determining whether a type
6780 is such a composite type or not. This is because
6781 stor-layout.c:compute_record_mode may have already changed the MODE
6782 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6783 structure with only one field may have its MODE set to the mode of the
6784 field. Also an integer mode whose size matches the size of the
6785 RECORD_TYPE type may be used to substitute the original mode
6786 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6787 solely relied on. */
6789 static bool
6790 aarch64_composite_type_p (const_tree type,
6791 enum machine_mode mode)
6793 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6794 return true;
6796 if (mode == BLKmode
6797 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6798 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6799 return true;
6801 return false;
6804 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6805 type as described in AAPCS64 \S 4.1.2.
6807 See the comment above aarch64_composite_type_p for the notes on MODE. */
6809 static bool
6810 aarch64_short_vector_p (const_tree type,
6811 enum machine_mode mode)
6813 HOST_WIDE_INT size = -1;
6815 if (type && TREE_CODE (type) == VECTOR_TYPE)
6816 size = int_size_in_bytes (type);
6817 else if (!aarch64_composite_type_p (type, mode)
6818 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6819 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6820 size = GET_MODE_SIZE (mode);
6822 return (size == 8 || size == 16) ? true : false;
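/* For instance, a 16-byte GNU vector type such as
     typedef int v4si __attribute__ ((vector_size (16)));
   is a short vector in the AAPCS64 sense, whereas a vector_size (32)
   type, being 32 bytes, is not.  */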
6825 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6826 shall be passed or returned in simd/fp register(s) (providing these
6827 parameter passing registers are available).
6829 Upon successful return, *COUNT returns the number of needed registers,
6830 *BASE_MODE returns the mode of the individual register and when IS_HA
6831 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6832 floating-point aggregate or a homogeneous short-vector aggregate. */
6834 static bool
6835 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6836 const_tree type,
6837 enum machine_mode *base_mode,
6838 int *count,
6839 bool *is_ha)
6841 enum machine_mode new_mode = VOIDmode;
6842 bool composite_p = aarch64_composite_type_p (type, mode);
6844 if (is_ha != NULL) *is_ha = false;
6846 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6847 || aarch64_short_vector_p (type, mode))
6849 *count = 1;
6850 new_mode = mode;
6852 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6854 if (is_ha != NULL) *is_ha = true;
6855 *count = 2;
6856 new_mode = GET_MODE_INNER (mode);
6858 else if (type && composite_p)
6860 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6862 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6864 if (is_ha != NULL) *is_ha = true;
6865 *count = ag_count;
6867 else
6868 return false;
6870 else
6871 return false;
6873 *base_mode = new_mode;
6874 return true;
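/* Examples (illustrative only): a plain double gives *count == 1 with
   DFmode; _Complex double gives *count == 2, DFmode, *is_ha == true;
   struct { float a, b, c, d; } gives *count == 4, SFmode, *is_ha == true;
   a struct of five doubles exceeds HA_MAX_NUM_FLDS and is rejected.  */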
6877 /* Implement TARGET_STRUCT_VALUE_RTX. */
6879 static rtx
6880 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6881 int incoming ATTRIBUTE_UNUSED)
6883 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6886 /* Implements target hook vector_mode_supported_p. */
6887 static bool
6888 aarch64_vector_mode_supported_p (enum machine_mode mode)
6890 if (TARGET_SIMD
6891 && (mode == V4SImode || mode == V8HImode
6892 || mode == V16QImode || mode == V2DImode
6893 || mode == V2SImode || mode == V4HImode
6894 || mode == V8QImode || mode == V2SFmode
6895 || mode == V4SFmode || mode == V2DFmode))
6896 return true;
6898 return false;
6901 /* Return appropriate SIMD container
6902 for MODE within a vector of WIDTH bits. */
6903 static enum machine_mode
6904 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6906 gcc_assert (width == 64 || width == 128);
6907 if (TARGET_SIMD)
6909 if (width == 128)
6910 switch (mode)
6912 case DFmode:
6913 return V2DFmode;
6914 case SFmode:
6915 return V4SFmode;
6916 case SImode:
6917 return V4SImode;
6918 case HImode:
6919 return V8HImode;
6920 case QImode:
6921 return V16QImode;
6922 case DImode:
6923 return V2DImode;
6924 default:
6925 break;
6927 else
6928 switch (mode)
6930 case SFmode:
6931 return V2SFmode;
6932 case SImode:
6933 return V2SImode;
6934 case HImode:
6935 return V4HImode;
6936 case QImode:
6937 return V8QImode;
6938 default:
6939 break;
6942 return word_mode;
6945 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6946 static enum machine_mode
6947 aarch64_preferred_simd_mode (enum machine_mode mode)
6949 return aarch64_simd_container_mode (mode, 128);
6952 /* Return the bitmask of possible vector sizes for the vectorizer
6953 to iterate over. */
6954 static unsigned int
6955 aarch64_autovectorize_vector_sizes (void)
6957 return (16 | 8);
6960 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6961 vector types in order to conform to the AAPCS64 (see "Procedure
6962 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6963 qualify for emission with the mangled names defined in that document,
6964 a vector type must not only be of the correct mode but also be
6965 composed of AdvSIMD vector element types (e.g.
6966 __builtin_aarch64_simd_qi); these types are registered by
6967 aarch64_init_simd_builtins (). In other words, vector types defined
6968 in other ways e.g. via vector_size attribute will get default
6969 mangled names. */
6970 typedef struct
6972 enum machine_mode mode;
6973 const char *element_type_name;
6974 const char *mangled_name;
6975 } aarch64_simd_mangle_map_entry;
6977 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6978 /* 64-bit containerized types. */
6979 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6980 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6981 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6982 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6983 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6984 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6985 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6986 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6987 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6988 /* 128-bit containerized types. */
6989 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6990 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6991 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6992 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6993 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6994 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6995 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6996 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6997 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6998 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6999 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7000 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7001 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7002 { VOIDmode, NULL, NULL }
7005 /* Implement TARGET_MANGLE_TYPE. */
7007 static const char *
7008 aarch64_mangle_type (const_tree type)
7010 /* The AArch64 ABI documents say that "__va_list" has to be
7011 mangled as if it is in the "std" namespace. */
7012 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7013 return "St9__va_list";
7015 /* Check the mode of the vector type, and the name of the vector
7016 element type, against the table. */
7017 if (TREE_CODE (type) == VECTOR_TYPE)
7019 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7021 while (pos->mode != VOIDmode)
7023 tree elt_type = TREE_TYPE (type);
7025 if (pos->mode == TYPE_MODE (type)
7026 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7027 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7028 pos->element_type_name))
7029 return pos->mangled_name;
7031 pos++;
7035 /* Use the default mangling. */
7036 return NULL;
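/* For example, the AdvSIMD type int32x4_t has mode V4SImode and element
   type __builtin_aarch64_simd_si, so it is mangled as "11__Int32x4_t";
   a C++ function such as void f (int32x4_t) would therefore (under the
   usual Itanium scheme, shown here only for illustration) come out as
   _Z1f11__Int32x4_t.  */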
7039 /* Return the equivalent letter for size. */
7040 static char
7041 sizetochar (int size)
7043 switch (size)
7045 case 64: return 'd';
7046 case 32: return 's';
7047 case 16: return 'h';
7048 case 8 : return 'b';
7049 default: gcc_unreachable ();
7053 /* Return true iff x is a uniform vector of floating-point
7054 constants, and the constant can be represented in
7055 quarter-precision form. Note that, as aarch64_float_const_representable_p
7056 rejects both +0.0 and -0.0, this function also rejects +0.0 and -0.0.
7057 static bool
7058 aarch64_vect_float_const_representable_p (rtx x)
7060 int i = 0;
7061 REAL_VALUE_TYPE r0, ri;
7062 rtx x0, xi;
7064 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7065 return false;
7067 x0 = CONST_VECTOR_ELT (x, 0);
7068 if (!CONST_DOUBLE_P (x0))
7069 return false;
7071 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7073 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7075 xi = CONST_VECTOR_ELT (x, i);
7076 if (!CONST_DOUBLE_P (xi))
7077 return false;
7079 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7080 if (!REAL_VALUES_EQUAL (r0, ri))
7081 return false;
7084 return aarch64_float_const_representable_p (x0);
7087 /* Return true for valid and false for invalid. */
7088 bool
7089 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7090 struct simd_immediate_info *info)
7092 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7093 matches = 1; \
7094 for (i = 0; i < idx; i += (STRIDE)) \
7095 if (!(TEST)) \
7096 matches = 0; \
7097 if (matches) \
7099 immtype = (CLASS); \
7100 elsize = (ELSIZE); \
7101 eshift = (SHIFT); \
7102 emvn = (NEG); \
7103 break; \
7106 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7107 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7108 unsigned char bytes[16];
7109 int immtype = -1, matches;
7110 unsigned int invmask = inverse ? 0xff : 0;
7111 int eshift, emvn;
7113 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7115 if (! (aarch64_simd_imm_zero_p (op, mode)
7116 || aarch64_vect_float_const_representable_p (op)))
7117 return false;
7119 if (info)
7121 info->value = CONST_VECTOR_ELT (op, 0);
7122 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7123 info->mvn = false;
7124 info->shift = 0;
7127 return true;
7130 /* Splat vector constant out into a byte vector. */
7131 for (i = 0; i < n_elts; i++)
7133 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7134 it must be laid out in the vector register in reverse order. */
7135 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7136 unsigned HOST_WIDE_INT elpart;
7137 unsigned int part, parts;
7139 if (GET_CODE (el) == CONST_INT)
7141 elpart = INTVAL (el);
7142 parts = 1;
7144 else if (GET_CODE (el) == CONST_DOUBLE)
7146 elpart = CONST_DOUBLE_LOW (el);
7147 parts = 2;
7149 else
7150 gcc_unreachable ();
7152 for (part = 0; part < parts; part++)
7154 unsigned int byte;
7155 for (byte = 0; byte < innersize; byte++)
7157 bytes[idx++] = (elpart & 0xff) ^ invmask;
7158 elpart >>= BITS_PER_UNIT;
7160 if (GET_CODE (el) == CONST_DOUBLE)
7161 elpart = CONST_DOUBLE_HIGH (el);
7165 /* Sanity check. */
7166 gcc_assert (idx == GET_MODE_SIZE (mode));
7170 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7171 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7173 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7174 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7176 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7177 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7179 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7180 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7182 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7184 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7186 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7187 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7189 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7190 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7192 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7193 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7195 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7196 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7198 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7200 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7202 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7203 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7205 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7206 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7208 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7209 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7211 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7212 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7214 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7216 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7217 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7219 while (0);
7221 if (immtype == -1)
7222 return false;
7224 if (info)
7226 info->element_width = elsize;
7227 info->mvn = emvn != 0;
7228 info->shift = eshift;
7230 unsigned HOST_WIDE_INT imm = 0;
7232 if (immtype >= 12 && immtype <= 15)
7233 info->msl = true;
7235 /* Un-invert bytes of recognized vector, if necessary. */
7236 if (invmask != 0)
7237 for (i = 0; i < idx; i++)
7238 bytes[i] ^= invmask;
7240 if (immtype == 17)
7242 /* FIXME: Broken on 32-bit H_W_I hosts. */
7243 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7245 for (i = 0; i < 8; i++)
7246 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7247 << (i * BITS_PER_UNIT);
7250 info->value = GEN_INT (imm);
7252 else
7254 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7255 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7257 /* Construct 'abcdefgh' because the assembler cannot handle
7258 generic constants. */
7259 if (info->mvn)
7260 imm = ~imm;
7261 imm = (imm >> info->shift) & 0xff;
7262 info->value = GEN_INT (imm);
7266 return true;
7267 #undef CHECK
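/* Worked example (illustrative): a V4SImode constant whose elements are
   all 0x00ab0000 splats to the per-element byte pattern { 00, 00, ab, 00 },
   which matches the CHECK (4, 32, 2, ...) case above, giving immtype 2,
   elsize 32, eshift 16 and no MVN.  INFO then ends up with
   element_width == 32, shift == 16 and value == 0xab, i.e. an immediate
   that can later be emitted as something like "movi v0.4s, 0xab, lsl 16".  */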
7270 static bool
7271 aarch64_const_vec_all_same_int_p (rtx x,
7272 HOST_WIDE_INT minval,
7273 HOST_WIDE_INT maxval)
7275 HOST_WIDE_INT firstval;
7276 int count, i;
7278 if (GET_CODE (x) != CONST_VECTOR
7279 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7280 return false;
7282 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7283 if (firstval < minval || firstval > maxval)
7284 return false;
7286 count = CONST_VECTOR_NUNITS (x);
7287 for (i = 1; i < count; i++)
7288 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7289 return false;
7291 return true;
7294 /* Check if immediate shift constants are within range. */
7295 bool
7296 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7298 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7299 if (left)
7300 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7301 else
7302 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7305 /* Return true if X is a uniform vector where all elements
7306 are either the floating-point constant 0.0 or the
7307 integer constant 0. */
7308 bool
7309 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7311 return x == CONST0_RTX (mode);
7314 bool
7315 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7317 HOST_WIDE_INT imm = INTVAL (x);
7318 int i;
7320 for (i = 0; i < 8; i++)
7322 unsigned int byte = imm & 0xff;
7323 if (byte != 0xff && byte != 0)
7324 return false;
7325 imm >>= 8;
7328 return true;
7331 bool
7332 aarch64_mov_operand_p (rtx x,
7333 enum aarch64_symbol_context context,
7334 enum machine_mode mode)
7336 if (GET_CODE (x) == HIGH
7337 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7338 return true;
7340 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7341 return true;
7343 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7344 return true;
7346 return aarch64_classify_symbolic_expression (x, context)
7347 == SYMBOL_TINY_ABSOLUTE;
7350 /* Return a const_int vector of VAL. */
7352 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7354 int nunits = GET_MODE_NUNITS (mode);
7355 rtvec v = rtvec_alloc (nunits);
7356 int i;
7358 for (i=0; i < nunits; i++)
7359 RTVEC_ELT (v, i) = GEN_INT (val);
7361 return gen_rtx_CONST_VECTOR (mode, v);
7364 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7366 bool
7367 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7369 enum machine_mode vmode;
7371 gcc_assert (!VECTOR_MODE_P (mode));
7372 vmode = aarch64_preferred_simd_mode (mode);
7373 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7374 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7377 /* Construct and return a PARALLEL RTX vector. */
7379 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7381 int nunits = GET_MODE_NUNITS (mode);
7382 rtvec v = rtvec_alloc (nunits / 2);
7383 int base = high ? nunits / 2 : 0;
7384 rtx t1;
7385 int i;
7387 for (i=0; i < nunits / 2; i++)
7388 RTVEC_ELT (v, i) = GEN_INT (base + i);
7390 t1 = gen_rtx_PARALLEL (mode, v);
7391 return t1;
7394 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7395 HIGH (exclusive). */
7396 void
7397 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7399 HOST_WIDE_INT lane;
7400 gcc_assert (GET_CODE (operand) == CONST_INT);
7401 lane = INTVAL (operand);
7403 if (lane < low || lane >= high)
7404 error ("lane out of range");
7407 void
7408 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7410 gcc_assert (GET_CODE (operand) == CONST_INT);
7411 HOST_WIDE_INT lane = INTVAL (operand);
7413 if (lane < low || lane >= high)
7414 error ("constant out of range");
7417 /* Emit code to reinterpret one AdvSIMD type as another,
7418 without altering bits. */
7419 void
7420 aarch64_simd_reinterpret (rtx dest, rtx src)
7422 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7425 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7426 registers). */
7427 void
7428 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7429 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7430 rtx op1)
7432 rtx mem = gen_rtx_MEM (mode, destaddr);
7433 rtx tmp1 = gen_reg_rtx (mode);
7434 rtx tmp2 = gen_reg_rtx (mode);
7436 emit_insn (intfn (tmp1, op1, tmp2));
7438 emit_move_insn (mem, tmp1);
7439 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7440 emit_move_insn (mem, tmp2);
7443 /* Return TRUE if OP is a valid vector addressing mode. */
7444 bool
7445 aarch64_simd_mem_operand_p (rtx op)
7447 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7448 || GET_CODE (XEXP (op, 0)) == REG);
7451 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7452 not to early-clobber SRC registers in the process.
7454 We assume that the operands described by SRC and DEST represent a
7455 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7456 number of components into which the copy has been decomposed. */
7457 void
7458 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7459 rtx *src, unsigned int count)
7461 unsigned int i;
7463 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7464 || REGNO (operands[0]) < REGNO (operands[1]))
7466 for (i = 0; i < count; i++)
7468 operands[2 * i] = dest[i];
7469 operands[2 * i + 1] = src[i];
7472 else
7474 for (i = 0; i < count; i++)
7476 operands[2 * i] = dest[count - i - 1];
7477 operands[2 * i + 1] = src[count - i - 1];
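/* Example (register numbers are illustrative): splitting a copy of
   { q1, q2 } into { q2, q3 } overlaps and the destination starts at a
   higher register number, so the components are emitted high part first
   (q3 <- q2, then q2 <- q1); emitting them in forward order would
   clobber q2 before it had been read.  */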
7482 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7483 one of VSTRUCT modes: OI, CI or XI. */
7485 aarch64_simd_attr_length_move (rtx insn)
7487 enum machine_mode mode;
7489 extract_insn_cached (insn);
7491 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7493 mode = GET_MODE (recog_data.operand[0]);
7494 switch (mode)
7496 case OImode:
7497 return 8;
7498 case CImode:
7499 return 12;
7500 case XImode:
7501 return 16;
7502 default:
7503 gcc_unreachable ();
7506 return 4;
7509 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7510 alignment of a vector to 128 bits. */
7511 static HOST_WIDE_INT
7512 aarch64_simd_vector_alignment (const_tree type)
7514 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7515 return MIN (align, 128);
7518 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7519 static bool
7520 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7522 if (is_packed)
7523 return false;
7525 /* We guarantee alignment for vectors up to 128-bits. */
7526 if (tree_int_cst_compare (TYPE_SIZE (type),
7527 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7528 return false;
7530 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7531 return true;
7534 /* If VALS is a vector constant that can be loaded into a register
7535 using DUP, generate instructions to do so and return an RTX to
7536 assign to the register. Otherwise return NULL_RTX. */
7537 static rtx
7538 aarch64_simd_dup_constant (rtx vals)
7540 enum machine_mode mode = GET_MODE (vals);
7541 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7542 int n_elts = GET_MODE_NUNITS (mode);
7543 bool all_same = true;
7544 rtx x;
7545 int i;
7547 if (GET_CODE (vals) != CONST_VECTOR)
7548 return NULL_RTX;
7550 for (i = 1; i < n_elts; ++i)
7552 x = CONST_VECTOR_ELT (vals, i);
7553 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7554 all_same = false;
7557 if (!all_same)
7558 return NULL_RTX;
7560 /* We can load this constant by using DUP and a constant in a
7561 single ARM register. This will be cheaper than a vector
7562 load. */
7563 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7564 return gen_rtx_VEC_DUPLICATE (mode, x);
7568 /* Generate code to load VALS, which is a PARALLEL containing only
7569 constants (for vec_init) or CONST_VECTOR, efficiently into a
7570 register. Returns an RTX to copy into the register, or NULL_RTX
7571 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7572 static rtx
7573 aarch64_simd_make_constant (rtx vals)
7575 enum machine_mode mode = GET_MODE (vals);
7576 rtx const_dup;
7577 rtx const_vec = NULL_RTX;
7578 int n_elts = GET_MODE_NUNITS (mode);
7579 int n_const = 0;
7580 int i;
7582 if (GET_CODE (vals) == CONST_VECTOR)
7583 const_vec = vals;
7584 else if (GET_CODE (vals) == PARALLEL)
7586 /* A CONST_VECTOR must contain only CONST_INTs and
7587 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7588 Only store valid constants in a CONST_VECTOR. */
7589 for (i = 0; i < n_elts; ++i)
7591 rtx x = XVECEXP (vals, 0, i);
7592 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7593 n_const++;
7595 if (n_const == n_elts)
7596 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7598 else
7599 gcc_unreachable ();
7601 if (const_vec != NULL_RTX
7602 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7603 /* Load using MOVI/MVNI. */
7604 return const_vec;
7605 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7606 /* Loaded using DUP. */
7607 return const_dup;
7608 else if (const_vec != NULL_RTX)
7609 /* Load from constant pool. We can not take advantage of single-cycle
7610 LD1 because we need a PC-relative addressing mode. */
7611 return const_vec;
7612 else
7613 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7614 We can not construct an initializer. */
7615 return NULL_RTX;
7618 void
7619 aarch64_expand_vector_init (rtx target, rtx vals)
7621 enum machine_mode mode = GET_MODE (target);
7622 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7623 int n_elts = GET_MODE_NUNITS (mode);
7624 int n_var = 0, one_var = -1;
7625 bool all_same = true;
7626 rtx x, mem;
7627 int i;
7629 x = XVECEXP (vals, 0, 0);
7630 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7631 n_var = 1, one_var = 0;
7633 for (i = 1; i < n_elts; ++i)
7635 x = XVECEXP (vals, 0, i);
7636 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7637 ++n_var, one_var = i;
7639 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7640 all_same = false;
7643 if (n_var == 0)
7645 rtx constant = aarch64_simd_make_constant (vals);
7646 if (constant != NULL_RTX)
7648 emit_move_insn (target, constant);
7649 return;
7653 /* Splat a single non-constant element if we can. */
7654 if (all_same)
7656 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7657 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7658 return;
7661 /* One field is non-constant. Load constant then overwrite varying
7662 field. This is more efficient than using the stack. */
7663 if (n_var == 1)
7665 rtx copy = copy_rtx (vals);
7666 rtx index = GEN_INT (one_var);
7667 enum insn_code icode;
7669 /* Load constant part of vector, substitute neighboring value for
7670 varying element. */
7671 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7672 aarch64_expand_vector_init (target, copy);
7674 /* Insert variable. */
7675 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7676 icode = optab_handler (vec_set_optab, mode);
7677 gcc_assert (icode != CODE_FOR_nothing);
7678 emit_insn (GEN_FCN (icode) (target, x, index));
7679 return;
7682 /* Construct the vector in memory one field at a time
7683 and load the whole vector. */
7684 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7685 for (i = 0; i < n_elts; i++)
7686 emit_move_insn (adjust_address_nv (mem, inner_mode,
7687 i * GET_MODE_SIZE (inner_mode)),
7688 XVECEXP (vals, 0, i));
7689 emit_move_insn (target, mem);
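/* To summarise the strategies above (element values are illustrative):
     { 1, 1, 1, 1 }  -> all constant, loaded with MOVI/MVNI or from the pool;
     { x, x, x, x }  -> variable splat, loaded with DUP;
     { 1, 2, 3, x }  -> constant vector { 1, 2, 3, 3 } loaded first, then
                        the variable lane inserted via vec_set;
     { x, y, z, w }  -> built field by field in a stack temporary and
                        loaded as a whole vector.  */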
7693 static unsigned HOST_WIDE_INT
7694 aarch64_shift_truncation_mask (enum machine_mode mode)
7696 return
7697 (aarch64_vector_mode_supported_p (mode)
7698 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7701 #ifndef TLS_SECTION_ASM_FLAG
7702 #define TLS_SECTION_ASM_FLAG 'T'
7703 #endif
7705 void
7706 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7707 tree decl ATTRIBUTE_UNUSED)
7709 char flagchars[10], *f = flagchars;
7711 /* If we have already declared this section, we can use an
7712 abbreviated form to switch back to it -- unless this section is
7713 part of a COMDAT group, in which case GAS requires the full
7714 declaration every time. */
7715 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7716 && (flags & SECTION_DECLARED))
7718 fprintf (asm_out_file, "\t.section\t%s\n", name);
7719 return;
7722 if (!(flags & SECTION_DEBUG))
7723 *f++ = 'a';
7724 if (flags & SECTION_WRITE)
7725 *f++ = 'w';
7726 if (flags & SECTION_CODE)
7727 *f++ = 'x';
7728 if (flags & SECTION_SMALL)
7729 *f++ = 's';
7730 if (flags & SECTION_MERGE)
7731 *f++ = 'M';
7732 if (flags & SECTION_STRINGS)
7733 *f++ = 'S';
7734 if (flags & SECTION_TLS)
7735 *f++ = TLS_SECTION_ASM_FLAG;
7736 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7737 *f++ = 'G';
7738 *f = '\0';
7740 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7742 if (!(flags & SECTION_NOTYPE))
7744 const char *type;
7745 const char *format;
7747 if (flags & SECTION_BSS)
7748 type = "nobits";
7749 else
7750 type = "progbits";
7752 #ifdef TYPE_OPERAND_FMT
7753 format = "," TYPE_OPERAND_FMT;
7754 #else
7755 format = ",@%s";
7756 #endif
7758 fprintf (asm_out_file, format, type);
7760 if (flags & SECTION_ENTSIZE)
7761 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7762 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7764 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7765 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7766 else
7767 fprintf (asm_out_file, ",%s,comdat",
7768 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7772 putc ('\n', asm_out_file);
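/* For example (section and group names are illustrative), a writable
   COMDAT data section would come out as

       .section  .data.foo,"awG",@progbits,foo,comdat

   while re-entering an already-declared non-COMDAT section only emits
   the abbreviated "\t.section\t<name>\n" form above.  */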
7775 /* Select a format to encode pointers in exception handling data. */
7777 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7779 int type;
7780 switch (aarch64_cmodel)
7782 case AARCH64_CMODEL_TINY:
7783 case AARCH64_CMODEL_TINY_PIC:
7784 case AARCH64_CMODEL_SMALL:
7785 case AARCH64_CMODEL_SMALL_PIC:
7786 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7787 for everything. */
7788 type = DW_EH_PE_sdata4;
7789 break;
7790 default:
7791 /* No assumptions here. 8-byte relocs required. */
7792 type = DW_EH_PE_sdata8;
7793 break;
7795 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7798 /* Emit load exclusive. */
7800 static void
7801 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7802 rtx mem, rtx model_rtx)
7804 rtx (*gen) (rtx, rtx, rtx);
7806 switch (mode)
7808 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7809 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7810 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7811 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7812 default:
7813 gcc_unreachable ();
7816 emit_insn (gen (rval, mem, model_rtx));
7819 /* Emit store exclusive. */
7821 static void
7822 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7823 rtx rval, rtx mem, rtx model_rtx)
7825 rtx (*gen) (rtx, rtx, rtx, rtx);
7827 switch (mode)
7829 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7830 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7831 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7832 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7833 default:
7834 gcc_unreachable ();
7837 emit_insn (gen (bval, rval, mem, model_rtx));
7840 /* Mark the previous jump instruction as unlikely. */
7842 static void
7843 aarch64_emit_unlikely_jump (rtx insn)
7845 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7847 insn = emit_jump_insn (insn);
7848 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7851 /* Expand a compare and swap pattern. */
7853 void
7854 aarch64_expand_compare_and_swap (rtx operands[])
7856 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7857 enum machine_mode mode, cmp_mode;
7858 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7860 bval = operands[0];
7861 rval = operands[1];
7862 mem = operands[2];
7863 oldval = operands[3];
7864 newval = operands[4];
7865 is_weak = operands[5];
7866 mod_s = operands[6];
7867 mod_f = operands[7];
7868 mode = GET_MODE (mem);
7869 cmp_mode = mode;
7871 /* Normally the succ memory model must be stronger than fail, but in the
7872 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7873 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7875 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7876 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7877 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7879 switch (mode)
7881 case QImode:
7882 case HImode:
7883 /* For short modes, we're going to perform the comparison in SImode,
7884 so do the zero-extension now. */
7885 cmp_mode = SImode;
7886 rval = gen_reg_rtx (SImode);
7887 oldval = convert_modes (SImode, mode, oldval, true);
7888 /* Fall through. */
7890 case SImode:
7891 case DImode:
7892 /* Force the value into a register if needed. */
7893 if (!aarch64_plus_operand (oldval, mode))
7894 oldval = force_reg (cmp_mode, oldval);
7895 break;
7897 default:
7898 gcc_unreachable ();
7901 switch (mode)
7903 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7904 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7905 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7906 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7907 default:
7908 gcc_unreachable ();
7911 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7913 if (mode == QImode || mode == HImode)
7914 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7916 x = gen_rtx_REG (CCmode, CC_REGNUM);
7917 x = gen_rtx_EQ (SImode, x, const0_rtx);
7918 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7921 /* Split a compare and swap pattern. */
7923 void
7924 aarch64_split_compare_and_swap (rtx operands[])
7926 rtx rval, mem, oldval, newval, scratch;
7927 enum machine_mode mode;
7928 bool is_weak;
7929 rtx label1, label2, x, cond;
7931 rval = operands[0];
7932 mem = operands[1];
7933 oldval = operands[2];
7934 newval = operands[3];
7935 is_weak = (operands[4] != const0_rtx);
7936 scratch = operands[7];
7937 mode = GET_MODE (mem);
7939 label1 = NULL_RTX;
7940 if (!is_weak)
7942 label1 = gen_label_rtx ();
7943 emit_label (label1);
7945 label2 = gen_label_rtx ();
7947 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7949 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7950 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7951 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7952 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7953 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7955 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7957 if (!is_weak)
7959 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7960 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7961 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7962 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7964 else
7966 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7967 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7968 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7971 emit_label (label2);
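/* The strong-CAS split above produces a loop which, for SImode and
   sequentially consistent semantics, would look roughly like this
   (mnemonics depend on the memory model and are illustrative only):

       .Lretry:  ldaxr   w0, [x1]
                 cmp     w0, w2
                 b.ne    .Ldone
                 stlxr   w3, w4, [x1]
                 cbnz    w3, .Lretry
       .Ldone:

   The weak form omits the backward branch and instead leaves the
   store-exclusive result in the condition flags.  */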
7974 /* Split an atomic operation. */
7976 void
7977 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7978 rtx value, rtx model_rtx, rtx cond)
7980 enum machine_mode mode = GET_MODE (mem);
7981 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7982 rtx label, x;
7984 label = gen_label_rtx ();
7985 emit_label (label);
7987 if (new_out)
7988 new_out = gen_lowpart (wmode, new_out);
7989 if (old_out)
7990 old_out = gen_lowpart (wmode, old_out);
7991 else
7992 old_out = new_out;
7993 value = simplify_gen_subreg (wmode, value, mode, 0);
7995 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7997 switch (code)
7999 case SET:
8000 new_out = value;
8001 break;
8003 case NOT:
8004 x = gen_rtx_AND (wmode, old_out, value);
8005 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8006 x = gen_rtx_NOT (wmode, new_out);
8007 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8008 break;
8010 case MINUS:
8011 if (CONST_INT_P (value))
8013 value = GEN_INT (-INTVAL (value));
8014 code = PLUS;
8016 /* Fall through. */
8018 default:
8019 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8020 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8021 break;
8024 aarch64_emit_store_exclusive (mode, cond, mem,
8025 gen_lowpart (mode, new_out), model_rtx);
8027 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8028 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8029 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8030 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8033 static void
8034 aarch64_print_extension (void)
8036 const struct aarch64_option_extension *opt = NULL;
8038 for (opt = all_extensions; opt->name != NULL; opt++)
8039 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8040 asm_fprintf (asm_out_file, "+%s", opt->name);
8042 asm_fprintf (asm_out_file, "\n");
8045 static void
8046 aarch64_start_file (void)
8048 if (selected_arch)
8050 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8051 aarch64_print_extension ();
8053 else if (selected_cpu)
8055 const char *truncated_name
8056 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8057 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8058 aarch64_print_extension ();
8060 default_file_start();
8063 /* Target hook for c_mode_for_suffix. */
8064 static enum machine_mode
8065 aarch64_c_mode_for_suffix (char suffix)
8067 if (suffix == 'q')
8068 return TFmode;
8070 return VOIDmode;
8073 /* We can only represent floating point constants which will fit in
8074 "quarter-precision" values. These values are characterised by
8075 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8078 (-1)^s * (n/16) * 2^r
8080 Where:
8081 's' is the sign bit.
8082 'n' is an integer in the range 16 <= n <= 31.
8083 'r' is an integer in the range -3 <= r <= 4. */
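/* Worked examples: 1.0 is (+1) * (16/16) * 2^0, 0.25 is (16/16) * 2^-2
   and 31.0 is (31/16) * 2^4, so all three are representable; the largest
   magnitude is 31.0 and the smallest non-zero magnitude is
   0.125 = (16/16) * 2^-3.  A value such as 0.2 has no such encoding and
   must be loaded some other way.  */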
8085 /* Return true iff X can be represented by a quarter-precision
8086 floating point immediate operand. Note that we cannot represent 0.0. */
8087 bool
8088 aarch64_float_const_representable_p (rtx x)
8090 /* This represents our current view of how many bits
8091 make up the mantissa. */
8092 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8093 int exponent;
8094 unsigned HOST_WIDE_INT mantissa, mask;
8095 REAL_VALUE_TYPE r, m;
8096 bool fail;
8098 if (!CONST_DOUBLE_P (x))
8099 return false;
8101 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8103 /* We cannot represent infinities, NaNs or +/-zero. We won't
8104 know if we have +zero until we analyse the mantissa, but we
8105 can reject the other invalid values. */
8106 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8107 || REAL_VALUE_MINUS_ZERO (r))
8108 return false;
8110 /* Extract exponent. */
8111 r = real_value_abs (&r);
8112 exponent = REAL_EXP (&r);
8114 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8115 highest (sign) bit, with a fixed binary point at bit point_pos.
8116 m1 holds the low part of the mantissa, m2 the high part.
8117 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8118 bits for the mantissa, this can fail (low bits will be lost). */
8119 real_ldexp (&m, &r, point_pos - exponent);
8120 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8122 /* If the low part of the mantissa has bits set we cannot represent
8123 the value. */
8124 if (w.elt (0) != 0)
8125 return false;
8126 /* We have rejected the lower HOST_WIDE_INT, so update our
8127 understanding of how many bits lie in the mantissa and
8128 look only at the high HOST_WIDE_INT. */
8129 mantissa = w.elt (1);
8130 point_pos -= HOST_BITS_PER_WIDE_INT;
8132 /* We can only represent values with a mantissa of the form 1.xxxx. */
8133 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8134 if ((mantissa & mask) != 0)
8135 return false;
8137 /* Having filtered unrepresentable values, we may now remove all
8138 but the highest 5 bits. */
8139 mantissa >>= point_pos - 5;
8141 /* We cannot represent the value 0.0, so reject it. This is handled
8142 elsewhere. */
8143 if (mantissa == 0)
8144 return false;
8146 /* Then, as bit 4 is always set, we can mask it off, leaving
8147 the mantissa in the range [0, 15]. */
8148 mantissa &= ~(1 << 4);
8149 gcc_assert (mantissa <= 15);
8151 /* GCC internally does not use IEEE754-like encoding (where normalized
8152 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
8153 Our mantissa values are shifted 4 places to the left relative to
8154 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8155 by 5 places to correct for GCC's representation. */
8156 exponent = 5 - exponent;
8158 return (exponent >= 0 && exponent <= 7);
8161 char*
8162 aarch64_output_simd_mov_immediate (rtx const_vector,
8163 enum machine_mode mode,
8164 unsigned width)
8166 bool is_valid;
8167 static char templ[40];
8168 const char *mnemonic;
8169 const char *shift_op;
8170 unsigned int lane_count = 0;
8171 char element_char;
8173 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8175 /* This will return true to show that CONST_VECTOR is legal for use as
8176 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8177 also update INFO to show how the immediate should be generated. */
8178 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8179 gcc_assert (is_valid);
8181 element_char = sizetochar (info.element_width);
8182 lane_count = width / info.element_width;
8184 mode = GET_MODE_INNER (mode);
8185 if (mode == SFmode || mode == DFmode)
8187 gcc_assert (info.shift == 0 && ! info.mvn);
8188 if (aarch64_float_const_zero_rtx_p (info.value))
8189 info.value = GEN_INT (0);
8190 else
8192 #define buf_size 20
8193 REAL_VALUE_TYPE r;
8194 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8195 char float_buf[buf_size] = {'\0'};
8196 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8197 #undef buf_size
8199 if (lane_count == 1)
8200 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8201 else
8202 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8203 lane_count, element_char, float_buf);
8204 return templ;
8208 mnemonic = info.mvn ? "mvni" : "movi";
8209 shift_op = info.msl ? "msl" : "lsl";
8211 if (lane_count == 1)
8212 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8213 mnemonic, UINTVAL (info.value));
8214 else if (info.shift)
8215 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8216 ", %s %d", mnemonic, lane_count, element_char,
8217 UINTVAL (info.value), shift_op, info.shift);
8218 else
8219 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8220 mnemonic, lane_count, element_char, UINTVAL (info.value));
8221 return templ;
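/* Illustrative outputs of the templates above: a single 64-bit lane
   yields "movi  %d0, <hex>", a shifted 32-bit immediate yields something
   like "movi  %0.4s, 0xab, lsl 16" (or "mvni"/"msl" when INFO says so),
   and the floating-point path yields "fmov  %0.4s, <decimal>" with the
   decimal text produced by real_to_decimal_for_mode.  */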
8224 char*
8225 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8226 enum machine_mode mode)
8228 enum machine_mode vmode;
8230 gcc_assert (!VECTOR_MODE_P (mode));
8231 vmode = aarch64_simd_container_mode (mode, 64);
8232 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8233 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8236 /* Split operands into moves from op[1] + op[2] into op[0]. */
8238 void
8239 aarch64_split_combinev16qi (rtx operands[3])
8241 unsigned int dest = REGNO (operands[0]);
8242 unsigned int src1 = REGNO (operands[1]);
8243 unsigned int src2 = REGNO (operands[2]);
8244 enum machine_mode halfmode = GET_MODE (operands[1]);
8245 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8246 rtx destlo, desthi;
8248 gcc_assert (halfmode == V16QImode);
8250 if (src1 == dest && src2 == dest + halfregs)
8252 /* No-op move. Can't split to nothing; emit something. */
8253 emit_note (NOTE_INSN_DELETED);
8254 return;
8257 /* Preserve register attributes for variable tracking. */
8258 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8259 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8260 GET_MODE_SIZE (halfmode));
8262 /* Special case of reversed high/low parts. */
8263 if (reg_overlap_mentioned_p (operands[2], destlo)
8264 && reg_overlap_mentioned_p (operands[1], desthi))
8266 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8267 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8268 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8270 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8272 /* Try to avoid unnecessary moves if part of the result
8273 is in the right place already. */
8274 if (src1 != dest)
8275 emit_move_insn (destlo, operands[1]);
8276 if (src2 != dest + halfregs)
8277 emit_move_insn (desthi, operands[2]);
8279 else
8281 if (src2 != dest + halfregs)
8282 emit_move_insn (desthi, operands[2]);
8283 if (src1 != dest)
8284 emit_move_insn (destlo, operands[1]);
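/* The reversed-halves case above uses the classic three-XOR swap: after
   a ^= b; b ^= a; a ^= b; the two V16QI halves have exchanged contents
   without needing a scratch register.  */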
8288 /* vec_perm support. */
8290 #define MAX_VECT_LEN 16
8292 struct expand_vec_perm_d
8294 rtx target, op0, op1;
8295 unsigned char perm[MAX_VECT_LEN];
8296 enum machine_mode vmode;
8297 unsigned char nelt;
8298 bool one_vector_p;
8299 bool testing_p;
8302 /* Generate a variable permutation. */
8304 static void
8305 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8307 enum machine_mode vmode = GET_MODE (target);
8308 bool one_vector_p = rtx_equal_p (op0, op1);
8310 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8311 gcc_checking_assert (GET_MODE (op0) == vmode);
8312 gcc_checking_assert (GET_MODE (op1) == vmode);
8313 gcc_checking_assert (GET_MODE (sel) == vmode);
8314 gcc_checking_assert (TARGET_SIMD);
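  /* TBL reads its table from one or two full 128-bit registers, so 64-bit
     inputs are first combined into a single V16QImode register; a
     two-vector V16QImode permute uses an OImode register pair.  */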
8316 if (one_vector_p)
8318 if (vmode == V8QImode)
8320 /* Expand the argument to a V16QI mode by duplicating it. */
8321 rtx pair = gen_reg_rtx (V16QImode);
8322 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8323 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8325 else
8327 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8330 else
8332 rtx pair;
8334 if (vmode == V8QImode)
8336 pair = gen_reg_rtx (V16QImode);
8337 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8338 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8340 else
8342 pair = gen_reg_rtx (OImode);
8343 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8344 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
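/* Expand a variable permutation: select elements of OP0 and OP1 according
   to SEL and store the result in TARGET.  SEL is masked first so that
   out-of-range indices wrap around, as vec_perm semantics require.  */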
8349 void
8350 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8352 enum machine_mode vmode = GET_MODE (target);
8353 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8354 bool one_vector_p = rtx_equal_p (op0, op1);
8355 rtx rmask[MAX_VECT_LEN], mask;
8357 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8359 /* The TBL instruction does not use a modulo index, so we must take care
8360 of that ourselves. */
8361 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8362 for (i = 0; i < nelt; ++i)
8363 rmask[i] = mask;
8364 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8365 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8367 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8370 /* Recognize patterns suitable for the TRN instructions. */
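/* For example, with V4SImode operands {a0, a1, a2, a3} and {b0, b1, b2, b3},
   TRN1 produces {a0, b0, a2, b2} (selector 0, 4, 2, 6) and TRN2 produces
   {a1, b1, a3, b3} (selector 1, 5, 3, 7).  */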
8371 static bool
8372 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8374 unsigned int i, odd, mask, nelt = d->nelt;
8375 rtx out, in0, in1, x;
8376 rtx (*gen) (rtx, rtx, rtx);
8377 enum machine_mode vmode = d->vmode;
8379 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8380 return false;
8382 /* Note that these are little-endian tests.
8383 We correct for big-endian later. */
8384 if (d->perm[0] == 0)
8385 odd = 0;
8386 else if (d->perm[0] == 1)
8387 odd = 1;
8388 else
8389 return false;
8390 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8392 for (i = 0; i < nelt; i += 2)
8394 if (d->perm[i] != i + odd)
8395 return false;
8396 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8397 return false;
8400 /* Success! */
8401 if (d->testing_p)
8402 return true;
8404 in0 = d->op0;
8405 in1 = d->op1;
8406 if (BYTES_BIG_ENDIAN)
8408 x = in0, in0 = in1, in1 = x;
8409 odd = !odd;
8411 out = d->target;
8413 if (odd)
8415 switch (vmode)
8417 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8418 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8419 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8420 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8421 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8422 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8423 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8424 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8425 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8426 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8427 default:
8428 return false;
8431 else
8433 switch (vmode)
8435 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8436 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8437 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8438 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8439 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8440 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8441 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8442 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8443 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8444 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8445 default:
8446 return false;
8450 emit_insn (gen (out, in0, in1));
8451 return true;
8454 /* Recognize patterns suitable for the UZP instructions. */
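/* For example, with V4SImode operands {a0, a1, a2, a3} and {b0, b1, b2, b3},
   UZP1 produces the even-indexed elements {a0, a2, b0, b2} (selector 0, 2, 4, 6)
   and UZP2 the odd-indexed elements {a1, a3, b1, b3} (selector 1, 3, 5, 7).  */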
8455 static bool
8456 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8458 unsigned int i, odd, mask, nelt = d->nelt;
8459 rtx out, in0, in1, x;
8460 rtx (*gen) (rtx, rtx, rtx);
8461 enum machine_mode vmode = d->vmode;
8463 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8464 return false;
8466 /* Note that these are little-endian tests.
8467 We correct for big-endian later. */
8468 if (d->perm[0] == 0)
8469 odd = 0;
8470 else if (d->perm[0] == 1)
8471 odd = 1;
8472 else
8473 return false;
8474 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8476 for (i = 0; i < nelt; i++)
8478 unsigned elt = (i * 2 + odd) & mask;
8479 if (d->perm[i] != elt)
8480 return false;
8483 /* Success! */
8484 if (d->testing_p)
8485 return true;
8487 in0 = d->op0;
8488 in1 = d->op1;
8489 if (BYTES_BIG_ENDIAN)
8491 x = in0, in0 = in1, in1 = x;
8492 odd = !odd;
8494 out = d->target;
8496 if (odd)
8498 switch (vmode)
8500 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8501 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8502 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8503 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8504 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8505 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8506 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8507 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8508 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8509 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8510 default:
8511 return false;
8514 else
8516 switch (vmode)
8518 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8519 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8520 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8521 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8522 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8523 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8524 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8525 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8526 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8527 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8528 default:
8529 return false;
8533 emit_insn (gen (out, in0, in1));
8534 return true;
8537 /* Recognize patterns suitable for the ZIP instructions. */
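/* For example, with V4SImode operands {a0, a1, a2, a3} and {b0, b1, b2, b3},
   ZIP1 interleaves the low halves to give {a0, b0, a1, b1} (selector 0, 4, 1, 5)
   and ZIP2 the high halves to give {a2, b2, a3, b3} (selector 2, 6, 3, 7).  */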
8538 static bool
8539 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8541 unsigned int i, high, mask, nelt = d->nelt;
8542 rtx out, in0, in1, x;
8543 rtx (*gen) (rtx, rtx, rtx);
8544 enum machine_mode vmode = d->vmode;
8546 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8547 return false;
8549 /* Note that these are little-endian tests.
8550 We correct for big-endian later. */
8551 high = nelt / 2;
8552 if (d->perm[0] == high)
8553 /* Do Nothing. */
8555 else if (d->perm[0] == 0)
8556 high = 0;
8557 else
8558 return false;
8559 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8561 for (i = 0; i < nelt / 2; i++)
8563 unsigned elt = (i + high) & mask;
8564 if (d->perm[i * 2] != elt)
8565 return false;
8566 elt = (elt + nelt) & mask;
8567 if (d->perm[i * 2 + 1] != elt)
8568 return false;
8571 /* Success! */
8572 if (d->testing_p)
8573 return true;
8575 in0 = d->op0;
8576 in1 = d->op1;
8577 if (BYTES_BIG_ENDIAN)
8579 x = in0, in0 = in1, in1 = x;
8580 high = !high;
8582 out = d->target;
8584 if (high)
8586 switch (vmode)
8588 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8589 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8590 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8591 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8592 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8593 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8594 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8595 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8596 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8597 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8598 default:
8599 return false;
8602 else
8604 switch (vmode)
8606 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8607 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8608 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8609 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8610 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8611 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8612 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8613 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8614 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8615 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8616 default:
8617 return false;
8621 emit_insn (gen (out, in0, in1));
8622 return true;
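/* Recognize permutations that broadcast a single element, which map to the
   DUP (element) instruction.  */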
8625 static bool
8626 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8628 rtx (*gen) (rtx, rtx, rtx);
8629 rtx out = d->target;
8630 rtx in0;
8631 enum machine_mode vmode = d->vmode;
8632 unsigned int i, elt, nelt = d->nelt;
8633 rtx lane;
8635 /* TODO: This may not be big-endian safe. */
8636 if (BYTES_BIG_ENDIAN)
8637 return false;
8639 elt = d->perm[0];
8640 for (i = 1; i < nelt; i++)
8642 if (elt != d->perm[i])
8643 return false;
8646 /* The generic preparation in aarch64_expand_vec_perm_const_1
8647 swaps the operand order and the permute indices if it finds
8648 d->perm[0] to be in the second operand. Thus, we can always
8649 use d->op0 and need not do any extra arithmetic to get the
8650 correct lane number. */
8651 in0 = d->op0;
8652 lane = GEN_INT (elt);
8654 switch (vmode)
8656 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8657 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8658 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8659 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8660 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8661 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8662 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8663 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8664 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8665 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8666 default:
8667 return false;
8670 emit_insn (gen (out, in0, lane));
8671 return true;
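/* Expand the permutation with a general TBL table lookup.  This is the
   fallback used when none of the single-instruction patterns above apply.  */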
8674 static bool
8675 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8677 rtx rperm[MAX_VECT_LEN], sel;
8678 enum machine_mode vmode = d->vmode;
8679 unsigned int i, nelt = d->nelt;
8681 if (d->testing_p)
8682 return true;
8684 /* Generic code will try constant permutation twice: once with the
8685 original mode and again with the elements lowered to QImode.
8686 So wait and don't do the selector expansion ourselves. */
8687 if (vmode != V8QImode && vmode != V16QImode)
8688 return false;
8690 for (i = 0; i < nelt; ++i)
8692 int nunits = GET_MODE_NUNITS (vmode);
8694 /* If big-endian and two vectors, we end up with a weird mixed-endian
8695 mode on NEON. Reverse the index within each word but not the word
8696 itself. */
8697 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
8698 : d->perm[i]);
8700 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8701 sel = force_reg (vmode, sel);
8703 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8704 return true;
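/* Try to expand the constant permutation described by D, first with the
   single-instruction ZIP, UZP, TRN and DUP patterns and finally with a TBL
   lookup.  Return true on success.  */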
8707 static bool
8708 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8710 /* The pattern matching functions above are written to look for a small
8711 number to begin the sequence (0, 1, N/2). If we begin with an index
8712 from the second operand, we can swap the operands. */
8713 if (d->perm[0] >= d->nelt)
8715 unsigned i, nelt = d->nelt;
8716 rtx x;
8718 gcc_assert (nelt == (nelt & -nelt));
8719 for (i = 0; i < nelt; ++i)
8720 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
8722 x = d->op0;
8723 d->op0 = d->op1;
8724 d->op1 = x;
8727 if (TARGET_SIMD)
8729 if (aarch64_evpc_zip (d))
8730 return true;
8731 else if (aarch64_evpc_uzp (d))
8732 return true;
8733 else if (aarch64_evpc_trn (d))
8734 return true;
8735 else if (aarch64_evpc_dup (d))
8736 return true;
8737 return aarch64_evpc_tbl (d);
8739 return false;
8742 /* Expand a vec_perm_const pattern. */
8744 bool
8745 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8747 struct expand_vec_perm_d d;
8748 int i, nelt, which;
8750 d.target = target;
8751 d.op0 = op0;
8752 d.op1 = op1;
8754 d.vmode = GET_MODE (target);
8755 gcc_assert (VECTOR_MODE_P (d.vmode));
8756 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8757 d.testing_p = false;
8759 for (i = which = 0; i < nelt; ++i)
8761 rtx e = XVECEXP (sel, 0, i);
8762 int ei = INTVAL (e) & (2 * nelt - 1);
8763 which |= (ei < nelt ? 1 : 2);
8764 d.perm[i] = ei;
8767 switch (which)
8769 default:
8770 gcc_unreachable ();
8772 case 3:
8773 d.one_vector_p = false;
8774 if (!rtx_equal_p (op0, op1))
8775 break;
8777 /* The elements of PERM do not suggest that only the first operand
8778 is used, but both operands are identical. Allow easier matching
8779 by folding the permutation into the single
8780 input vector. */
8781 /* Fall Through. */
8782 case 2:
8783 for (i = 0; i < nelt; ++i)
8784 d.perm[i] &= nelt - 1;
8785 d.op0 = op1;
8786 d.one_vector_p = true;
8787 break;
8789 case 1:
8790 d.op1 = op0;
8791 d.one_vector_p = true;
8792 break;
8795 return aarch64_expand_vec_perm_const_1 (&d);
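/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  Return true if the
   permutation described by SEL can be expanded for vectors of mode VMODE.  */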
8798 static bool
8799 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8800 const unsigned char *sel)
8802 struct expand_vec_perm_d d;
8803 unsigned int i, nelt, which;
8804 bool ret;
8806 d.vmode = vmode;
8807 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8808 d.testing_p = true;
8809 memcpy (d.perm, sel, nelt);
8811 /* Calculate whether all elements are in one vector. */
8812 for (i = which = 0; i < nelt; ++i)
8814 unsigned char e = d.perm[i];
8815 gcc_assert (e < 2 * nelt);
8816 which |= (e < nelt ? 1 : 2);
8819 /* If all elements are from the second vector, reindex as if from the
8820 first vector. */
8821 if (which == 2)
8822 for (i = 0; i < nelt; ++i)
8823 d.perm[i] -= nelt;
8825 /* Check whether the mask can be applied to a single vector. */
8826 d.one_vector_p = (which != 3);
8828 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8829 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8830 if (!d.one_vector_p)
8831 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8833 start_sequence ();
8834 ret = aarch64_expand_vec_perm_const_1 (&d);
8835 end_sequence ();
8837 return ret;
8840 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8841 bool
8842 aarch64_cannot_change_mode_class (enum machine_mode from,
8843 enum machine_mode to,
8844 enum reg_class rclass)
8846 /* Full-reg subregs are allowed on general regs or any class if they are
8847 the same size. */
8848 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8849 || !reg_classes_intersect_p (FP_REGS, rclass))
8850 return false;
8852 /* Limited combinations of subregs are safe on FP_REGS.  In particular:
8853 1. Vector mode to scalar mode, where one unit of the vector is accessed.
8854 2. Scalar mode to scalar mode, for integer modes or same-size float modes.
8855 3. Vector mode to vector mode.
8856 4. On little-endian only, vector-structure mode to vector mode. */
8857 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8859 if (aarch64_vector_mode_supported_p (from)
8860 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8861 return false;
8863 if (GET_MODE_NUNITS (from) == 1
8864 && GET_MODE_NUNITS (to) == 1
8865 && (GET_MODE_CLASS (from) == MODE_INT
8866 || from == to))
8867 return false;
8869 if (aarch64_vector_mode_supported_p (from)
8870 && aarch64_vector_mode_supported_p (to))
8871 return false;
8873 /* Within a vector structure straddling multiple vector registers
8874 we are in a mixed-endian representation. As such, we can't
8875 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
8876 switch between vectors and vector structures cheaply. */
8877 if (!BYTES_BIG_ENDIAN)
8878 if ((aarch64_vector_mode_supported_p (from)
8879 && aarch64_vect_struct_mode_p (to))
8880 || (aarch64_vector_mode_supported_p (to)
8881 && aarch64_vect_struct_mode_p (from)))
8882 return false;
8885 return true;
8888 /* Implement MODES_TIEABLE_P. */
8890 bool
8891 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
8893 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
8894 return true;
8896 /* We specifically want to allow elements of "structure" modes to
8897 be tieable to the structure. This more general condition allows
8898 other rarer situations too. */
8899 if (TARGET_SIMD
8900 && aarch64_vector_mode_p (mode1)
8901 && aarch64_vector_mode_p (mode2))
8902 return true;
8904 return false;
8907 #undef TARGET_ADDRESS_COST
8908 #define TARGET_ADDRESS_COST aarch64_address_cost
8910 /* This hook determines whether unnamed bitfields affect the alignment
8911 of the containing structure. The hook returns true if the structure
8912 should inherit the alignment requirements of an unnamed bitfield's
8913 type. */
8914 #undef TARGET_ALIGN_ANON_BITFIELD
8915 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8917 #undef TARGET_ASM_ALIGNED_DI_OP
8918 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8920 #undef TARGET_ASM_ALIGNED_HI_OP
8921 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8923 #undef TARGET_ASM_ALIGNED_SI_OP
8924 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8926 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8927 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8928 hook_bool_const_tree_hwi_hwi_const_tree_true
8930 #undef TARGET_ASM_FILE_START
8931 #define TARGET_ASM_FILE_START aarch64_start_file
8933 #undef TARGET_ASM_OUTPUT_MI_THUNK
8934 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8936 #undef TARGET_ASM_SELECT_RTX_SECTION
8937 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8939 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8940 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8942 #undef TARGET_BUILD_BUILTIN_VA_LIST
8943 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8945 #undef TARGET_CALLEE_COPIES
8946 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8948 #undef TARGET_CAN_ELIMINATE
8949 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8951 #undef TARGET_CANNOT_FORCE_CONST_MEM
8952 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8954 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8955 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8957 /* Only the least significant bit is used for initialization guard
8958 variables. */
8959 #undef TARGET_CXX_GUARD_MASK_BIT
8960 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8962 #undef TARGET_C_MODE_FOR_SUFFIX
8963 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8965 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8966 #undef TARGET_DEFAULT_TARGET_FLAGS
8967 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8968 #endif
8970 #undef TARGET_CLASS_MAX_NREGS
8971 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8973 #undef TARGET_BUILTIN_DECL
8974 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8976 #undef TARGET_EXPAND_BUILTIN
8977 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8979 #undef TARGET_EXPAND_BUILTIN_VA_START
8980 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8982 #undef TARGET_FOLD_BUILTIN
8983 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8985 #undef TARGET_FUNCTION_ARG
8986 #define TARGET_FUNCTION_ARG aarch64_function_arg
8988 #undef TARGET_FUNCTION_ARG_ADVANCE
8989 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8991 #undef TARGET_FUNCTION_ARG_BOUNDARY
8992 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8994 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8995 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8997 #undef TARGET_FUNCTION_VALUE
8998 #define TARGET_FUNCTION_VALUE aarch64_function_value
9000 #undef TARGET_FUNCTION_VALUE_REGNO_P
9001 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9003 #undef TARGET_FRAME_POINTER_REQUIRED
9004 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9006 #undef TARGET_GIMPLE_FOLD_BUILTIN
9007 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9009 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9010 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9012 #undef TARGET_INIT_BUILTINS
9013 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9015 #undef TARGET_LEGITIMATE_ADDRESS_P
9016 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9018 #undef TARGET_LEGITIMATE_CONSTANT_P
9019 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9021 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9022 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9024 #undef TARGET_LRA_P
9025 #define TARGET_LRA_P aarch64_lra_p
9027 #undef TARGET_MANGLE_TYPE
9028 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9030 #undef TARGET_MEMORY_MOVE_COST
9031 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9033 #undef TARGET_MUST_PASS_IN_STACK
9034 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9036 /* This target hook should return true if accesses to volatile bitfields
9037 should use the narrowest mode possible. It should return false if these
9038 accesses should use the bitfield container type. */
9039 #undef TARGET_NARROW_VOLATILE_BITFIELD
9040 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9042 #undef TARGET_OPTION_OVERRIDE
9043 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9045 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9046 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9047 aarch64_override_options_after_change
9049 #undef TARGET_PASS_BY_REFERENCE
9050 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9052 #undef TARGET_PREFERRED_RELOAD_CLASS
9053 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9055 #undef TARGET_SECONDARY_RELOAD
9056 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9058 #undef TARGET_SHIFT_TRUNCATION_MASK
9059 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9061 #undef TARGET_SETUP_INCOMING_VARARGS
9062 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9064 #undef TARGET_STRUCT_VALUE_RTX
9065 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9067 #undef TARGET_REGISTER_MOVE_COST
9068 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9070 #undef TARGET_RETURN_IN_MEMORY
9071 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9073 #undef TARGET_RETURN_IN_MSB
9074 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9076 #undef TARGET_RTX_COSTS
9077 #define TARGET_RTX_COSTS aarch64_rtx_costs
9079 #undef TARGET_SCHED_ISSUE_RATE
9080 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9082 #undef TARGET_TRAMPOLINE_INIT
9083 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9085 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9086 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9088 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9089 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9091 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9092 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9094 #undef TARGET_VECTORIZE_ADD_STMT_COST
9095 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9097 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9098 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9099 aarch64_builtin_vectorization_cost
9101 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9102 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9104 #undef TARGET_VECTORIZE_BUILTINS
9105 #define TARGET_VECTORIZE_BUILTINS
9107 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9108 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9109 aarch64_builtin_vectorized_function
9111 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9112 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9113 aarch64_autovectorize_vector_sizes
9115 /* Section anchor support. */
9117 #undef TARGET_MIN_ANCHOR_OFFSET
9118 #define TARGET_MIN_ANCHOR_OFFSET -256
9120 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9121 byte offset; we can do much more for larger data types, but have no way
9122 to determine the size of the access. We assume accesses are aligned. */
9123 #undef TARGET_MAX_ANCHOR_OFFSET
9124 #define TARGET_MAX_ANCHOR_OFFSET 4095
9126 #undef TARGET_VECTOR_ALIGNMENT
9127 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9129 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9130 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9131 aarch64_simd_vector_alignment_reachable
9133 /* vec_perm support. */
9135 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9136 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9137 aarch64_vectorize_vec_perm_const_ok
9140 #undef TARGET_FIXED_CONDITION_CODE_REGS
9141 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9143 #undef TARGET_FLAGS_REGNUM
9144 #define TARGET_FLAGS_REGNUM CC_REGNUM
9146 struct gcc_target targetm = TARGET_INITIALIZER;
9148 #include "gt-aarch64.h"