PR target/9744
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 3b1f6b510491e0eba79f0b87fc62f2105da6c355
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
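/* A quick worked example of the macro above, assuming the standard ABI
   settings for this port: under LP64, POINTER_SIZE is 64 and BITS_PER_UNIT
   is 8, so POINTER_BYTES evaluates to 8; under ILP32 it would be 4.  */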
70 /* Classifies an address.
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
90 ADDRESS_SYMBOLIC
91 A constant symbolic address, in pc-relative literal pool. */
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
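/* Illustrative mapping from these classes to assembler syntax (examples
   only; the exact forms accepted depend on the mode and target options):
     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!   or   [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:sym]
     ADDRESS_SYMBOLIC   a PC-relative literal load, e.g. ldr x0, .LC0  */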
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
111 struct simd_immediate_info
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
157 /* Tuning parameters. */
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
165 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166 __extension__
167 #endif
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4)
227 static const struct tune_params cortexa53_tunings =
229 &cortexa53_extra_costs,
230 &generic_addrcost_table,
231 &generic_regmove_cost,
232 &generic_vector_cost,
233 NAMED_PARAM (memmov_cost, 4)
236 /* A processor implementing AArch64. */
237 struct processor
239 const char *const name;
240 enum aarch64_processor core;
241 const char *arch;
242 const unsigned long flags;
243 const struct tune_params *const tune;
246 /* Processor cores implementing AArch64. */
247 static const struct processor all_cores[] =
249 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
250 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
251 #include "aarch64-cores.def"
252 #undef AARCH64_CORE
253 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
254 {NULL, aarch64_none, NULL, 0, NULL}
257 /* Architectures implementing AArch64. */
258 static const struct processor all_architectures[] =
260 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
261 {NAME, CORE, #ARCH, FLAGS, NULL},
262 #include "aarch64-arches.def"
263 #undef AARCH64_ARCH
264 {NULL, aarch64_none, NULL, 0, NULL}
267 /* Target specification. These are populated as command-line arguments
268 are processed, or NULL if not specified. */
269 static const struct processor *selected_arch;
270 static const struct processor *selected_cpu;
271 static const struct processor *selected_tune;
273 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
275 /* An ISA extension in the co-processor and main instruction set space. */
276 struct aarch64_option_extension
278 const char *const name;
279 const unsigned long flags_on;
280 const unsigned long flags_off;
283 /* ISA extensions in AArch64. */
284 static const struct aarch64_option_extension all_extensions[] =
286 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
287 {NAME, FLAGS_ON, FLAGS_OFF},
288 #include "aarch64-option-extensions.def"
289 #undef AARCH64_OPT_EXTENSION
290 {NULL, 0, 0}
293 /* Used to track the size of an address when generating a pre/post
294 increment address. */
295 static enum machine_mode aarch64_memory_reference_mode;
297 /* Used to force GTY into this file. */
298 static GTY(()) int gty_dummy;
300 /* A table of valid AArch64 "bitmask immediate" values for
301 logical instructions. */
303 #define AARCH64_NUM_BITMASKS 5334
304 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
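/* For reference, a "bitmask immediate" is built by replicating a 2-, 4-,
   8-, 16-, 32- or 64-bit element across the register, where each element
   is a rotated run of contiguous ones.  As an illustration,
   0x00ff00ff00ff00ff is encodable (a 16-bit element containing a run of
   eight ones), whereas 0x0000000000001234 is not, since its set bits do
   not form a single contiguous run under rotation.  */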
306 /* Did we set flag_omit_frame_pointer just so
307 aarch64_frame_pointer_required would be called? */
308 static bool faked_omit_frame_pointer;
310 typedef enum aarch64_cond_code
312 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
313 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
314 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
316 aarch64_cc;
318 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
320 /* The condition codes of the processor, and the inverse function. */
321 static const char * const aarch64_condition_codes[] =
323 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
324 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
327 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
328 unsigned
329 aarch64_dbx_register_number (unsigned regno)
331 if (GP_REGNUM_P (regno))
332 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
333 else if (regno == SP_REGNUM)
334 return AARCH64_DWARF_SP;
335 else if (FP_REGNUM_P (regno))
336 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
338 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
339 equivalent DWARF register. */
340 return DWARF_FRAME_REGISTERS;
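/* Illustrative results of the mapping above, assuming the usual
   AARCH64_DWARF_R0/SP/V0 values of 0, 31 and 64: x0-x30 map to DWARF
   numbers 0-30, the stack pointer maps to 31, and v0-v31 map to 64-95;
   anything else (e.g. the condition flags) yields DWARF_FRAME_REGISTERS,
   meaning "no DWARF equivalent".  */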
343 /* Return TRUE if MODE is any of the large INT modes. */
344 static bool
345 aarch64_vect_struct_mode_p (enum machine_mode mode)
347 return mode == OImode || mode == CImode || mode == XImode;
350 /* Return TRUE if MODE is any of the vector modes. */
351 static bool
352 aarch64_vector_mode_p (enum machine_mode mode)
354 return aarch64_vector_mode_supported_p (mode)
355 || aarch64_vect_struct_mode_p (mode);
358 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
359 static bool
360 aarch64_array_mode_supported_p (enum machine_mode mode,
361 unsigned HOST_WIDE_INT nelems)
363 if (TARGET_SIMD
364 && AARCH64_VALID_SIMD_QREG_MODE (mode)
365 && (nelems >= 2 && nelems <= 4))
366 return true;
368 return false;
371 /* Implement HARD_REGNO_NREGS. */
374 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
376 switch (aarch64_regno_regclass (regno))
378 case FP_REGS:
379 case FP_LO_REGS:
380 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
381 default:
382 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
384 gcc_unreachable ();
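/* Worked example for the function above, assuming UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16 on this target: a 16-byte V4SImode value occupies
   one FP/SIMD register but two general registers, while an 8-byte DImode
   value occupies a single register of either kind.  */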
387 /* Implement HARD_REGNO_MODE_OK. */
390 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
392 if (GET_MODE_CLASS (mode) == MODE_CC)
393 return regno == CC_REGNUM;
395 if (regno == SP_REGNUM)
396 /* The purpose of comparing with ptr_mode is to support the
397 global register variable associated with the stack pointer
398 register via the syntax of asm ("wsp") in ILP32. */
399 return mode == Pmode || mode == ptr_mode;
401 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
402 return mode == Pmode;
404 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
405 return 1;
407 if (FP_REGNUM_P (regno))
409 if (aarch64_vect_struct_mode_p (mode))
410 return
411 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
412 else
413 return 1;
416 return 0;
419 /* Return true if calls to DECL should be treated as
420 long-calls (i.e. called via a register). */
421 static bool
422 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
424 return false;
427 /* Return true if calls to symbol-ref SYM should be treated as
428 long-calls (i.e. called via a register). */
429 bool
430 aarch64_is_long_call_p (rtx sym)
432 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
435 /* Return true if the offsets to a zero/sign-extract operation
436 represent an expression that matches an extend operation. The
437 operands represent the parameters from
439 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
440 bool
441 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
442 rtx extract_imm)
444 HOST_WIDE_INT mult_val, extract_val;
446 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
447 return false;
449 mult_val = INTVAL (mult_imm);
450 extract_val = INTVAL (extract_imm);
452 if (extract_val > 8
453 && extract_val < GET_MODE_BITSIZE (mode)
454 && exact_log2 (extract_val & ~7) > 0
455 && (extract_val & 7) <= 4
456 && mult_val == (1 << (extract_val & 7)))
457 return true;
459 return false;
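/* A worked example of the test above (illustrative only): in DImode, with
   extract_imm == 34 and mult_imm == 4, we have 34 & ~7 == 32 (a power of
   two), 34 & 7 == 2 <= 4, and 4 == 1 << 2, so the RTL
     (extract (mult (reg) 4) 34 0)
   is accepted: it is a 32-bit extend of REG shifted left by two.  */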
462 /* Emit an insn that's a simple single-set. Both the operands must be
463 known to be valid. */
464 inline static rtx
465 emit_set_insn (rtx x, rtx y)
467 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
470 /* X and Y are two things to compare using CODE. Emit the compare insn and
471 return the rtx for register 0 in the proper mode. */
473 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
475 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
476 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
478 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
479 return cc_reg;
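/* A minimal usage sketch for the helper above (hypothetical caller, not
   part of this file): an expander wanting a signed >= comparison could do

     rtx cc = aarch64_gen_compare_reg (GE, op0, op1);
     rtx cond = gen_rtx_GE (VOIDmode, cc, const0_rtx);

   and then use COND inside a conditional-branch or conditional-set
   pattern.  */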
482 /* Build the SYMBOL_REF for __tls_get_addr. */
484 static GTY(()) rtx tls_get_addr_libfunc;
487 aarch64_tls_get_addr (void)
489 if (!tls_get_addr_libfunc)
490 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
491 return tls_get_addr_libfunc;
494 /* Return the TLS model to use for ADDR. */
496 static enum tls_model
497 tls_symbolic_operand_type (rtx addr)
499 enum tls_model tls_kind = TLS_MODEL_NONE;
500 rtx sym, addend;
502 if (GET_CODE (addr) == CONST)
504 split_const (addr, &sym, &addend);
505 if (GET_CODE (sym) == SYMBOL_REF)
506 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
508 else if (GET_CODE (addr) == SYMBOL_REF)
509 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
511 return tls_kind;
514 /* We'll allow LO_SUMs in our legitimate addresses
515 so that combine can take care of combining addresses where
516 necessary, but for generation purposes we'll generate the address
517 as:
518 RTL Absolute
519 tmp = hi (symbol_ref); adrp x1, foo
520 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
523 PIC TLS
524 adrp x1, :got:foo adrp tmp, :tlsgd:foo
525 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
526 bl __tls_get_addr
529 Load TLS symbol, depending on TLS mechanism and TLS access model.
531 Global Dynamic - Traditional TLS:
532 adrp tmp, :tlsgd:imm
533 add dest, tmp, #:tlsgd_lo12:imm
534 bl __tls_get_addr
536 Global Dynamic - TLS Descriptors:
537 adrp dest, :tlsdesc:imm
538 ldr tmp, [dest, #:tlsdesc_lo12:imm]
539 add dest, dest, #:tlsdesc_lo12:imm
540 blr tmp
541 mrs tp, tpidr_el0
542 add dest, dest, tp
544 Initial Exec:
545 mrs tp, tpidr_el0
546 adrp tmp, :gottprel:imm
547 ldr dest, [tmp, #:gottprel_lo12:imm]
548 add dest, dest, tp
550 Local Exec:
551 mrs tp, tpidr_el0
552 add t0, tp, #:tprel_hi12:imm
553 add t0, #:tprel_lo12_nc:imm
556 static void
557 aarch64_load_symref_appropriately (rtx dest, rtx imm,
558 enum aarch64_symbol_type type)
560 switch (type)
562 case SYMBOL_SMALL_ABSOLUTE:
564 /* In ILP32, the mode of dest can be either SImode or DImode. */
565 rtx tmp_reg = dest;
566 enum machine_mode mode = GET_MODE (dest);
568 gcc_assert (mode == Pmode || mode == ptr_mode);
570 if (can_create_pseudo_p ())
571 tmp_reg = gen_reg_rtx (mode);
573 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
574 emit_insn (gen_add_losym (dest, tmp_reg, imm));
575 return;
578 case SYMBOL_TINY_ABSOLUTE:
579 emit_insn (gen_rtx_SET (Pmode, dest, imm));
580 return;
582 case SYMBOL_SMALL_GOT:
584 /* In ILP32, the mode of dest can be either SImode or DImode,
585 while the got entry is always of SImode size. The mode of
586 dest depends on how dest is used: if dest is assigned to a
587 pointer (e.g. in memory), it has SImode; it may have
588 DImode if dest is dereferenced to access the memory.
589 This is why we have to handle three different ldr_got_small
590 patterns here (two patterns for ILP32). */
591 rtx tmp_reg = dest;
592 enum machine_mode mode = GET_MODE (dest);
594 if (can_create_pseudo_p ())
595 tmp_reg = gen_reg_rtx (mode);
597 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
598 if (mode == ptr_mode)
600 if (mode == DImode)
601 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
602 else
603 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
605 else
607 gcc_assert (mode == Pmode);
608 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
611 return;
614 case SYMBOL_SMALL_TLSGD:
616 rtx insns;
617 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
619 start_sequence ();
620 emit_call_insn (gen_tlsgd_small (result, imm));
621 insns = get_insns ();
622 end_sequence ();
624 RTL_CONST_CALL_P (insns) = 1;
625 emit_libcall_block (insns, dest, result, imm);
626 return;
629 case SYMBOL_SMALL_TLSDESC:
631 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
632 rtx tp;
634 emit_insn (gen_tlsdesc_small (imm));
635 tp = aarch64_load_tp (NULL);
636 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
637 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
638 return;
641 case SYMBOL_SMALL_GOTTPREL:
643 rtx tmp_reg = gen_reg_rtx (Pmode);
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsie_small (tmp_reg, imm));
646 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
647 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
648 return;
651 case SYMBOL_SMALL_TPREL:
653 rtx tp = aarch64_load_tp (NULL);
654 emit_insn (gen_tlsle_small (dest, tp, imm));
655 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
656 return;
659 case SYMBOL_TINY_GOT:
660 emit_insn (gen_ldr_got_tiny (dest, imm));
661 return;
663 default:
664 gcc_unreachable ();
668 /* Emit a move from SRC to DEST. Assume that the move expanders can
669 handle all moves if !can_create_pseudo_p (). The distinction is
670 important because, unlike emit_move_insn, the move expanders know
671 how to force Pmode objects into the constant pool even when the
672 constant pool address is not itself legitimate. */
673 static rtx
674 aarch64_emit_move (rtx dest, rtx src)
676 return (can_create_pseudo_p ()
677 ? emit_move_insn (dest, src)
678 : emit_move_insn_1 (dest, src));
681 void
682 aarch64_split_128bit_move (rtx dst, rtx src)
684 rtx low_dst;
686 enum machine_mode src_mode = GET_MODE (src);
687 enum machine_mode dst_mode = GET_MODE (dst);
688 int src_regno = REGNO (src);
689 int dst_regno = REGNO (dst);
691 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
693 if (REG_P (dst) && REG_P (src))
695 gcc_assert (src_mode == TImode || src_mode == TFmode);
697 /* Handle r -> w, w -> r. */
698 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
700 switch (src_mode) {
701 case TImode:
702 emit_insn
703 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
704 emit_insn
705 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
706 return;
707 case TFmode:
708 emit_insn
709 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
710 emit_insn
711 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
712 return;
713 default:
714 gcc_unreachable ();
717 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
719 switch (src_mode) {
720 case TImode:
721 emit_insn
722 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
723 emit_insn
724 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
725 return;
726 case TFmode:
727 emit_insn
728 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
729 emit_insn
730 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
731 return;
732 default:
733 gcc_unreachable ();
736 /* Fall through to r -> r cases. */
739 switch (dst_mode) {
740 case TImode:
741 low_dst = gen_lowpart (word_mode, dst);
742 if (REG_P (low_dst)
743 && reg_overlap_mentioned_p (low_dst, src))
745 aarch64_emit_move (gen_highpart (word_mode, dst),
746 gen_highpart_mode (word_mode, TImode, src));
747 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
749 else
751 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
752 aarch64_emit_move (gen_highpart (word_mode, dst),
753 gen_highpart_mode (word_mode, TImode, src));
755 return;
756 case TFmode:
757 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
758 gen_rtx_REG (DFmode, src_regno));
759 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
760 gen_rtx_REG (DFmode, src_regno + 1));
761 return;
762 default:
763 gcc_unreachable ();
767 bool
768 aarch64_split_128bit_move_p (rtx dst, rtx src)
770 return (! REG_P (src)
771 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
774 /* Split a complex SIMD combine. */
776 void
777 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
779 enum machine_mode src_mode = GET_MODE (src1);
780 enum machine_mode dst_mode = GET_MODE (dst);
782 gcc_assert (VECTOR_MODE_P (dst_mode));
784 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
786 rtx (*gen) (rtx, rtx, rtx);
788 switch (src_mode)
790 case V8QImode:
791 gen = gen_aarch64_simd_combinev8qi;
792 break;
793 case V4HImode:
794 gen = gen_aarch64_simd_combinev4hi;
795 break;
796 case V2SImode:
797 gen = gen_aarch64_simd_combinev2si;
798 break;
799 case V2SFmode:
800 gen = gen_aarch64_simd_combinev2sf;
801 break;
802 case DImode:
803 gen = gen_aarch64_simd_combinedi;
804 break;
805 case DFmode:
806 gen = gen_aarch64_simd_combinedf;
807 break;
808 default:
809 gcc_unreachable ();
812 emit_insn (gen (dst, src1, src2));
813 return;
817 /* Split a complex SIMD move. */
819 void
820 aarch64_split_simd_move (rtx dst, rtx src)
822 enum machine_mode src_mode = GET_MODE (src);
823 enum machine_mode dst_mode = GET_MODE (dst);
825 gcc_assert (VECTOR_MODE_P (dst_mode));
827 if (REG_P (dst) && REG_P (src))
829 rtx (*gen) (rtx, rtx);
831 gcc_assert (VECTOR_MODE_P (src_mode));
833 switch (src_mode)
835 case V16QImode:
836 gen = gen_aarch64_split_simd_movv16qi;
837 break;
838 case V8HImode:
839 gen = gen_aarch64_split_simd_movv8hi;
840 break;
841 case V4SImode:
842 gen = gen_aarch64_split_simd_movv4si;
843 break;
844 case V2DImode:
845 gen = gen_aarch64_split_simd_movv2di;
846 break;
847 case V4SFmode:
848 gen = gen_aarch64_split_simd_movv4sf;
849 break;
850 case V2DFmode:
851 gen = gen_aarch64_split_simd_movv2df;
852 break;
853 default:
854 gcc_unreachable ();
857 emit_insn (gen (dst, src));
858 return;
862 static rtx
863 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
865 if (can_create_pseudo_p ())
866 return force_reg (mode, value);
867 else
869 x = aarch64_emit_move (x, value);
870 return x;
875 static rtx
876 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
878 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
880 rtx high;
881 /* Load the full offset into a register. This
882 might be improvable in the future. */
883 high = GEN_INT (offset);
884 offset = 0;
885 high = aarch64_force_temporary (mode, temp, high);
886 reg = aarch64_force_temporary (mode, temp,
887 gen_rtx_PLUS (mode, high, reg));
889 return plus_constant (mode, reg, offset);
892 void
893 aarch64_expand_mov_immediate (rtx dest, rtx imm)
895 enum machine_mode mode = GET_MODE (dest);
896 unsigned HOST_WIDE_INT mask;
897 int i;
898 bool first;
899 unsigned HOST_WIDE_INT val;
900 bool subtargets;
901 rtx subtarget;
902 int one_match, zero_match;
904 gcc_assert (mode == SImode || mode == DImode);
906 /* Check what type of symbol it is. */
907 if (GET_CODE (imm) == SYMBOL_REF
908 || GET_CODE (imm) == LABEL_REF
909 || GET_CODE (imm) == CONST)
911 rtx mem, base, offset;
912 enum aarch64_symbol_type sty;
914 /* If we have (const (plus symbol offset)), separate out the offset
915 before we start classifying the symbol. */
916 split_const (imm, &base, &offset);
918 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
919 switch (sty)
921 case SYMBOL_FORCE_TO_MEM:
922 if (offset != const0_rtx
923 && targetm.cannot_force_const_mem (mode, imm))
925 gcc_assert(can_create_pseudo_p ());
926 base = aarch64_force_temporary (mode, dest, base);
927 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
928 aarch64_emit_move (dest, base);
929 return;
931 mem = force_const_mem (ptr_mode, imm);
932 gcc_assert (mem);
933 if (mode != ptr_mode)
934 mem = gen_rtx_ZERO_EXTEND (mode, mem);
935 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
938 case SYMBOL_SMALL_TLSGD:
939 case SYMBOL_SMALL_TLSDESC:
940 case SYMBOL_SMALL_GOTTPREL:
941 case SYMBOL_SMALL_GOT:
942 case SYMBOL_TINY_GOT:
943 if (offset != const0_rtx)
945 gcc_assert(can_create_pseudo_p ());
946 base = aarch64_force_temporary (mode, dest, base);
947 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
948 aarch64_emit_move (dest, base);
949 return;
951 /* FALLTHRU */
953 case SYMBOL_SMALL_TPREL:
954 case SYMBOL_SMALL_ABSOLUTE:
955 case SYMBOL_TINY_ABSOLUTE:
956 aarch64_load_symref_appropriately (dest, imm, sty);
957 return;
959 default:
960 gcc_unreachable ();
964 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
966 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
967 return;
970 if (!CONST_INT_P (imm))
972 if (GET_CODE (imm) == HIGH)
973 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
974 else
976 rtx mem = force_const_mem (mode, imm);
977 gcc_assert (mem);
978 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
981 return;
984 if (mode == SImode)
986 /* We know we can't do this in 1 insn, and we must be able to do it
987 in two; so don't mess around looking for sequences that don't buy
988 us anything. */
989 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
990 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
991 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
992 return;
995 /* Remaining cases are all for DImode. */
997 val = INTVAL (imm);
998 subtargets = optimize && can_create_pseudo_p ();
1000 one_match = 0;
1001 zero_match = 0;
1002 mask = 0xffff;
1004 for (i = 0; i < 64; i += 16, mask <<= 16)
1006 if ((val & mask) == 0)
1007 zero_match++;
1008 else if ((val & mask) == mask)
1009 one_match++;
1012 if (one_match == 2)
1014 mask = 0xffff;
1015 for (i = 0; i < 64; i += 16, mask <<= 16)
1017 if ((val & mask) != mask)
1019 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1020 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1021 GEN_INT ((val >> i) & 0xffff)));
1022 return;
1025 gcc_unreachable ();
1028 if (zero_match == 2)
1029 goto simple_sequence;
1031 mask = 0x0ffff0000UL;
1032 for (i = 16; i < 64; i += 16, mask <<= 16)
1034 HOST_WIDE_INT comp = mask & ~(mask - 1);
1036 if (aarch64_uimm12_shift (val - (val & mask)))
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - (val & mask))));
1043 return;
1045 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val + comp) & mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val + comp) & mask))));
1053 return;
1055 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT ((val - comp) | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - ((val - comp) | ~mask))));
1063 return;
1065 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1067 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1069 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1070 GEN_INT (val | ~mask)));
1071 emit_insn (gen_adddi3 (dest, subtarget,
1072 GEN_INT (val - (val | ~mask))));
1073 return;
1077 /* See if we can do it by arithmetically combining two
1078 immediates. */
1079 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1081 int j;
1082 mask = 0xffff;
1084 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1085 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1087 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1088 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1089 GEN_INT (aarch64_bitmasks[i])));
1090 emit_insn (gen_adddi3 (dest, subtarget,
1091 GEN_INT (val - aarch64_bitmasks[i])));
1092 return;
1095 for (j = 0; j < 64; j += 16, mask <<= 16)
1097 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1099 emit_insn (gen_rtx_SET (VOIDmode, dest,
1100 GEN_INT (aarch64_bitmasks[i])));
1101 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1102 GEN_INT ((val >> j) & 0xffff)));
1103 return;
1108 /* See if we can do it by logically combining two immediates. */
1109 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1111 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1113 int j;
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1118 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1119 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1120 GEN_INT (aarch64_bitmasks[i])));
1121 emit_insn (gen_iordi3 (dest, subtarget,
1122 GEN_INT (aarch64_bitmasks[j])));
1123 return;
1126 else if ((val & aarch64_bitmasks[i]) == val)
1128 int j;
1130 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1131 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1134 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1135 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1136 GEN_INT (aarch64_bitmasks[j])));
1137 emit_insn (gen_anddi3 (dest, subtarget,
1138 GEN_INT (aarch64_bitmasks[i])));
1139 return;
1144 simple_sequence:
1145 first = true;
1146 mask = 0xffff;
1147 for (i = 0; i < 64; i += 16, mask <<= 16)
1149 if ((val & mask) != 0)
1151 if (first)
1153 emit_insn (gen_rtx_SET (VOIDmode, dest,
1154 GEN_INT (val & mask)));
1155 first = false;
1157 else
1158 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1159 GEN_INT ((val >> i) & 0xffff)));
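/* A worked example of the simple_sequence fall-back above (illustrative):
   for val == 0x1234000000005678 in DImode, two of the four 16-bit chunks
   are zero, so control reaches simple_sequence directly.  The only
   non-zero chunks are 0x5678 at bit 0 and 0x1234 at bit 48, so the code
   emits one plain SET of 0x5678 (a MOVZ) followed by one insv_immdi of
   0x1234 at position 48 (a MOVK), i.e. two instructions in total.  */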
1164 static bool
1165 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1167 /* Indirect calls are not currently supported. */
1168 if (decl == NULL)
1169 return false;
1171 /* Cannot tail-call to long-calls, since these are outside of the
1172 range of a branch instruction (we could handle this if we added
1173 support for indirect tail-calls). */
1174 if (aarch64_decl_is_long_call_p (decl))
1175 return false;
1177 return true;
1180 /* Implement TARGET_PASS_BY_REFERENCE. */
1182 static bool
1183 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1184 enum machine_mode mode,
1185 const_tree type,
1186 bool named ATTRIBUTE_UNUSED)
1188 HOST_WIDE_INT size;
1189 enum machine_mode dummymode;
1190 int nregs;
1192 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1193 size = (mode == BLKmode && type)
1194 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1196 if (type)
1198 /* Arrays are always passed by reference. */
1199 if (TREE_CODE (type) == ARRAY_TYPE)
1200 return true;
1201 /* Other aggregates based on their size. */
1202 if (AGGREGATE_TYPE_P (type))
1203 size = int_size_in_bytes (type);
1206 /* Variable-sized arguments are always passed by reference. */
1207 if (size < 0)
1208 return true;
1210 /* Can this be a candidate to be passed in fp/simd register(s)? */
1211 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1212 &dummymode, &nregs,
1213 NULL))
1214 return false;
1216 /* Arguments which are variable sized or larger than 2 registers are
1217 passed by reference unless they are a homogeneous floating-point
1218 aggregate. */
1219 return size > 2 * UNITS_PER_WORD;
1222 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1223 static bool
1224 aarch64_return_in_msb (const_tree valtype)
1226 enum machine_mode dummy_mode;
1227 int dummy_int;
1229 /* Never happens in little-endian mode. */
1230 if (!BYTES_BIG_ENDIAN)
1231 return false;
1233 /* Only composite types smaller than or equal to 16 bytes can
1234 be potentially returned in registers. */
1235 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1236 || int_size_in_bytes (valtype) <= 0
1237 || int_size_in_bytes (valtype) > 16)
1238 return false;
1240 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1241 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1242 is always passed/returned in the least significant bits of fp/simd
1243 register(s). */
1244 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1245 &dummy_mode, &dummy_int, NULL))
1246 return false;
1248 return true;
1251 /* Implement TARGET_FUNCTION_VALUE.
1252 Define how to find the value returned by a function. */
1254 static rtx
1255 aarch64_function_value (const_tree type, const_tree func,
1256 bool outgoing ATTRIBUTE_UNUSED)
1258 enum machine_mode mode;
1259 int unsignedp;
1260 int count;
1261 enum machine_mode ag_mode;
1263 mode = TYPE_MODE (type);
1264 if (INTEGRAL_TYPE_P (type))
1265 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1267 if (aarch64_return_in_msb (type))
1269 HOST_WIDE_INT size = int_size_in_bytes (type);
1271 if (size % UNITS_PER_WORD != 0)
1273 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1274 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1278 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1279 &ag_mode, &count, NULL))
1281 if (!aarch64_composite_type_p (type, mode))
1283 gcc_assert (count == 1 && mode == ag_mode);
1284 return gen_rtx_REG (mode, V0_REGNUM);
1286 else
1288 int i;
1289 rtx par;
1291 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1292 for (i = 0; i < count; i++)
1294 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1295 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1296 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1297 XVECEXP (par, 0, i) = tmp;
1299 return par;
1302 else
1303 return gen_rtx_REG (mode, R0_REGNUM);
1306 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1307 Return true if REGNO is the number of a hard register in which the values
1308 of a called function may come back. */
1310 static bool
1311 aarch64_function_value_regno_p (const unsigned int regno)
1313 /* A maximum of 16 bytes can be returned in the general registers. Examples
1314 of 16-byte return values are: 128-bit integers and 16-byte small
1315 structures (excluding homogeneous floating-point aggregates). */
1316 if (regno == R0_REGNUM || regno == R1_REGNUM)
1317 return true;
1319 /* Up to four fp/simd registers can return a function value, e.g. a
1320 homogeneous floating-point aggregate having four members. */
1321 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1322 return !TARGET_GENERAL_REGS_ONLY;
1324 return false;
1327 /* Implement TARGET_RETURN_IN_MEMORY.
1329 If the type T of the result of a function is such that
1330 void func (T arg)
1331 would require that arg be passed as a value in a register (or set of
1332 registers) according to the parameter passing rules, then the result
1333 is returned in the same registers as would be used for such an
1334 argument. */
1336 static bool
1337 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1339 HOST_WIDE_INT size;
1340 enum machine_mode ag_mode;
1341 int count;
1343 if (!AGGREGATE_TYPE_P (type)
1344 && TREE_CODE (type) != COMPLEX_TYPE
1345 && TREE_CODE (type) != VECTOR_TYPE)
1346 /* Simple scalar types are always returned in registers. */
1347 return false;
1349 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1350 type,
1351 &ag_mode,
1352 &count,
1353 NULL))
1354 return false;
1356 /* Types larger than 2 registers are returned in memory. */
1357 size = int_size_in_bytes (type);
1358 return (size < 0 || size > 2 * UNITS_PER_WORD);
1361 static bool
1362 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1363 const_tree type, int *nregs)
1365 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1366 return aarch64_vfp_is_call_or_return_candidate (mode,
1367 type,
1368 &pcum->aapcs_vfp_rmode,
1369 nregs,
1370 NULL);
1373 /* Given MODE and TYPE of a function argument, return the alignment in
1374 bits. The idea is to suppress any stronger alignment requested by
1375 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1376 This is a helper function for local use only. */
1378 static unsigned int
1379 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1381 unsigned int alignment;
1383 if (type)
1385 if (!integer_zerop (TYPE_SIZE (type)))
1387 if (TYPE_MODE (type) == mode)
1388 alignment = TYPE_ALIGN (type);
1389 else
1390 alignment = GET_MODE_ALIGNMENT (mode);
1392 else
1393 alignment = 0;
1395 else
1396 alignment = GET_MODE_ALIGNMENT (mode);
1398 return alignment;
1401 /* Layout a function argument according to the AAPCS64 rules. The rule
1402 numbers refer to the rule numbers in the AAPCS64. */
1404 static void
1405 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1406 const_tree type,
1407 bool named ATTRIBUTE_UNUSED)
1409 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1410 int ncrn, nvrn, nregs;
1411 bool allocate_ncrn, allocate_nvrn;
1413 /* We need to do this once per argument. */
1414 if (pcum->aapcs_arg_processed)
1415 return;
1417 pcum->aapcs_arg_processed = true;
1419 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1420 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1421 mode,
1422 type,
1423 &nregs);
1425 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1426 The following code thus handles passing by SIMD/FP registers first. */
1428 nvrn = pcum->aapcs_nvrn;
1430 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1431 and homogeneous short-vector aggregates (HVA). */
1432 if (allocate_nvrn)
1434 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1436 pcum->aapcs_nextnvrn = nvrn + nregs;
1437 if (!aarch64_composite_type_p (type, mode))
1439 gcc_assert (nregs == 1);
1440 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1442 else
1444 rtx par;
1445 int i;
1446 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1447 for (i = 0; i < nregs; i++)
1449 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1450 V0_REGNUM + nvrn + i);
1451 tmp = gen_rtx_EXPR_LIST
1452 (VOIDmode, tmp,
1453 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1454 XVECEXP (par, 0, i) = tmp;
1456 pcum->aapcs_reg = par;
1458 return;
1460 else
1462 /* C.3 NSRN is set to 8. */
1463 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1464 goto on_stack;
1468 ncrn = pcum->aapcs_ncrn;
1469 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1470 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1473 /* C6 - C9, though the sign and zero extension semantics are
1474 handled elsewhere. This is the case where the argument fits
1475 entirely in general registers. */
1476 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1478 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1480 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1482 /* C.8 if the argument has an alignment of 16 then the NGRN is
1483 rounded up to the next even number. */
1484 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1486 ++ncrn;
1487 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1489 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1490 A reg is still generated for it, but the caller should be smart
1491 enough not to use it. */
1492 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1494 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1496 else
1498 rtx par;
1499 int i;
1501 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1502 for (i = 0; i < nregs; i++)
1504 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1505 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1506 GEN_INT (i * UNITS_PER_WORD));
1507 XVECEXP (par, 0, i) = tmp;
1509 pcum->aapcs_reg = par;
1512 pcum->aapcs_nextncrn = ncrn + nregs;
1513 return;
1516 /* C.11 */
1517 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1519 /* The argument is passed on stack; record the needed number of words for
1520 this argument (we can re-use NREGS) and align the total size if
1521 necessary. */
1522 on_stack:
1523 pcum->aapcs_stack_words = nregs;
1524 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1525 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1526 16 / UNITS_PER_WORD) + 1;
1527 return;
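/* An illustrative consequence of the layout rules above: a structure of
   four doubles is a homogeneous floating-point aggregate, so while at
   least four of v0-v7 remain free it is passed in four consecutive
   FP/SIMD registers, expressed as a PARALLEL of four DFmode EXPR_LIST
   entries; once fewer than four of those registers remain, rule C.3 sets
   the NSRN to 8 and the whole argument goes on the stack.  */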
1530 /* Implement TARGET_FUNCTION_ARG. */
1532 static rtx
1533 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1534 const_tree type, bool named)
1536 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1537 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1539 if (mode == VOIDmode)
1540 return NULL_RTX;
1542 aarch64_layout_arg (pcum_v, mode, type, named);
1543 return pcum->aapcs_reg;
1546 void
1547 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1548 const_tree fntype ATTRIBUTE_UNUSED,
1549 rtx libname ATTRIBUTE_UNUSED,
1550 const_tree fndecl ATTRIBUTE_UNUSED,
1551 unsigned n_named ATTRIBUTE_UNUSED)
1553 pcum->aapcs_ncrn = 0;
1554 pcum->aapcs_nvrn = 0;
1555 pcum->aapcs_nextncrn = 0;
1556 pcum->aapcs_nextnvrn = 0;
1557 pcum->pcs_variant = ARM_PCS_AAPCS64;
1558 pcum->aapcs_reg = NULL_RTX;
1559 pcum->aapcs_arg_processed = false;
1560 pcum->aapcs_stack_words = 0;
1561 pcum->aapcs_stack_size = 0;
1563 return;
1566 static void
1567 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1568 enum machine_mode mode,
1569 const_tree type,
1570 bool named)
1572 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1573 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1575 aarch64_layout_arg (pcum_v, mode, type, named);
1576 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1577 != (pcum->aapcs_stack_words != 0));
1578 pcum->aapcs_arg_processed = false;
1579 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1580 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1581 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1582 pcum->aapcs_stack_words = 0;
1583 pcum->aapcs_reg = NULL_RTX;
1587 bool
1588 aarch64_function_arg_regno_p (unsigned regno)
1590 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1591 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1594 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1595 PARM_BOUNDARY bits of alignment, but will be given anything up
1596 to STACK_BOUNDARY bits if the type requires it. This makes sure
1597 that both before and after the layout of each argument, the Next
1598 Stacked Argument Address (NSAA) will have a minimum alignment of
1599 8 bytes. */
1601 static unsigned int
1602 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1604 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1606 if (alignment < PARM_BOUNDARY)
1607 alignment = PARM_BOUNDARY;
1608 if (alignment > STACK_BOUNDARY)
1609 alignment = STACK_BOUNDARY;
1610 return alignment;
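/* Worked example for the clamp above, assuming the usual definitions of
   PARM_BOUNDARY (64) and STACK_BOUNDARY (128) for this port: a type with
   8-bit alignment is promoted to 64 bits, a 128-bit aligned type stays at
   128 bits, and a request for 256-bit alignment is capped at 128 bits.  */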
1613 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1615 Return true if an argument passed on the stack should be padded upwards,
1616 i.e. if the least-significant byte of the stack slot has useful data.
1618 Small aggregate types are placed in the lowest memory address.
1620 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1622 bool
1623 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1625 /* On little-endian targets, the least significant byte of every stack
1626 argument is passed at the lowest byte address of the stack slot. */
1627 if (!BYTES_BIG_ENDIAN)
1628 return true;
1630 /* Otherwise, integral, floating-point and pointer types are padded downward:
1631 the least significant byte of a stack argument is passed at the highest
1632 byte address of the stack slot. */
1633 if (type
1634 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1635 || POINTER_TYPE_P (type))
1636 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1637 return false;
1639 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1640 return true;
1643 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1645 It specifies padding for the last (and possibly the only)
1646 element of a block move between registers and memory.
1647 Assuming the block is in memory, padding upward means that
1648 the last element is padded after its most significant byte,
1649 while in downward padding the last element is padded at
1650 its least significant byte side.
1652 Small aggregates and small complex types are always padded
1653 upwards.
1655 We don't need to worry about homogeneous floating-point or
1656 short-vector aggregates; their move is not affected by the
1657 padding direction determined here. Regardless of endianness,
1658 each element of such an aggregate is put in the least
1659 significant bits of a fp/simd register.
1661 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1662 register has useful data, and return the opposite if the most
1663 significant byte does. */
1665 bool
1666 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1667 bool first ATTRIBUTE_UNUSED)
1670 /* Small composite types are always padded upward. */
1671 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1673 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1674 : GET_MODE_SIZE (mode));
1675 if (size < 2 * UNITS_PER_WORD)
1676 return true;
1679 /* Otherwise, use the default padding. */
1680 return !BYTES_BIG_ENDIAN;
1683 static enum machine_mode
1684 aarch64_libgcc_cmp_return_mode (void)
1686 return SImode;
1689 static bool
1690 aarch64_frame_pointer_required (void)
1692 /* If the function contains dynamic stack allocations, we need to
1693 use the frame pointer to access the static parts of the frame. */
1694 if (cfun->calls_alloca)
1695 return true;
1697 /* We may have turned flag_omit_frame_pointer on in order to have this
1698 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1699 and we'll check it here.
1700 If flag_omit_frame_pointer was set normally (not faked), then we return
1701 false (no frame pointer required) in all cases. */
1703 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1704 return false;
1705 else if (flag_omit_leaf_frame_pointer)
1706 return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
1707 return true;
1710 /* Mark the registers that need to be saved by the callee and calculate
1711 the size of the callee-saved registers area and frame record (both FP
1712 and LR may be omitted). */
1713 static void
1714 aarch64_layout_frame (void)
1716 HOST_WIDE_INT offset = 0;
1717 int regno;
1719 if (reload_completed && cfun->machine->frame.laid_out)
1720 return;
1722 cfun->machine->frame.fp_lr_offset = 0;
1724 /* First mark all the registers that really need to be saved... */
1725 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1726 cfun->machine->frame.reg_offset[regno] = -1;
1728 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1729 cfun->machine->frame.reg_offset[regno] = -1;
1731 /* ... that includes the eh data registers (if needed)... */
1732 if (crtl->calls_eh_return)
1733 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1734 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1736 /* ... and any callee-saved register that dataflow says is live. */
1737 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno)
1739 && !call_used_regs[regno])
1740 cfun->machine->frame.reg_offset[regno] = 0;
1742 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1743 if (df_regs_ever_live_p (regno)
1744 && !call_used_regs[regno])
1745 cfun->machine->frame.reg_offset[regno] = 0;
1747 if (frame_pointer_needed)
1749 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1750 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1751 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1754 /* Now assign stack slots for them. */
1755 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1756 if (cfun->machine->frame.reg_offset[regno] != -1)
1758 cfun->machine->frame.reg_offset[regno] = offset;
1759 offset += UNITS_PER_WORD;
1762 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1763 if (cfun->machine->frame.reg_offset[regno] != -1)
1765 cfun->machine->frame.reg_offset[regno] = offset;
1766 offset += UNITS_PER_WORD;
1769 if (frame_pointer_needed)
1771 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1772 offset += UNITS_PER_WORD;
1773 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1776 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1778 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1779 offset += UNITS_PER_WORD;
1780 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1783 cfun->machine->frame.padding0 =
1784 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1785 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1787 cfun->machine->frame.saved_regs_size = offset;
1788 cfun->machine->frame.laid_out = true;
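/* A small illustration of the layout above: for a function that needs a
   frame pointer and additionally saves x19 and d8, the slots are assigned
   in ascending order x19, d8, x29 (FP), x30 (LR), each UNITS_PER_WORD
   apart, and the total is then rounded up to STACK_BOUNDARY.  */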
1791 /* Make the last instruction frame-related and note that it performs
1792 the operation described by FRAME_PATTERN. */
1794 static void
1795 aarch64_set_frame_expr (rtx frame_pattern)
1797 rtx insn;
1799 insn = get_last_insn ();
1800 RTX_FRAME_RELATED_P (insn) = 1;
1801 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1802 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1803 frame_pattern,
1804 REG_NOTES (insn));
1807 static bool
1808 aarch64_register_saved_on_entry (int regno)
1810 return cfun->machine->frame.reg_offset[regno] != -1;
1814 static void
1815 aarch64_save_or_restore_fprs (int start_offset, int increment,
1816 bool restore, rtx base_rtx)
1819 unsigned regno;
1820 unsigned regno2;
1821 rtx insn;
1822 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1823 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1826 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1828 if (aarch64_register_saved_on_entry (regno))
1830 rtx mem;
1831 mem = gen_mem_ref (DFmode,
1832 plus_constant (Pmode,
1833 base_rtx,
1834 start_offset));
1836 for (regno2 = regno + 1;
1837 regno2 <= V31_REGNUM
1838 && !aarch64_register_saved_on_entry (regno2);
1839 regno2++)
1841 /* Empty loop. */
1843 if (regno2 <= V31_REGNUM &&
1844 aarch64_register_saved_on_entry (regno2))
1846 rtx mem2;
1847 /* Next highest register to be saved. */
1848 mem2 = gen_mem_ref (DFmode,
1849 plus_constant
1850 (Pmode,
1851 base_rtx,
1852 start_offset + increment));
1853 if (restore == false)
1855 insn = emit_insn
1856 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1857 mem2, gen_rtx_REG (DFmode, regno2)));
1860 else
1862 insn = emit_insn
1863 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1864 gen_rtx_REG (DFmode, regno2), mem2));
1866 add_reg_note (insn, REG_CFA_RESTORE,
1867 gen_rtx_REG (DFmode, regno));
1868 add_reg_note (insn, REG_CFA_RESTORE,
1869 gen_rtx_REG (DFmode, regno2));
1872 /* The first part of a frame-related parallel insn
1873 is always assumed to be relevant to the frame
1874 calculations; subsequent parts are only
1875 frame-related if explicitly marked. */
1876 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1877 regno = regno2;
1878 start_offset += increment * 2;
1880 else
1882 if (restore == false)
1883 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1884 else
1886 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1887 add_reg_note (insn, REG_CFA_RESTORE,
1888 gen_rtx_REG (DImode, regno));
1890 start_offset += increment;
1892 RTX_FRAME_RELATED_P (insn) = 1;
1899 /* Offset from the stack pointer of where the saves and
1900 restores have to happen. */
1901 static void
1902 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1903 bool restore)
1905 rtx insn;
1906 rtx base_rtx = stack_pointer_rtx;
1907 HOST_WIDE_INT start_offset = offset;
1908 HOST_WIDE_INT increment = UNITS_PER_WORD;
1909 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1910 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1911 unsigned regno;
1912 unsigned regno2;
1914 for (regno = R0_REGNUM; regno <= limit; regno++)
1916 if (aarch64_register_saved_on_entry (regno))
1918 rtx mem;
1919 mem = gen_mem_ref (Pmode,
1920 plus_constant (Pmode,
1921 base_rtx,
1922 start_offset));
1924 for (regno2 = regno + 1;
1925 regno2 <= limit
1926 && !aarch64_register_saved_on_entry (regno2);
1927 regno2++)
1929 /* Empty loop. */
1931 if (regno2 <= limit &&
1932 aarch64_register_saved_on_entry (regno2))
1934 rtx mem2;
1935 /* Next highest register to be saved. */
1936 mem2 = gen_mem_ref (Pmode,
1937 plus_constant
1938 (Pmode,
1939 base_rtx,
1940 start_offset + increment));
1941 if (restore == false)
1943 insn = emit_insn
1944 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1945 mem2, gen_rtx_REG (DImode, regno2)));
1948 else
1950 insn = emit_insn
1951 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1952 gen_rtx_REG (DImode, regno2), mem2));
1954 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1955 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1958 /* The first part of a frame-related parallel insn
1959 is always assumed to be relevant to the frame
1960 calculations; subsequent parts are only
1961 frame-related if explicitly marked. */
1962 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1963 1)) = 1;
1964 regno = regno2;
1965 start_offset += increment * 2;
1967 else
1969 if (restore == false)
1970 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1971 else
1973 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1974 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1976 start_offset += increment;
1978 RTX_FRAME_RELATED_P (insn) = 1;
1982 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1986 /* AArch64 stack frames generated by this compiler look like:
1988 +-------------------------------+
1990 | incoming stack arguments |
1992 +-------------------------------+ <-- arg_pointer_rtx
1994 | callee-allocated save area |
1995 | for register varargs |
1997 +-------------------------------+
1999 | local variables |
2001 +-------------------------------+ <-- frame_pointer_rtx
2003 | callee-saved registers |
2005 +-------------------------------+
2006 | LR' |
2007 +-------------------------------+
2008 | FP' |
2009 P +-------------------------------+ <-- hard_frame_pointer_rtx
2010 | dynamic allocation |
2011 +-------------------------------+
2013 | outgoing stack arguments |
2015 +-------------------------------+ <-- stack_pointer_rtx
2017 Dynamic stack allocations such as alloca insert data at point P.
2018 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2019 hard_frame_pointer_rtx unchanged. */
2021 /* Generate the prologue instructions for entry into a function.
2022 Establish the stack frame by decreasing the stack pointer with a
2023 properly calculated size and, if necessary, create a frame record
2024 filled with the values of LR and previous frame pointer. The
2025 current FP is also set up if it is in use. */
2027 void
2028 aarch64_expand_prologue (void)
2030 /* sub sp, sp, #<frame_size>
2031 stp {fp, lr}, [sp, #<frame_size> - 16]
2032 add fp, sp, #<frame_size> - hardfp_offset
2033 stp {cs_reg}, [fp, #-16] etc.
2035 sub sp, sp, <final_adjustment_if_any>
2037 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2038 HOST_WIDE_INT frame_size, offset;
2039 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2040 rtx insn;
2042 aarch64_layout_frame ();
2043 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2044 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2045 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2046 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2047 + crtl->outgoing_args_size);
2048 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2049 STACK_BOUNDARY / BITS_PER_UNIT);
2051 if (flag_stack_usage_info)
2052 current_function_static_stack_size = frame_size;
2054 fp_offset = (offset
2055 - original_frame_size
2056 - cfun->machine->frame.saved_regs_size);
2058 /* Store pairs and load pairs have a range of only -512 to 504. */
2059 if (offset >= 512)
2061 /* When the frame has a large size, an initial decrease is done on
2062 the stack pointer to jump over the callee-allocated save area for
2063 register varargs, the local variable area and/or the callee-saved
2064 register area. This will allow the pre-index write-back
2065 store pair instructions to be used for setting up the stack frame
2066 efficiently. */
2067 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2068 if (offset >= 512)
2069 offset = cfun->machine->frame.saved_regs_size;
2071 frame_size -= (offset + crtl->outgoing_args_size);
2072 fp_offset = 0;
2074 if (frame_size >= 0x1000000)
2076 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2077 emit_move_insn (op0, GEN_INT (-frame_size));
2078 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2079 aarch64_set_frame_expr (gen_rtx_SET
2080 (Pmode, stack_pointer_rtx,
2081 plus_constant (Pmode,
2082 stack_pointer_rtx,
2083 -frame_size)));
2085 else if (frame_size > 0)
2087 if ((frame_size & 0xfff) != frame_size)
2089 insn = emit_insn (gen_add2_insn
2090 (stack_pointer_rtx,
2091 GEN_INT (-(frame_size
2092 & ~(HOST_WIDE_INT)0xfff))));
2093 RTX_FRAME_RELATED_P (insn) = 1;
2095 if ((frame_size & 0xfff) != 0)
2097 insn = emit_insn (gen_add2_insn
2098 (stack_pointer_rtx,
2099 GEN_INT (-(frame_size
2100 & (HOST_WIDE_INT)0xfff))));
2101 RTX_FRAME_RELATED_P (insn) = 1;
2105 else
2106 frame_size = -1;
2108 if (offset > 0)
2110 /* Save the frame pointer and lr if the frame pointer is needed
2111 first. Make the frame pointer point to the location of the
2112 old frame pointer on the stack. */
2113 if (frame_pointer_needed)
2115 rtx mem_fp, mem_lr;
2117 if (fp_offset)
2119 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2120 GEN_INT (-offset)));
2121 RTX_FRAME_RELATED_P (insn) = 1;
2122 aarch64_set_frame_expr (gen_rtx_SET
2123 (Pmode, stack_pointer_rtx,
2124 gen_rtx_MINUS (Pmode,
2125 stack_pointer_rtx,
2126 GEN_INT (offset))));
2127 mem_fp = gen_frame_mem (DImode,
2128 plus_constant (Pmode,
2129 stack_pointer_rtx,
2130 fp_offset));
2131 mem_lr = gen_frame_mem (DImode,
2132 plus_constant (Pmode,
2133 stack_pointer_rtx,
2134 fp_offset
2135 + UNITS_PER_WORD));
2136 insn = emit_insn (gen_store_pairdi (mem_fp,
2137 hard_frame_pointer_rtx,
2138 mem_lr,
2139 gen_rtx_REG (DImode,
2140 LR_REGNUM)));
2142 else
2144 insn = emit_insn (gen_storewb_pairdi_di
2145 (stack_pointer_rtx, stack_pointer_rtx,
2146 hard_frame_pointer_rtx,
2147 gen_rtx_REG (DImode, LR_REGNUM),
2148 GEN_INT (-offset),
2149 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2150 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2153 /* The first part of a frame-related parallel insn is always
2154 assumed to be relevant to the frame calculations;
2155 subsequent parts are only frame-related if explicitly
2156 marked. */
2157 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2158 RTX_FRAME_RELATED_P (insn) = 1;
2160 /* Set up frame pointer to point to the location of the
2161 previous frame pointer on the stack. */
2162 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2163 stack_pointer_rtx,
2164 GEN_INT (fp_offset)));
2165 aarch64_set_frame_expr (gen_rtx_SET
2166 (Pmode, hard_frame_pointer_rtx,
2167 plus_constant (Pmode,
2168 stack_pointer_rtx,
2169 fp_offset)));
2170 RTX_FRAME_RELATED_P (insn) = 1;
2171 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2172 hard_frame_pointer_rtx));
2174 else
2176 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2177 GEN_INT (-offset)));
2178 RTX_FRAME_RELATED_P (insn) = 1;
2181 aarch64_save_or_restore_callee_save_registers
2182 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2185 /* when offset >= 512,
2186 sub sp, sp, #<outgoing_args_size> */
2187 if (frame_size > -1)
2189 if (crtl->outgoing_args_size > 0)
2191 insn = emit_insn (gen_add2_insn
2192 (stack_pointer_rtx,
2193 GEN_INT (- crtl->outgoing_args_size)));
2194 RTX_FRAME_RELATED_P (insn) = 1;
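/* Illustrative sketch (not from the original source): for a small frame
   whose rounded size is, say, 32 bytes and whose FP/LR record sits at the
   bottom of the frame (fp_offset == 0), the writeback path above emits
   roughly:

       stp  x29, x30, [sp, #-32]!     // store pair with pre-index writeback
       add  x29, sp, #0               // set up the frame pointer
       ...                            // save remaining callee-saved registers

   whereas frames of 512 bytes or more take the initial "sub sp, sp, #N"
   adjustment first so the subsequent store-pair offsets stay in range.  */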
2199 /* Generate the epilogue instructions for returning from a function. */
2200 void
2201 aarch64_expand_epilogue (bool for_sibcall)
2203 HOST_WIDE_INT original_frame_size, frame_size, offset;
2204 HOST_WIDE_INT fp_offset;
2205 rtx insn;
2206 rtx cfa_reg;
2208 aarch64_layout_frame ();
2209 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2210 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2211 + crtl->outgoing_args_size);
2212 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2213 STACK_BOUNDARY / BITS_PER_UNIT);
2215 fp_offset = (offset
2216 - original_frame_size
2217 - cfun->machine->frame.saved_regs_size);
2219 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2221 /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2222 if (offset >= 512)
2224 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2225 if (offset >= 512)
2226 offset = cfun->machine->frame.saved_regs_size;
2228 frame_size -= (offset + crtl->outgoing_args_size);
2229 fp_offset = 0;
2230 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2232 insn = emit_insn (gen_add2_insn
2233 (stack_pointer_rtx,
2234 GEN_INT (crtl->outgoing_args_size)));
2235 RTX_FRAME_RELATED_P (insn) = 1;
2238 else
2239 frame_size = -1;
2241 /* If there were outgoing arguments or we've done dynamic stack
2242 allocation, then restore the stack pointer from the frame
2243 pointer. This is at most one insn and more efficient than using
2244 GCC's internal mechanism. */
2245 if (frame_pointer_needed
2246 && (crtl->outgoing_args_size || cfun->calls_alloca))
2248 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2249 hard_frame_pointer_rtx,
2250 GEN_INT (- fp_offset)));
2251 RTX_FRAME_RELATED_P (insn) = 1;
2252 /* As SP is set to (FP - fp_offset), according to the rules in
2253 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2254 from the value of SP from now on. */
2255 cfa_reg = stack_pointer_rtx;
2258 aarch64_save_or_restore_callee_save_registers
2259 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2261 /* Restore the frame pointer and lr if the frame pointer is needed. */
2262 if (offset > 0)
2264 if (frame_pointer_needed)
2266 rtx mem_fp, mem_lr;
2268 if (fp_offset)
2270 mem_fp = gen_frame_mem (DImode,
2271 plus_constant (Pmode,
2272 stack_pointer_rtx,
2273 fp_offset));
2274 mem_lr = gen_frame_mem (DImode,
2275 plus_constant (Pmode,
2276 stack_pointer_rtx,
2277 fp_offset
2278 + UNITS_PER_WORD));
2279 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2280 mem_fp,
2281 gen_rtx_REG (DImode,
2282 LR_REGNUM),
2283 mem_lr));
2285 else
2287 insn = emit_insn (gen_loadwb_pairdi_di
2288 (stack_pointer_rtx,
2289 stack_pointer_rtx,
2290 hard_frame_pointer_rtx,
2291 gen_rtx_REG (DImode, LR_REGNUM),
2292 GEN_INT (offset),
2293 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2294 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2295 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2296 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2297 plus_constant (Pmode, cfa_reg,
2298 offset))));
2301 /* The first part of a frame-related parallel insn
2302 is always assumed to be relevant to the frame
2303 calculations; subsequent parts are only
2304 frame-related if explicitly marked. */
2305 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2306 RTX_FRAME_RELATED_P (insn) = 1;
2307 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2308 add_reg_note (insn, REG_CFA_RESTORE,
2309 gen_rtx_REG (DImode, LR_REGNUM));
2311 if (fp_offset)
2313 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2314 GEN_INT (offset)));
2315 RTX_FRAME_RELATED_P (insn) = 1;
2318 else
2320 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2321 GEN_INT (offset)));
2322 RTX_FRAME_RELATED_P (insn) = 1;
2326 /* Stack adjustment for exception handler. */
2327 if (crtl->calls_eh_return)
2329 /* We need to unwind the stack by the offset computed by
2330 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2331 based on SP. Ideally we would update the SP and define the
2332 CFA along the lines of:
2334 SP = SP + EH_RETURN_STACKADJ_RTX
2335 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2337 However the dwarf emitter only understands a constant
2338 register offset.
2340 The solution chosen here is to use the otherwise unused IP0
2341 as a temporary register to hold the current SP value. The
2342 CFA is described using IP0 then SP is modified. */
2344 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2346 insn = emit_move_insn (ip0, stack_pointer_rtx);
2347 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2348 RTX_FRAME_RELATED_P (insn) = 1;
2350 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2352 /* Ensure the assignment to IP0 does not get optimized away. */
2353 emit_use (ip0);
2356 if (frame_size > -1)
2358 if (frame_size >= 0x1000000)
2360 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2361 emit_move_insn (op0, GEN_INT (frame_size));
2362 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2363 aarch64_set_frame_expr (gen_rtx_SET
2364 (Pmode, stack_pointer_rtx,
2365 plus_constant (Pmode,
2366 stack_pointer_rtx,
2367 frame_size)));
2369 else if (frame_size > 0)
2371 if ((frame_size & 0xfff) != 0)
2373 insn = emit_insn (gen_add2_insn
2374 (stack_pointer_rtx,
2375 GEN_INT ((frame_size
2376 & (HOST_WIDE_INT) 0xfff))));
2377 RTX_FRAME_RELATED_P (insn) = 1;
2379 if ((frame_size & 0xfff) != frame_size)
2381 insn = emit_insn (gen_add2_insn
2382 (stack_pointer_rtx,
2383 GEN_INT ((frame_size
2384 & ~ (HOST_WIDE_INT) 0xfff))));
2385 RTX_FRAME_RELATED_P (insn) = 1;
2389 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2390 plus_constant (Pmode,
2391 stack_pointer_rtx,
2392 offset)));
2395 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2396 if (!for_sibcall)
2397 emit_jump_insn (ret_rtx);
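/* Illustrative sketch (not from the original source): the matching
   epilogue for the small-frame case sketched above would look roughly
   like:

       ...                            // restore other callee-saved registers
       ldp  x29, x30, [sp], #32       // load pair with post-index writeback
       ret

   with the REG_CFA notes emitted above keeping the unwinder's view of
   the CFA consistent while sp is being restored.  */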
2400 /* Return the place to copy the exception unwinding return address to.
2401 This will probably be a stack slot, but could (in theory) be the
2402 return register.  */
2404 aarch64_final_eh_return_addr (void)
2406 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2407 aarch64_layout_frame ();
2408 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2409 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2410 + crtl->outgoing_args_size);
2411 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2412 STACK_BOUNDARY / BITS_PER_UNIT);
2413 fp_offset = offset
2414 - original_frame_size
2415 - cfun->machine->frame.saved_regs_size;
2417 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2418 return gen_rtx_REG (DImode, LR_REGNUM);
2420 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2421 result in a store to save LR introduced by builtin_eh_return () being
2422 incorrectly deleted because the alias is not detected.
2423 So in the calculation of the address to copy the exception unwinding
2424 return address to, we note 2 cases.
2425 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2426 we return a SP-relative location since all the addresses are SP-relative
2427 in this case. This prevents the store from being optimized away.
2428 If the fp_offset is not 0, then the addresses will be FP-relative and
2429 therefore we return a FP-relative location. */
2431 if (frame_pointer_needed)
2433 if (fp_offset)
2434 return gen_frame_mem (DImode,
2435 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2436 else
2437 return gen_frame_mem (DImode,
2438 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2441 /* If FP is not needed, we calculate the location of LR, which would be
2442 at the top of the saved registers block. */
2444 return gen_frame_mem (DImode,
2445 plus_constant (Pmode,
2446 stack_pointer_rtx,
2447 fp_offset
2448 + cfun->machine->frame.saved_regs_size
2449 - 2 * UNITS_PER_WORD));
2452 /* Output code to build up a constant in a register. */
2453 static void
2454 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2456 if (aarch64_bitmask_imm (val, DImode))
2457 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2458 else
2460 int i;
2461 int ncount = 0;
2462 int zcount = 0;
2463 HOST_WIDE_INT valp = val >> 16;
2464 HOST_WIDE_INT valm;
2465 HOST_WIDE_INT tval;
2467 for (i = 16; i < 64; i += 16)
2469 valm = (valp & 0xffff);
2471 if (valm != 0)
2472 ++ zcount;
2474 if (valm != 0xffff)
2475 ++ ncount;
2477 valp >>= 16;
2480 /* zcount contains the number of additional MOVK instructions
2481 required if the constant is built up with an initial MOVZ instruction,
2482 while ncount is the number of MOVK instructions required if starting
2483 with a MOVN instruction. Choose the sequence that yields the fewest
2484 instructions, preferring MOVZ instructions when they are both
2485 the same. */
2486 if (ncount < zcount)
2488 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2489 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2490 tval = 0xffff;
2492 else
2494 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2495 GEN_INT (val & 0xffff));
2496 tval = 0;
2499 val >>= 16;
2501 for (i = 16; i < 64; i += 16)
2503 if ((val & 0xffff) != tval)
2504 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2505 GEN_INT (i), GEN_INT (val & 0xffff)));
2506 val >>= 16;
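/* Worked example (illustrative, not from the original source): building
   val == 0x0000beef00001234 starts from the low 16 bits with a MOVZ and
   then patches only the 16-bit chunks that differ from the implicit
   zero fill, e.g. with regnum == 0:

       movz x0, #0x1234
       movk x0, #0xbeef, lsl #32

   A mostly-ones value such as 0xffffffffffff1234 would instead be
   reached via the MOVN path, patching with MOVK only where a chunk
   differs from 0xffff (none at all in that particular case).  */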
2511 static void
2512 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2514 HOST_WIDE_INT mdelta = delta;
2515 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2516 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2518 if (mdelta < 0)
2519 mdelta = -mdelta;
2521 if (mdelta >= 4096 * 4096)
2523 aarch64_build_constant (scratchreg, delta);
2524 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2526 else if (mdelta > 0)
2528 if (mdelta >= 4096)
2530 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2531 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2532 if (delta < 0)
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2535 else
2536 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2537 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2539 if (mdelta % 4096 != 0)
2541 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2542 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2543 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
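/* Worked example (illustrative, not from the original source): for
   delta == 10000 the code above splits the addition into a 12-bit
   shifted part and a 12-bit remainder, roughly:

       mov  x<scratchreg>, #2                              // 10000 / 4096
       add  x<regnum>, x<regnum>, x<scratchreg>, lsl #12   // += 8192
       add  x<regnum>, x<regnum>, #1808                    // 10000 % 4096

   while a delta of 4096 * 4096 or more falls back to materialising the
   full constant with aarch64_build_constant and a register add.  */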
2548 /* Output code to add DELTA to the first argument, and then jump
2549 to FUNCTION. Used for C++ multiple inheritance. */
2550 static void
2551 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2552 HOST_WIDE_INT delta,
2553 HOST_WIDE_INT vcall_offset,
2554 tree function)
2556 /* The this pointer is always in x0. Note that this differs from
2557 Arm where the this pointer may be bumped to r1 if r0 is required
2558 to return a pointer to an aggregate. On AArch64 a result value
2559 pointer will be in x8. */
2560 int this_regno = R0_REGNUM;
2561 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2563 reload_completed = 1;
2564 emit_note (NOTE_INSN_PROLOGUE_END);
2566 if (vcall_offset == 0)
2567 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2568 else
2570 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2572 this_rtx = gen_rtx_REG (Pmode, this_regno);
2573 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2574 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2576 addr = this_rtx;
2577 if (delta != 0)
2579 if (delta >= -256 && delta < 256)
2580 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2581 plus_constant (Pmode, this_rtx, delta));
2582 else
2583 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2586 if (Pmode == ptr_mode)
2587 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2588 else
2589 aarch64_emit_move (temp0,
2590 gen_rtx_ZERO_EXTEND (Pmode,
2591 gen_rtx_MEM (ptr_mode, addr)));
2593 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2594 addr = plus_constant (Pmode, temp0, vcall_offset);
2595 else
2597 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2598 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2601 if (Pmode == ptr_mode)
2602 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2603 else
2604 aarch64_emit_move (temp1,
2605 gen_rtx_SIGN_EXTEND (Pmode,
2606 gen_rtx_MEM (ptr_mode, addr)));
2608 emit_insn (gen_add2_insn (this_rtx, temp1));
2611 /* Generate a tail call to the target function. */
2612 if (!TREE_USED (function))
2614 assemble_external (function);
2615 TREE_USED (function) = 1;
2617 funexp = XEXP (DECL_RTL (function), 0);
2618 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2619 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2620 SIBLING_CALL_P (insn) = 1;
2622 insn = get_insns ();
2623 shorten_branches (insn);
2624 final_start_function (insn, file, 1);
2625 final (insn, file, 1);
2626 final_end_function ();
2628 /* Stop pretending to be a post-reload pass. */
2629 reload_completed = 0;
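/* Illustrative sketch (not from the original source): for a C++
   this-adjusting thunk with a small DELTA and a non-zero VCALL_OFFSET,
   the code above emits approximately:

       ldr  x16, [x0, #delta]!        // this += delta; x16 = *this
       ldr  x17, [x16, #vcall_offset] // fetch the vcall adjustment
       add  x0, x0, x17               // apply it to this
       b    <function>                // tail call

   using IP0/IP1 (x16/x17) as the scratch registers named in the code.  */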
2632 static int
2633 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2635 if (GET_CODE (*x) == SYMBOL_REF)
2636 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2638 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2639 TLS offsets, not real symbol references. */
2640 if (GET_CODE (*x) == UNSPEC
2641 && XINT (*x, 1) == UNSPEC_TLS)
2642 return -1;
2644 return 0;
2647 static bool
2648 aarch64_tls_referenced_p (rtx x)
2650 if (!TARGET_HAVE_TLS)
2651 return false;
2653 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2657 static int
2658 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2660 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2661 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2663 if (*imm1 < *imm2)
2664 return -1;
2665 if (*imm1 > *imm2)
2666 return +1;
2667 return 0;
2671 static void
2672 aarch64_build_bitmask_table (void)
2674 unsigned HOST_WIDE_INT mask, imm;
2675 unsigned int log_e, e, s, r;
2676 unsigned int nimms = 0;
2678 for (log_e = 1; log_e <= 6; log_e++)
2680 e = 1 << log_e;
2681 if (e == 64)
2682 mask = ~(HOST_WIDE_INT) 0;
2683 else
2684 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2685 for (s = 1; s < e; s++)
2687 for (r = 0; r < e; r++)
2689 /* set s consecutive bits to 1 (s < 64) */
2690 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2691 /* rotate right by r */
2692 if (r != 0)
2693 imm = ((imm >> r) | (imm << (e - r))) & mask;
2694 /* replicate the constant depending on SIMD size */
2695 switch (log_e) {
2696 case 1: imm |= (imm << 2);
2697 case 2: imm |= (imm << 4);
2698 case 3: imm |= (imm << 8);
2699 case 4: imm |= (imm << 16);
2700 case 5: imm |= (imm << 32);
2701 case 6:
2702 break;
2703 default:
2704 gcc_unreachable ();
2706 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2707 aarch64_bitmasks[nimms++] = imm;
2712 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2713 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2714 aarch64_bitmasks_cmp);
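/* Worked example (illustrative, not from the original source): with
   element size e == 8, run length s == 3 and rotation r == 1 the loops
   above produce

       imm = 0b00000111                 three consecutive ones
       imm = 0b10000011                 rotated right by one within e bits
       imm = 0x8383838383838383         replicated out to 64 bits

   which is exactly the kind of pattern the logical-immediate encoding
   used by AND/ORR/EOR (and the bitmask MOV alias) can represent.  */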
2718 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2719 a left shift of 0 or 12 bits. */
2720 bool
2721 aarch64_uimm12_shift (HOST_WIDE_INT val)
2723 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2724 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
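/* For instance (illustrative, not from the original source), 0xabc and
   0xabc000 both satisfy aarch64_uimm12_shift (they fit in 12 bits at
   shift 0 and shift 12 respectively), whereas 0x1001 does not, because
   its set bits straddle the two 12-bit windows.  */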
2729 /* Return true if val is an immediate that can be loaded into a
2730 register by a MOVZ instruction. */
2731 static bool
2732 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2734 if (GET_MODE_SIZE (mode) > 4)
2736 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2737 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2738 return 1;
2740 else
2742 /* Ignore sign extension. */
2743 val &= (HOST_WIDE_INT) 0xffffffff;
2745 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2746 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
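/* For instance (illustrative, not from the original source), in DImode
   0x12340000 is accepted (it is 0x1234 placed at bit 16, i.e.
   "movz xN, #0x1234, lsl #16"), while 0x12345678 is rejected because it
   needs two non-zero 16-bit chunks and therefore more than one MOVZ/MOVK
   instruction.  */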
2750 /* Return true if val is a valid bitmask immediate. */
2751 bool
2752 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2754 if (GET_MODE_SIZE (mode) < 8)
2756 /* Replicate bit pattern. */
2757 val &= (HOST_WIDE_INT) 0xffffffff;
2758 val |= val << 32;
2760 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2761 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2765 /* Return true if val is an immediate that can be loaded into a
2766 register in a single instruction. */
2767 bool
2768 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2770 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2771 return 1;
2772 return aarch64_bitmask_imm (val, mode);
2775 static bool
2776 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2778 rtx base, offset;
2780 if (GET_CODE (x) == HIGH)
2781 return true;
2783 split_const (x, &base, &offset);
2784 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2786 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2787 != SYMBOL_FORCE_TO_MEM)
2788 return true;
2789 else
2790 /* Avoid generating a 64-bit relocation in ILP32; leave
2791 to aarch64_expand_mov_immediate to handle it properly. */
2792 return mode != ptr_mode;
2795 return aarch64_tls_referenced_p (x);
2798 /* Return true if register REGNO is a valid index register.
2799 STRICT_P is true if REG_OK_STRICT is in effect. */
2801 bool
2802 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2804 if (!HARD_REGISTER_NUM_P (regno))
2806 if (!strict_p)
2807 return true;
2809 if (!reg_renumber)
2810 return false;
2812 regno = reg_renumber[regno];
2814 return GP_REGNUM_P (regno);
2817 /* Return true if register REGNO is a valid base register.
2818 STRICT_P is true if REG_OK_STRICT is in effect. */
2820 bool
2821 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2823 if (!HARD_REGISTER_NUM_P (regno))
2825 if (!strict_p)
2826 return true;
2828 if (!reg_renumber)
2829 return false;
2831 regno = reg_renumber[regno];
2834 /* The fake registers will be eliminated to either the stack or
2835 hard frame pointer, both of which are usually valid base registers.
2836 Reload deals with the cases where the eliminated form isn't valid. */
2837 return (GP_REGNUM_P (regno)
2838 || regno == SP_REGNUM
2839 || regno == FRAME_POINTER_REGNUM
2840 || regno == ARG_POINTER_REGNUM);
2843 /* Return true if X is a valid base register.
2844 STRICT_P is true if REG_OK_STRICT is in effect. */
2846 static bool
2847 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2849 if (!strict_p && GET_CODE (x) == SUBREG)
2850 x = SUBREG_REG (x);
2852 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2855 /* Return true if address offset is a valid index. If it is, fill in INFO
2856 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2858 static bool
2859 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2860 enum machine_mode mode, bool strict_p)
2862 enum aarch64_address_type type;
2863 rtx index;
2864 int shift;
2866 /* (reg:P) */
2867 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2868 && GET_MODE (x) == Pmode)
2870 type = ADDRESS_REG_REG;
2871 index = x;
2872 shift = 0;
2874 /* (sign_extend:DI (reg:SI)) */
2875 else if ((GET_CODE (x) == SIGN_EXTEND
2876 || GET_CODE (x) == ZERO_EXTEND)
2877 && GET_MODE (x) == DImode
2878 && GET_MODE (XEXP (x, 0)) == SImode)
2880 type = (GET_CODE (x) == SIGN_EXTEND)
2881 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2882 index = XEXP (x, 0);
2883 shift = 0;
2885 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2886 else if (GET_CODE (x) == MULT
2887 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2889 && GET_MODE (XEXP (x, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x, 1)))
2893 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2894 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2895 index = XEXP (XEXP (x, 0), 0);
2896 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2898 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2899 else if (GET_CODE (x) == ASHIFT
2900 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2901 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2902 && GET_MODE (XEXP (x, 0)) == DImode
2903 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2904 && CONST_INT_P (XEXP (x, 1)))
2906 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2907 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2908 index = XEXP (XEXP (x, 0), 0);
2909 shift = INTVAL (XEXP (x, 1));
2911 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2912 else if ((GET_CODE (x) == SIGN_EXTRACT
2913 || GET_CODE (x) == ZERO_EXTRACT)
2914 && GET_MODE (x) == DImode
2915 && GET_CODE (XEXP (x, 0)) == MULT
2916 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2917 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2919 type = (GET_CODE (x) == SIGN_EXTRACT)
2920 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2921 index = XEXP (XEXP (x, 0), 0);
2922 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2923 if (INTVAL (XEXP (x, 1)) != 32 + shift
2924 || INTVAL (XEXP (x, 2)) != 0)
2925 shift = -1;
2927 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2928 (const_int 0xffffffff<<shift)) */
2929 else if (GET_CODE (x) == AND
2930 && GET_MODE (x) == DImode
2931 && GET_CODE (XEXP (x, 0)) == MULT
2932 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2933 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2934 && CONST_INT_P (XEXP (x, 1)))
2936 type = ADDRESS_REG_UXTW;
2937 index = XEXP (XEXP (x, 0), 0);
2938 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2939 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2940 shift = -1;
2942 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2943 else if ((GET_CODE (x) == SIGN_EXTRACT
2944 || GET_CODE (x) == ZERO_EXTRACT)
2945 && GET_MODE (x) == DImode
2946 && GET_CODE (XEXP (x, 0)) == ASHIFT
2947 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2948 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2950 type = (GET_CODE (x) == SIGN_EXTRACT)
2951 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2952 index = XEXP (XEXP (x, 0), 0);
2953 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2954 if (INTVAL (XEXP (x, 1)) != 32 + shift
2955 || INTVAL (XEXP (x, 2)) != 0)
2956 shift = -1;
2958 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2959 (const_int 0xffffffff<<shift)) */
2960 else if (GET_CODE (x) == AND
2961 && GET_MODE (x) == DImode
2962 && GET_CODE (XEXP (x, 0)) == ASHIFT
2963 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2964 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2965 && CONST_INT_P (XEXP (x, 1)))
2967 type = ADDRESS_REG_UXTW;
2968 index = XEXP (XEXP (x, 0), 0);
2969 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2970 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2971 shift = -1;
2973 /* (mult:P (reg:P) (const_int scale)) */
2974 else if (GET_CODE (x) == MULT
2975 && GET_MODE (x) == Pmode
2976 && GET_MODE (XEXP (x, 0)) == Pmode
2977 && CONST_INT_P (XEXP (x, 1)))
2979 type = ADDRESS_REG_REG;
2980 index = XEXP (x, 0);
2981 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2983 /* (ashift:P (reg:P) (const_int shift)) */
2984 else if (GET_CODE (x) == ASHIFT
2985 && GET_MODE (x) == Pmode
2986 && GET_MODE (XEXP (x, 0)) == Pmode
2987 && CONST_INT_P (XEXP (x, 1)))
2989 type = ADDRESS_REG_REG;
2990 index = XEXP (x, 0);
2991 shift = INTVAL (XEXP (x, 1));
2993 else
2994 return false;
2996 if (GET_CODE (index) == SUBREG)
2997 index = SUBREG_REG (index);
2999 if ((shift == 0 ||
3000 (shift > 0 && shift <= 3
3001 && (1 << shift) == GET_MODE_SIZE (mode)))
3002 && REG_P (index)
3003 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3005 info->type = type;
3006 info->offset = index;
3007 info->shift = shift;
3008 return true;
3011 return false;
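/* Illustrative example (not from the original source): for a DImode
   access, an index expression such as

       (mult:DI (reg:DI x2) (const_int 8))

   classifies as ADDRESS_REG_REG with shift == 3, matching the
   "[x1, x2, lsl 3]" addressing form, while

       (mult:DI (sign_extend:DI (reg:SI w2)) (const_int 8))

   classifies as ADDRESS_REG_SXTW ("[x1, w2, sxtw 3]").  */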
3014 static inline bool
3015 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3017 return (offset >= -64 * GET_MODE_SIZE (mode)
3018 && offset < 64 * GET_MODE_SIZE (mode)
3019 && offset % GET_MODE_SIZE (mode) == 0);
3022 static inline bool
3023 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3024 HOST_WIDE_INT offset)
3026 return offset >= -256 && offset < 256;
3029 static inline bool
3030 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3032 return (offset >= 0
3033 && offset < 4096 * GET_MODE_SIZE (mode)
3034 && offset % GET_MODE_SIZE (mode) == 0);
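/* Worked example (illustrative, not from the original source): for
   DImode (8-byte) accesses the three predicates above accept,
   respectively, multiples of 8 in [-512, 504] (the load/store-pair
   range), any offset in [-256, 255] (the unscaled LDUR/STUR forms), and
   multiples of 8 in [0, 32760] (the scaled 12-bit LDR/STR immediate).  */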
3037 /* Return true if X is a valid address for machine mode MODE. If it is,
3038 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3039 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3041 static bool
3042 aarch64_classify_address (struct aarch64_address_info *info,
3043 rtx x, enum machine_mode mode,
3044 RTX_CODE outer_code, bool strict_p)
3046 enum rtx_code code = GET_CODE (x);
3047 rtx op0, op1;
3048 bool allow_reg_index_p =
3049 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3051 /* Don't support anything other than POST_INC or REG addressing for
3052 AdvSIMD. */
3053 if (aarch64_vector_mode_p (mode)
3054 && (code != POST_INC && code != REG))
3055 return false;
3057 switch (code)
3059 case REG:
3060 case SUBREG:
3061 info->type = ADDRESS_REG_IMM;
3062 info->base = x;
3063 info->offset = const0_rtx;
3064 return aarch64_base_register_rtx_p (x, strict_p);
3066 case PLUS:
3067 op0 = XEXP (x, 0);
3068 op1 = XEXP (x, 1);
3069 if (GET_MODE_SIZE (mode) != 0
3070 && CONST_INT_P (op1)
3071 && aarch64_base_register_rtx_p (op0, strict_p))
3073 HOST_WIDE_INT offset = INTVAL (op1);
3075 info->type = ADDRESS_REG_IMM;
3076 info->base = op0;
3077 info->offset = op1;
3079 /* TImode and TFmode values are allowed in both pairs of X
3080 registers and individual Q registers. The available
3081 address modes are:
3082 X,X: 7-bit signed scaled offset
3083 Q: 9-bit signed offset
3084 We conservatively require an offset representable in either mode.  */
3086 if (mode == TImode || mode == TFmode)
3087 return (offset_7bit_signed_scaled_p (mode, offset)
3088 && offset_9bit_signed_unscaled_p (mode, offset));
3090 if (outer_code == PARALLEL)
3091 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3092 && offset_7bit_signed_scaled_p (mode, offset));
3093 else
3094 return (offset_9bit_signed_unscaled_p (mode, offset)
3095 || offset_12bit_unsigned_scaled_p (mode, offset));
3098 if (allow_reg_index_p)
3100 /* Look for base + (scaled/extended) index register. */
3101 if (aarch64_base_register_rtx_p (op0, strict_p)
3102 && aarch64_classify_index (info, op1, mode, strict_p))
3104 info->base = op0;
3105 return true;
3107 if (aarch64_base_register_rtx_p (op1, strict_p)
3108 && aarch64_classify_index (info, op0, mode, strict_p))
3110 info->base = op1;
3111 return true;
3115 return false;
3117 case POST_INC:
3118 case POST_DEC:
3119 case PRE_INC:
3120 case PRE_DEC:
3121 info->type = ADDRESS_REG_WB;
3122 info->base = XEXP (x, 0);
3123 info->offset = NULL_RTX;
3124 return aarch64_base_register_rtx_p (info->base, strict_p);
3126 case POST_MODIFY:
3127 case PRE_MODIFY:
3128 info->type = ADDRESS_REG_WB;
3129 info->base = XEXP (x, 0);
3130 if (GET_CODE (XEXP (x, 1)) == PLUS
3131 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3132 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3133 && aarch64_base_register_rtx_p (info->base, strict_p))
3135 HOST_WIDE_INT offset;
3136 info->offset = XEXP (XEXP (x, 1), 1);
3137 offset = INTVAL (info->offset);
3139 /* TImode and TFmode values are allowed in both pairs of X
3140 registers and individual Q registers. The available
3141 address modes are:
3142 X,X: 7-bit signed scaled offset
3143 Q: 9-bit signed offset
3144 We conservatively require an offset representable in either mode.  */
3146 if (mode == TImode || mode == TFmode)
3147 return (offset_7bit_signed_scaled_p (mode, offset)
3148 && offset_9bit_signed_unscaled_p (mode, offset));
3150 if (outer_code == PARALLEL)
3151 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3152 && offset_7bit_signed_scaled_p (mode, offset));
3153 else
3154 return offset_9bit_signed_unscaled_p (mode, offset);
3156 return false;
3158 case CONST:
3159 case SYMBOL_REF:
3160 case LABEL_REF:
3161 /* load literal: pc-relative constant pool entry. Only supported
3162 for SI mode or larger. */
3163 info->type = ADDRESS_SYMBOLIC;
3164 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3166 rtx sym, addend;
3168 split_const (x, &sym, &addend);
3169 return (GET_CODE (sym) == LABEL_REF
3170 || (GET_CODE (sym) == SYMBOL_REF
3171 && CONSTANT_POOL_ADDRESS_P (sym)));
3173 return false;
3175 case LO_SUM:
3176 info->type = ADDRESS_LO_SUM;
3177 info->base = XEXP (x, 0);
3178 info->offset = XEXP (x, 1);
3179 if (allow_reg_index_p
3180 && aarch64_base_register_rtx_p (info->base, strict_p))
3182 rtx sym, offs;
3183 split_const (info->offset, &sym, &offs);
3184 if (GET_CODE (sym) == SYMBOL_REF
3185 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3186 == SYMBOL_SMALL_ABSOLUTE))
3188 /* The symbol and offset must be aligned to the access size. */
3189 unsigned int align;
3190 unsigned int ref_size;
3192 if (CONSTANT_POOL_ADDRESS_P (sym))
3193 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3194 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3196 tree exp = SYMBOL_REF_DECL (sym);
3197 align = TYPE_ALIGN (TREE_TYPE (exp));
3198 align = CONSTANT_ALIGNMENT (exp, align);
3200 else if (SYMBOL_REF_DECL (sym))
3201 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3202 else
3203 align = BITS_PER_UNIT;
3205 ref_size = GET_MODE_SIZE (mode);
3206 if (ref_size == 0)
3207 ref_size = GET_MODE_SIZE (DImode);
3209 return ((INTVAL (offs) & (ref_size - 1)) == 0
3210 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3213 return false;
3215 default:
3216 return false;
3220 bool
3221 aarch64_symbolic_address_p (rtx x)
3223 rtx offset;
3225 split_const (x, &x, &offset);
3226 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3229 /* Classify the base of symbolic expression X, given that X appears in
3230 context CONTEXT. */
3232 enum aarch64_symbol_type
3233 aarch64_classify_symbolic_expression (rtx x,
3234 enum aarch64_symbol_context context)
3236 rtx offset;
3238 split_const (x, &x, &offset);
3239 return aarch64_classify_symbol (x, context);
3243 /* Return TRUE if X is a legitimate address for accessing memory in
3244 mode MODE. */
3245 static bool
3246 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3248 struct aarch64_address_info addr;
3250 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3253 /* Return TRUE if X is a legitimate address for accessing memory in
3254 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3255 pair operation. */
3256 bool
3257 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3258 RTX_CODE outer_code, bool strict_p)
3260 struct aarch64_address_info addr;
3262 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3265 /* Return TRUE if rtx X is immediate constant 0.0.  */
3266 bool
3267 aarch64_float_const_zero_rtx_p (rtx x)
3269 REAL_VALUE_TYPE r;
3271 if (GET_MODE (x) == VOIDmode)
3272 return false;
3274 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3275 if (REAL_VALUE_MINUS_ZERO (r))
3276 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3277 return REAL_VALUES_EQUAL (r, dconst0);
3280 /* Return the fixed registers used for condition codes. */
3282 static bool
3283 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3285 *p1 = CC_REGNUM;
3286 *p2 = INVALID_REGNUM;
3287 return true;
3290 enum machine_mode
3291 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3293 /* All floating point compares return CCFP if it is an equality
3294 comparison, and CCFPE otherwise. */
3295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3297 switch (code)
3299 case EQ:
3300 case NE:
3301 case UNORDERED:
3302 case ORDERED:
3303 case UNLT:
3304 case UNLE:
3305 case UNGT:
3306 case UNGE:
3307 case UNEQ:
3308 case LTGT:
3309 return CCFPmode;
3311 case LT:
3312 case LE:
3313 case GT:
3314 case GE:
3315 return CCFPEmode;
3317 default:
3318 gcc_unreachable ();
3322 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3323 && y == const0_rtx
3324 && (code == EQ || code == NE || code == LT || code == GE)
3325 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3326 || GET_CODE (x) == NEG))
3327 return CC_NZmode;
3329 /* A compare with a shifted operand. Because of canonicalization,
3330 the comparison will have to be swapped when we emit the assembly
3331 code. */
3332 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3333 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3334 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3335 || GET_CODE (x) == LSHIFTRT
3336 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3337 return CC_SWPmode;
3339 /* Similarly for a negated operand, but we can only do this for
3340 equalities. */
3341 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3342 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3343 && (code == EQ || code == NE)
3344 && GET_CODE (x) == NEG)
3345 return CC_Zmode;
3347 /* A compare of a mode narrower than SI mode against zero can be done
3348 by extending the value in the comparison. */
3349 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3350 && y == const0_rtx)
3351 /* Only use sign-extension if we really need it. */
3352 return ((code == GT || code == GE || code == LE || code == LT)
3353 ? CC_SESWPmode : CC_ZESWPmode);
3355 /* For everything else, return CCmode. */
3356 return CCmode;
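/* Illustrative example (not from the original source): comparing
   (ashift:DI (reg x0) (const_int 2)) against a register y selects
   CC_SWPmode, because canonical RTL keeps the shifted operand on the
   left while the hardware comparison must be emitted the other way
   round (roughly "cmp y, x0, lsl 2"); aarch64_get_condition_code then
   swaps GT/LT, GE/LE and the unsigned pairs accordingly.  */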
3359 static unsigned
3360 aarch64_get_condition_code (rtx x)
3362 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3363 enum rtx_code comp_code = GET_CODE (x);
3365 if (GET_MODE_CLASS (mode) != MODE_CC)
3366 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3368 switch (mode)
3370 case CCFPmode:
3371 case CCFPEmode:
3372 switch (comp_code)
3374 case GE: return AARCH64_GE;
3375 case GT: return AARCH64_GT;
3376 case LE: return AARCH64_LS;
3377 case LT: return AARCH64_MI;
3378 case NE: return AARCH64_NE;
3379 case EQ: return AARCH64_EQ;
3380 case ORDERED: return AARCH64_VC;
3381 case UNORDERED: return AARCH64_VS;
3382 case UNLT: return AARCH64_LT;
3383 case UNLE: return AARCH64_LE;
3384 case UNGT: return AARCH64_HI;
3385 case UNGE: return AARCH64_PL;
3386 default: gcc_unreachable ();
3388 break;
3390 case CCmode:
3391 switch (comp_code)
3393 case NE: return AARCH64_NE;
3394 case EQ: return AARCH64_EQ;
3395 case GE: return AARCH64_GE;
3396 case GT: return AARCH64_GT;
3397 case LE: return AARCH64_LE;
3398 case LT: return AARCH64_LT;
3399 case GEU: return AARCH64_CS;
3400 case GTU: return AARCH64_HI;
3401 case LEU: return AARCH64_LS;
3402 case LTU: return AARCH64_CC;
3403 default: gcc_unreachable ();
3405 break;
3407 case CC_SWPmode:
3408 case CC_ZESWPmode:
3409 case CC_SESWPmode:
3410 switch (comp_code)
3412 case NE: return AARCH64_NE;
3413 case EQ: return AARCH64_EQ;
3414 case GE: return AARCH64_LE;
3415 case GT: return AARCH64_LT;
3416 case LE: return AARCH64_GE;
3417 case LT: return AARCH64_GT;
3418 case GEU: return AARCH64_LS;
3419 case GTU: return AARCH64_CC;
3420 case LEU: return AARCH64_CS;
3421 case LTU: return AARCH64_HI;
3422 default: gcc_unreachable ();
3424 break;
3426 case CC_NZmode:
3427 switch (comp_code)
3429 case NE: return AARCH64_NE;
3430 case EQ: return AARCH64_EQ;
3431 case GE: return AARCH64_PL;
3432 case LT: return AARCH64_MI;
3433 default: gcc_unreachable ();
3435 break;
3437 case CC_Zmode:
3438 switch (comp_code)
3440 case NE: return AARCH64_NE;
3441 case EQ: return AARCH64_EQ;
3442 default: gcc_unreachable ();
3444 break;
3446 default:
3447 gcc_unreachable ();
3448 break;
3452 static unsigned
3453 bit_count (unsigned HOST_WIDE_INT value)
3455 unsigned count = 0;
3457 while (value)
3459 count++;
3460 value &= value - 1;
3463 return count;
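/* Illustrative example (not from the original source): the loop above
   clears the lowest set bit on each iteration (value &= value - 1), so
   for value == 0b101100 it runs three times, 0b101100 -> 0b101000 ->
   0b100000 -> 0, and returns 3.  */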
3466 void
3467 aarch64_print_operand (FILE *f, rtx x, char code)
3469 switch (code)
3471 /* An integer or symbol address without a preceding # sign. */
3472 case 'c':
3473 switch (GET_CODE (x))
3475 case CONST_INT:
3476 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3477 break;
3479 case SYMBOL_REF:
3480 output_addr_const (f, x);
3481 break;
3483 case CONST:
3484 if (GET_CODE (XEXP (x, 0)) == PLUS
3485 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3487 output_addr_const (f, x);
3488 break;
3490 /* Fall through. */
3492 default:
3493 output_operand_lossage ("Unsupported operand for code '%c'", code);
3495 break;
3497 case 'e':
3498 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3500 int n;
3502 if (GET_CODE (x) != CONST_INT
3503 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3505 output_operand_lossage ("invalid operand for '%%%c'", code);
3506 return;
3509 switch (n)
3511 case 3:
3512 fputc ('b', f);
3513 break;
3514 case 4:
3515 fputc ('h', f);
3516 break;
3517 case 5:
3518 fputc ('w', f);
3519 break;
3520 default:
3521 output_operand_lossage ("invalid operand for '%%%c'", code);
3522 return;
3525 break;
3527 case 'p':
3529 int n;
3531 /* Print N such that 2^N == X. */
3532 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3534 output_operand_lossage ("invalid operand for '%%%c'", code);
3535 return;
3538 asm_fprintf (f, "%d", n);
3540 break;
3542 case 'P':
3543 /* Print the number of non-zero bits in X (a const_int). */
3544 if (GET_CODE (x) != CONST_INT)
3546 output_operand_lossage ("invalid operand for '%%%c'", code);
3547 return;
3550 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3551 break;
3553 case 'H':
3554 /* Print the higher numbered register of a pair (TImode) of regs. */
3555 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3557 output_operand_lossage ("invalid operand for '%%%c'", code);
3558 return;
3561 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3562 break;
3564 case 'm':
3565 /* Print a condition (eq, ne, etc). */
3567 /* CONST_TRUE_RTX means always -- that's the default. */
3568 if (x == const_true_rtx)
3569 return;
3571 if (!COMPARISON_P (x))
3573 output_operand_lossage ("invalid operand for '%%%c'", code);
3574 return;
3577 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3578 break;
3580 case 'M':
3581 /* Print the inverse of a condition (eq <-> ne, etc). */
3583 /* CONST_TRUE_RTX means never -- that's the default. */
3584 if (x == const_true_rtx)
3586 fputs ("nv", f);
3587 return;
3590 if (!COMPARISON_P (x))
3592 output_operand_lossage ("invalid operand for '%%%c'", code);
3593 return;
3596 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3597 (aarch64_get_condition_code (x))], f);
3598 break;
3600 case 'b':
3601 case 'h':
3602 case 's':
3603 case 'd':
3604 case 'q':
3605 /* Print a scalar FP/SIMD register name. */
3606 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3608 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3609 return;
3611 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3612 break;
3614 case 'S':
3615 case 'T':
3616 case 'U':
3617 case 'V':
3618 /* Print the first FP/SIMD register name in a list. */
3619 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3621 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3622 return;
3624 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3625 break;
3627 case 'X':
3628 /* Print bottom 16 bits of integer constant in hex. */
3629 if (GET_CODE (x) != CONST_INT)
3631 output_operand_lossage ("invalid operand for '%%%c'", code);
3632 return;
3634 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3635 break;
3637 case 'w':
3638 case 'x':
3639 /* Print a general register name or the zero register (32-bit or
3640 64-bit). */
3641 if (x == const0_rtx
3642 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3644 asm_fprintf (f, "%czr", code);
3645 break;
3648 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3650 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3651 break;
3654 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3656 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3657 break;
3660 /* Fall through */
3662 case 0:
3663 /* Print a normal operand, if it's a general register, then we
3664 assume DImode. */
3665 if (x == NULL)
3667 output_operand_lossage ("missing operand");
3668 return;
3671 switch (GET_CODE (x))
3673 case REG:
3674 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3675 break;
3677 case MEM:
3678 aarch64_memory_reference_mode = GET_MODE (x);
3679 output_address (XEXP (x, 0));
3680 break;
3682 case LABEL_REF:
3683 case SYMBOL_REF:
3684 output_addr_const (asm_out_file, x);
3685 break;
3687 case CONST_INT:
3688 asm_fprintf (f, "%wd", INTVAL (x));
3689 break;
3691 case CONST_VECTOR:
3692 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3694 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3695 HOST_WIDE_INT_MIN,
3696 HOST_WIDE_INT_MAX));
3697 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3699 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3701 fputc ('0', f);
3703 else
3704 gcc_unreachable ();
3705 break;
3707 case CONST_DOUBLE:
3708 /* CONST_DOUBLE can represent a double-width integer.
3709 In this case, the mode of x is VOIDmode. */
3710 if (GET_MODE (x) == VOIDmode)
3711 ; /* Do Nothing. */
3712 else if (aarch64_float_const_zero_rtx_p (x))
3714 fputc ('0', f);
3715 break;
3717 else if (aarch64_float_const_representable_p (x))
3719 #define buf_size 20
3720 char float_buf[buf_size] = {'\0'};
3721 REAL_VALUE_TYPE r;
3722 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3723 real_to_decimal_for_mode (float_buf, &r,
3724 buf_size, buf_size,
3725 1, GET_MODE (x));
3726 asm_fprintf (asm_out_file, "%s", float_buf);
3727 break;
3728 #undef buf_size
3730 output_operand_lossage ("invalid constant");
3731 return;
3732 default:
3733 output_operand_lossage ("invalid operand");
3734 return;
3736 break;
3738 case 'A':
3739 if (GET_CODE (x) == HIGH)
3740 x = XEXP (x, 0);
3742 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3744 case SYMBOL_SMALL_GOT:
3745 asm_fprintf (asm_out_file, ":got:");
3746 break;
3748 case SYMBOL_SMALL_TLSGD:
3749 asm_fprintf (asm_out_file, ":tlsgd:");
3750 break;
3752 case SYMBOL_SMALL_TLSDESC:
3753 asm_fprintf (asm_out_file, ":tlsdesc:");
3754 break;
3756 case SYMBOL_SMALL_GOTTPREL:
3757 asm_fprintf (asm_out_file, ":gottprel:");
3758 break;
3760 case SYMBOL_SMALL_TPREL:
3761 asm_fprintf (asm_out_file, ":tprel:");
3762 break;
3764 case SYMBOL_TINY_GOT:
3765 gcc_unreachable ();
3766 break;
3768 default:
3769 break;
3771 output_addr_const (asm_out_file, x);
3772 break;
3774 case 'L':
3775 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3777 case SYMBOL_SMALL_GOT:
3778 asm_fprintf (asm_out_file, ":lo12:");
3779 break;
3781 case SYMBOL_SMALL_TLSGD:
3782 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3783 break;
3785 case SYMBOL_SMALL_TLSDESC:
3786 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3787 break;
3789 case SYMBOL_SMALL_GOTTPREL:
3790 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3791 break;
3793 case SYMBOL_SMALL_TPREL:
3794 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3795 break;
3797 case SYMBOL_TINY_GOT:
3798 asm_fprintf (asm_out_file, ":got:");
3799 break;
3801 default:
3802 break;
3804 output_addr_const (asm_out_file, x);
3805 break;
3807 case 'G':
3809 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3811 case SYMBOL_SMALL_TPREL:
3812 asm_fprintf (asm_out_file, ":tprel_hi12:");
3813 break;
3814 default:
3815 break;
3817 output_addr_const (asm_out_file, x);
3818 break;
3820 default:
3821 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3822 return;
3826 void
3827 aarch64_print_operand_address (FILE *f, rtx x)
3829 struct aarch64_address_info addr;
3831 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3832 MEM, true))
3833 switch (addr.type)
3835 case ADDRESS_REG_IMM:
3836 if (addr.offset == const0_rtx)
3837 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3838 else
3839 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3840 INTVAL (addr.offset));
3841 return;
3843 case ADDRESS_REG_REG:
3844 if (addr.shift == 0)
3845 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3846 reg_names [REGNO (addr.offset)]);
3847 else
3848 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3849 reg_names [REGNO (addr.offset)], addr.shift);
3850 return;
3852 case ADDRESS_REG_UXTW:
3853 if (addr.shift == 0)
3854 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3855 REGNO (addr.offset) - R0_REGNUM);
3856 else
3857 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3858 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3859 return;
3861 case ADDRESS_REG_SXTW:
3862 if (addr.shift == 0)
3863 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3864 REGNO (addr.offset) - R0_REGNUM);
3865 else
3866 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3867 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3868 return;
3870 case ADDRESS_REG_WB:
3871 switch (GET_CODE (x))
3873 case PRE_INC:
3874 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3875 GET_MODE_SIZE (aarch64_memory_reference_mode));
3876 return;
3877 case POST_INC:
3878 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3879 GET_MODE_SIZE (aarch64_memory_reference_mode));
3880 return;
3881 case PRE_DEC:
3882 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3883 GET_MODE_SIZE (aarch64_memory_reference_mode));
3884 return;
3885 case POST_DEC:
3886 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3887 GET_MODE_SIZE (aarch64_memory_reference_mode));
3888 return;
3889 case PRE_MODIFY:
3890 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3891 INTVAL (addr.offset));
3892 return;
3893 case POST_MODIFY:
3894 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3895 INTVAL (addr.offset));
3896 return;
3897 default:
3898 break;
3900 break;
3902 case ADDRESS_LO_SUM:
3903 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3904 output_addr_const (f, addr.offset);
3905 asm_fprintf (f, "]");
3906 return;
3908 case ADDRESS_SYMBOLIC:
3909 break;
3912 output_addr_const (f, x);
3915 bool
3916 aarch64_label_mentioned_p (rtx x)
3918 const char *fmt;
3919 int i;
3921 if (GET_CODE (x) == LABEL_REF)
3922 return true;
3924 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3925 referencing instruction, but they are constant offsets, not
3926 symbols. */
3927 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3928 return false;
3930 fmt = GET_RTX_FORMAT (GET_CODE (x));
3931 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3933 if (fmt[i] == 'E')
3935 int j;
3937 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3938 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3939 return 1;
3941 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3942 return 1;
3945 return 0;
3948 /* Implement REGNO_REG_CLASS. */
3950 enum reg_class
3951 aarch64_regno_regclass (unsigned regno)
3953 if (GP_REGNUM_P (regno))
3954 return CORE_REGS;
3956 if (regno == SP_REGNUM)
3957 return STACK_REG;
3959 if (regno == FRAME_POINTER_REGNUM
3960 || regno == ARG_POINTER_REGNUM)
3961 return POINTER_REGS;
3963 if (FP_REGNUM_P (regno))
3964 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3966 return NO_REGS;
3969 /* Try a machine-dependent way of reloading an illegitimate address
3970 operand. If we find one, push the reload and return the new rtx. */
3973 aarch64_legitimize_reload_address (rtx *x_p,
3974 enum machine_mode mode,
3975 int opnum, int type,
3976 int ind_levels ATTRIBUTE_UNUSED)
3978 rtx x = *x_p;
3980 /* Do not allow mem (plus (reg, const)) if vector mode. */
3981 if (aarch64_vector_mode_p (mode)
3982 && GET_CODE (x) == PLUS
3983 && REG_P (XEXP (x, 0))
3984 && CONST_INT_P (XEXP (x, 1)))
3986 rtx orig_rtx = x;
3987 x = copy_rtx (x);
3988 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3989 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3990 opnum, (enum reload_type) type);
3991 return x;
3994 /* We must recognize output that we have already generated ourselves. */
3995 if (GET_CODE (x) == PLUS
3996 && GET_CODE (XEXP (x, 0)) == PLUS
3997 && REG_P (XEXP (XEXP (x, 0), 0))
3998 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3999 && CONST_INT_P (XEXP (x, 1)))
4001 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4002 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4003 opnum, (enum reload_type) type);
4004 return x;
4007 /* We wish to handle large displacements off a base register by splitting
4008 the addend across an add and the mem insn. This can cut the number of
4009 extra insns needed from 3 to 1. It is only useful for load/store of a
4010 single register with a 12-bit offset field.  */
4011 if (GET_CODE (x) == PLUS
4012 && REG_P (XEXP (x, 0))
4013 && CONST_INT_P (XEXP (x, 1))
4014 && HARD_REGISTER_P (XEXP (x, 0))
4015 && mode != TImode
4016 && mode != TFmode
4017 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4019 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4020 HOST_WIDE_INT low = val & 0xfff;
4021 HOST_WIDE_INT high = val - low;
4022 HOST_WIDE_INT offs;
4023 rtx cst;
4024 enum machine_mode xmode = GET_MODE (x);
4026 /* In ILP32, xmode can be either DImode or SImode. */
4027 gcc_assert (xmode == DImode || xmode == SImode);
4029 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4030 BLKmode alignment. */
4031 if (GET_MODE_SIZE (mode) == 0)
4032 return NULL_RTX;
4034 offs = low % GET_MODE_SIZE (mode);
4036 /* Align misaligned offset by adjusting high part to compensate. */
4037 if (offs != 0)
4039 if (aarch64_uimm12_shift (high + offs))
4041 /* Align down. */
4042 low = low - offs;
4043 high = high + offs;
4045 else
4047 /* Align up. */
4048 offs = GET_MODE_SIZE (mode) - offs;
4049 low = low + offs;
4050 high = high + (low & 0x1000) - offs;
4051 low &= 0xfff;
4055 /* Check for overflow. */
4056 if (high + low != val)
4057 return NULL_RTX;
4059 cst = GEN_INT (high);
4060 if (!aarch64_uimm12_shift (high))
4061 cst = force_const_mem (xmode, cst);
4063 /* Reload high part into base reg, leaving the low part
4064 in the mem instruction.
4065 Note that replacing this gen_rtx_PLUS with plus_constant is
4066 wrong in this case because we rely on the
4067 (plus (plus reg c1) c2) structure being preserved so that
4068 XEXP (*p, 0) in push_reload below uses the correct term. */
4069 x = gen_rtx_PLUS (xmode,
4070 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4071 GEN_INT (low));
4073 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4074 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4075 opnum, (enum reload_type) type);
4076 return x;
4079 return NULL_RTX;
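/* Worked example (illustrative, not from the original source): a DImode
   access at base + 0x10ab8 splits into high == 0x10000 (a uimm12 value
   shifted by 12) and low == 0xab8 (a multiple of 8), so the reloaded
   address becomes roughly

       add  xT, xBASE, #0x10, lsl #12
       ldr  x0, [xT, #0xab8]

   instead of materialising the full constant separately first.  xT and
   xBASE are placeholder register names.  */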
4083 static reg_class_t
4084 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4085 reg_class_t rclass,
4086 enum machine_mode mode,
4087 secondary_reload_info *sri)
4089 /* Without the TARGET_SIMD instructions we cannot move a Q register
4090 to a Q register directly. We need a scratch. */
4091 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4092 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4093 && reg_class_subset_p (rclass, FP_REGS))
4095 if (mode == TFmode)
4096 sri->icode = CODE_FOR_aarch64_reload_movtf;
4097 else if (mode == TImode)
4098 sri->icode = CODE_FOR_aarch64_reload_movti;
4099 return NO_REGS;
4102 /* A TFmode or TImode memory access should be handled via the FP_REGS class
4103 because AArch64 has richer addressing modes for LDR/STR instructions
4104 than LDP/STP instructions. */
4105 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4106 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4107 return FP_REGS;
4109 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4110 return CORE_REGS;
4112 return NO_REGS;
4115 static bool
4116 aarch64_can_eliminate (const int from, const int to)
4118 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4119 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4121 if (frame_pointer_needed)
4123 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4124 return true;
4125 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4126 return false;
4127 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4128 && !cfun->calls_alloca)
4129 return true;
4130 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4131 return true;
4132 return false;
4134 else
4136 /* If we decided that we didn't need a leaf frame pointer but then used
4137 LR in the function, then we'll want a frame pointer after all, so
4138 prevent this elimination to ensure a frame pointer is used.
4140 NOTE: the original value of flag_omit_frame_pointer gets trashed
4141 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4142 of faked_omit_frame_pointer here (which is true when we always
4143 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4144 pointers when LR is clobbered). */
4145 if (to == STACK_POINTER_REGNUM
4146 && df_regs_ever_live_p (LR_REGNUM)
4147 && faked_omit_frame_pointer)
4148 return false;
4151 return true;
4154 HOST_WIDE_INT
4155 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4157 HOST_WIDE_INT frame_size;
4158 HOST_WIDE_INT offset;
4160 aarch64_layout_frame ();
4161 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4162 + crtl->outgoing_args_size
4163 + cfun->machine->saved_varargs_size);
4165 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4166 offset = frame_size;
4168 if (to == HARD_FRAME_POINTER_REGNUM)
4170 if (from == ARG_POINTER_REGNUM)
4171 return offset - crtl->outgoing_args_size;
4173 if (from == FRAME_POINTER_REGNUM)
4174 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4177 if (to == STACK_POINTER_REGNUM)
4179 if (from == FRAME_POINTER_REGNUM)
4181 HOST_WIDE_INT elim = crtl->outgoing_args_size
4182 + cfun->machine->frame.saved_regs_size
4183 + get_frame_size ()
4184 - cfun->machine->frame.fp_lr_offset;
4185 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4186 return elim;
4190 return offset;
4194 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4195 previous frame. */
4198 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4200 if (count != 0)
4201 return const0_rtx;
4202 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4206 static void
4207 aarch64_asm_trampoline_template (FILE *f)
4209 if (TARGET_ILP32)
4211 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4212 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4214 else
4216 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4217 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4219 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4220 assemble_aligned_integer (4, const0_rtx);
4221 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4222 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
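/* Illustrative layout note (added comment, not part of the original source),
   assuming the LP64 ABI where POINTER_BYTES == 8:

     bytes  0..15  trampoline code emitted above
     bytes 16..23  target function address, filled in by
		   aarch64_trampoline_init below
     bytes 24..31  static chain value, likewise filled in below.  */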
4225 static void
4226 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4228 rtx fnaddr, mem, a_tramp;
4229 const int tramp_code_sz = 16;
4231   /* We don't need to copy the trailing D-words; we fill those in below.  */
4232 emit_block_move (m_tramp, assemble_trampoline_template (),
4233 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4234 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4235 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4236 if (GET_MODE (fnaddr) != ptr_mode)
4237 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4238 emit_move_insn (mem, fnaddr);
4240 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4241 emit_move_insn (mem, chain_value);
4243 /* XXX We should really define a "clear_cache" pattern and use
4244 gen_clear_cache(). */
4245 a_tramp = XEXP (m_tramp, 0);
4246 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4247 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4248 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4249 ptr_mode);
4252 static unsigned char
4253 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4255 switch (regclass)
4257 case CORE_REGS:
4258 case POINTER_REGS:
4259 case GENERAL_REGS:
4260 case ALL_REGS:
4261 case FP_REGS:
4262 case FP_LO_REGS:
4263 return
4264 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4265 (GET_MODE_SIZE (mode) + 7) / 8;
4266 case STACK_REG:
4267 return 1;
4269 case NO_REGS:
4270 return 0;
4272 default:
4273 break;
4275 gcc_unreachable ();
4278 static reg_class_t
4279 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4281 if (regclass == POINTER_REGS)
4282 return GENERAL_REGS;
4284 if (regclass == STACK_REG)
4286 if (REG_P(x)
4287 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4288 return regclass;
4290 return NO_REGS;
4293 /* If it's an integer immediate that MOVI can't handle, then
4294 FP_REGS is not an option, so we return NO_REGS instead. */
4295 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4296 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4297 return NO_REGS;
4299   /* Register elimination can result in a request for
4300      SP+constant->FP_REGS.  We cannot support such operations, which
4301      use SP as the source and an FP_REG as the destination, so reject
4302      them right now.  */
4303 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4305 rtx lhs = XEXP (x, 0);
4307 /* Look through a possible SUBREG introduced by ILP32. */
4308 if (GET_CODE (lhs) == SUBREG)
4309 lhs = SUBREG_REG (lhs);
4311 gcc_assert (REG_P (lhs));
4312 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4313 POINTER_REGS));
4314 return NO_REGS;
4317 return regclass;
4320 void
4321 aarch64_asm_output_labelref (FILE* f, const char *name)
4323 asm_fprintf (f, "%U%s", name);
4326 static void
4327 aarch64_elf_asm_constructor (rtx symbol, int priority)
4329 if (priority == DEFAULT_INIT_PRIORITY)
4330 default_ctor_section_asm_out_constructor (symbol, priority);
4331 else
4333 section *s;
4334 char buf[18];
4335 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4336 s = get_section (buf, SECTION_WRITE, NULL);
4337 switch_to_section (s);
4338 assemble_align (POINTER_SIZE);
4339 assemble_aligned_integer (POINTER_BYTES, symbol);
4343 static void
4344 aarch64_elf_asm_destructor (rtx symbol, int priority)
4346 if (priority == DEFAULT_INIT_PRIORITY)
4347 default_dtor_section_asm_out_destructor (symbol, priority);
4348 else
4350 section *s;
4351 char buf[18];
4352 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4353 s = get_section (buf, SECTION_WRITE, NULL);
4354 switch_to_section (s);
4355 assemble_align (POINTER_SIZE);
4356 assemble_aligned_integer (POINTER_BYTES, symbol);
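/* Illustrative note (added comment, not part of the original source): for a
   byte-sized dispatch table, aarch64_output_casesi below emits a sequence
   along the lines of

     ldrb	w3, [x0, w1, uxtw]
     adr	x4, .Lrtx<N>
     add	x3, x4, w3, sxtb #2
     br	x3
   .Lrtx<N>:

   (with x0/w1/x3/x4 standing in for operands 0, 1, 3 and 4): the table
   entry is loaded, scaled by 4 and added to the address of the label that
   follows the branch.  */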
4360 const char*
4361 aarch64_output_casesi (rtx *operands)
4363 char buf[100];
4364 char label[100];
4365 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4366 int index;
4367 static const char *const patterns[4][2] =
4370 "ldrb\t%w3, [%0,%w1,uxtw]",
4371 "add\t%3, %4, %w3, sxtb #2"
4374 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4375 "add\t%3, %4, %w3, sxth #2"
4378 "ldr\t%w3, [%0,%w1,uxtw #2]",
4379 "add\t%3, %4, %w3, sxtw #2"
4381 /* We assume that DImode is only generated when not optimizing and
4382 that we don't really need 64-bit address offsets. That would
4383 imply an object file with 8GB of code in a single function! */
4385 "ldr\t%w3, [%0,%w1,uxtw #2]",
4386 "add\t%3, %4, %w3, sxtw #2"
4390 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4392 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4394 gcc_assert (index >= 0 && index <= 3);
4396   /* Need to implement table size reduction, by changing the code below.  */
4397 output_asm_insn (patterns[index][0], operands);
4398 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4399 snprintf (buf, sizeof (buf),
4400 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4401 output_asm_insn (buf, operands);
4402 output_asm_insn (patterns[index][1], operands);
4403 output_asm_insn ("br\t%3", operands);
4404 assemble_label (asm_out_file, label);
4405 return "";
4409 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4410 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4411 operator. */
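/* Example (added for illustration, not in the original source):
   aarch64_uxt_size (1, 0x1fe) returns 8, since 0x1fe == 0xff << 1 and a
   UXTB therefore covers the mask; aarch64_uxt_size (0, 0xffff) returns 16
   (UXTH); a mask that is not a contiguous 8/16/32-bit field shifted left
   by 0..3 yields 0.  */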
4414 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4416 if (shift >= 0 && shift <= 3)
4418 int size;
4419 for (size = 8; size <= 32; size *= 2)
4421 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4422 if (mask == bits << shift)
4423 return size;
4426 return 0;
4429 static bool
4430 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4431 const_rtx x ATTRIBUTE_UNUSED)
4433 /* We can't use blocks for constants when we're using a per-function
4434 constant pool. */
4435 return false;
4438 static section *
4439 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4440 rtx x ATTRIBUTE_UNUSED,
4441 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4443 /* Force all constant pool entries into the current function section. */
4444 return function_section (current_function_decl);
4448 /* Costs. */
4450 /* Helper function for rtx cost calculation. Strip a shift expression
4451 from X. Returns the inner operand if successful, or the original
4452 expression on failure. */
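/* For example (illustrative, not in the original source):
     (ashift (reg x1) (const_int 3))  ->  (reg x1)
     (mult (reg x1) (const_int 8))    ->  (reg x1)   [power-of-two multiply]
   while (mult (reg x1) (const_int 6)) is returned unchanged.  */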
4453 static rtx
4454 aarch64_strip_shift (rtx x)
4456 rtx op = x;
4458 if ((GET_CODE (op) == ASHIFT
4459 || GET_CODE (op) == ASHIFTRT
4460 || GET_CODE (op) == LSHIFTRT)
4461 && CONST_INT_P (XEXP (op, 1)))
4462 return XEXP (op, 0);
4464 if (GET_CODE (op) == MULT
4465 && CONST_INT_P (XEXP (op, 1))
4466 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4467 return XEXP (op, 0);
4469 return x;
4472 /* Helper function for rtx cost calculation. Strip a shift or extend
4473 expression from X. Returns the inner operand if successful, or the
4474 original expression on failure. We deal with a number of possible
4475 canonicalization variations here. */
4476 static rtx
4477 aarch64_strip_shift_or_extend (rtx x)
4479 rtx op = x;
4481 /* Zero and sign extraction of a widened value. */
4482 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4483 && XEXP (op, 2) == const0_rtx
4484 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4485 XEXP (op, 1)))
4486 return XEXP (XEXP (op, 0), 0);
4488 /* It can also be represented (for zero-extend) as an AND with an
4489 immediate. */
4490 if (GET_CODE (op) == AND
4491 && GET_CODE (XEXP (op, 0)) == MULT
4492 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4493 && CONST_INT_P (XEXP (op, 1))
4494 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4495 INTVAL (XEXP (op, 1))) != 0)
4496 return XEXP (XEXP (op, 0), 0);
4498 /* Now handle extended register, as this may also have an optional
4499 left shift by 1..4. */
4500 if (GET_CODE (op) == ASHIFT
4501 && CONST_INT_P (XEXP (op, 1))
4502 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4503 op = XEXP (op, 0);
4505 if (GET_CODE (op) == ZERO_EXTEND
4506 || GET_CODE (op) == SIGN_EXTEND)
4507 op = XEXP (op, 0);
4509 if (op != x)
4510 return op;
4512 return aarch64_strip_shift (x);
4515 /* Calculate the cost of calculating X, storing it in *COST. Result
4516 is true if the total cost of the operation has now been calculated. */
4517 static bool
4518 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4519 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4521 rtx op0, op1;
4522 const struct cpu_cost_table *extra_cost
4523 = aarch64_tune_params->insn_extra_cost;
4525 switch (code)
4527 case SET:
4528 op0 = SET_DEST (x);
4529 op1 = SET_SRC (x);
4531 switch (GET_CODE (op0))
4533 case MEM:
4534 if (speed)
4535 *cost += extra_cost->ldst.store;
4537 if (op1 != const0_rtx)
4538 *cost += rtx_cost (op1, SET, 1, speed);
4539 return true;
4541 case SUBREG:
4542 if (! REG_P (SUBREG_REG (op0)))
4543 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4544 /* Fall through. */
4545 case REG:
4546 /* Cost is just the cost of the RHS of the set. */
4547 *cost += rtx_cost (op1, SET, 1, true);
4548 return true;
4550 case ZERO_EXTRACT: /* Bit-field insertion. */
4551 case SIGN_EXTRACT:
4552 /* Strip any redundant widening of the RHS to meet the width of
4553 the target. */
4554 if (GET_CODE (op1) == SUBREG)
4555 op1 = SUBREG_REG (op1);
4556 if ((GET_CODE (op1) == ZERO_EXTEND
4557 || GET_CODE (op1) == SIGN_EXTEND)
4558 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4559 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4560 >= INTVAL (XEXP (op0, 1))))
4561 op1 = XEXP (op1, 0);
4562 *cost += rtx_cost (op1, SET, 1, speed);
4563 return true;
4565 default:
4566 break;
4568 return false;
4570 case MEM:
4571 if (speed)
4572 *cost += extra_cost->ldst.load;
4574 return true;
4576 case NEG:
4577 op0 = CONST0_RTX (GET_MODE (x));
4578 op1 = XEXP (x, 0);
4579 goto cost_minus;
4581 case COMPARE:
4582 op0 = XEXP (x, 0);
4583 op1 = XEXP (x, 1);
4585 if (op1 == const0_rtx
4586 && GET_CODE (op0) == AND)
4588 x = op0;
4589 goto cost_logic;
4592 /* Comparisons can work if the order is swapped.
4593 Canonicalization puts the more complex operation first, but
4594 we want it in op1. */
4595 if (! (REG_P (op0)
4596 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4598 op0 = XEXP (x, 1);
4599 op1 = XEXP (x, 0);
4601 goto cost_minus;
4603 case MINUS:
4604 op0 = XEXP (x, 0);
4605 op1 = XEXP (x, 1);
4607 cost_minus:
4608 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4609 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4610 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4612 if (op0 != const0_rtx)
4613 *cost += rtx_cost (op0, MINUS, 0, speed);
4615 if (CONST_INT_P (op1))
4617 if (!aarch64_uimm12_shift (INTVAL (op1)))
4618 *cost += rtx_cost (op1, MINUS, 1, speed);
4620 else
4622 op1 = aarch64_strip_shift_or_extend (op1);
4623 *cost += rtx_cost (op1, MINUS, 1, speed);
4625 return true;
4628 return false;
4630 case PLUS:
4631 op0 = XEXP (x, 0);
4632 op1 = XEXP (x, 1);
4634 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4636 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4638 *cost += rtx_cost (op0, PLUS, 0, speed);
4640 else
4642 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4644 if (new_op0 == op0
4645 && GET_CODE (op0) == MULT)
4647 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4648 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4649 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4650 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4652 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4653 speed)
4654 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4655 speed)
4656 + rtx_cost (op1, PLUS, 1, speed));
4657 if (speed)
4658 *cost +=
4659 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4660 return true;
4663 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4664 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4665 + rtx_cost (op1, PLUS, 1, speed));
4667 if (speed)
4668 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4670 return true;
4673 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4674 + rtx_cost (op1, PLUS, 1, speed));
4676 return true;
4679 return false;
4681 case IOR:
4682 case XOR:
4683 case AND:
4684 cost_logic:
4685 op0 = XEXP (x, 0);
4686 op1 = XEXP (x, 1);
4688 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4690 if (CONST_INT_P (op1)
4691 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4693 *cost += rtx_cost (op0, AND, 0, speed);
4695 else
4697 if (GET_CODE (op0) == NOT)
4698 op0 = XEXP (op0, 0);
4699 op0 = aarch64_strip_shift (op0);
4700 *cost += (rtx_cost (op0, AND, 0, speed)
4701 + rtx_cost (op1, AND, 1, speed));
4703 return true;
4705 return false;
4707 case ZERO_EXTEND:
4708 if ((GET_MODE (x) == DImode
4709 && GET_MODE (XEXP (x, 0)) == SImode)
4710 || GET_CODE (XEXP (x, 0)) == MEM)
4712 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4713 return true;
4715 return false;
4717 case SIGN_EXTEND:
4718 if (GET_CODE (XEXP (x, 0)) == MEM)
4720 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4721 return true;
4723 return false;
4725 case ROTATE:
4726 if (!CONST_INT_P (XEXP (x, 1)))
4727 *cost += COSTS_N_INSNS (2);
4728 /* Fall through. */
4729 case ROTATERT:
4730 case LSHIFTRT:
4731 case ASHIFT:
4732 case ASHIFTRT:
4734 /* Shifting by a register often takes an extra cycle. */
4735 if (speed && !CONST_INT_P (XEXP (x, 1)))
4736 *cost += extra_cost->alu.arith_shift_reg;
4738 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4739 return true;
4741 case HIGH:
4742 if (!CONSTANT_P (XEXP (x, 0)))
4743 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4744 return true;
4746 case LO_SUM:
4747 if (!CONSTANT_P (XEXP (x, 1)))
4748 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4749 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4750 return true;
4752 case ZERO_EXTRACT:
4753 case SIGN_EXTRACT:
4754 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4755 return true;
4757 case MULT:
4758 op0 = XEXP (x, 0);
4759 op1 = XEXP (x, 1);
4761 *cost = COSTS_N_INSNS (1);
4762 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4764 if (CONST_INT_P (op1)
4765 && exact_log2 (INTVAL (op1)) > 0)
4767 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4768 return true;
4771 if ((GET_CODE (op0) == ZERO_EXTEND
4772 && GET_CODE (op1) == ZERO_EXTEND)
4773 || (GET_CODE (op0) == SIGN_EXTEND
4774 && GET_CODE (op1) == SIGN_EXTEND))
4776 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4777 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4778 if (speed)
4779 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4780 return true;
4783 if (speed)
4784 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4786 else if (speed)
4788 if (GET_MODE (x) == DFmode)
4789 *cost += extra_cost->fp[1].mult;
4790 else if (GET_MODE (x) == SFmode)
4791 *cost += extra_cost->fp[0].mult;
4794 return false; /* All arguments need to be in registers. */
4796 case MOD:
4797 case UMOD:
4798 *cost = COSTS_N_INSNS (2);
4799 if (speed)
4801 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4802 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4803 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4804 else if (GET_MODE (x) == DFmode)
4805 *cost += (extra_cost->fp[1].mult
4806 + extra_cost->fp[1].div);
4807 else if (GET_MODE (x) == SFmode)
4808 *cost += (extra_cost->fp[0].mult
4809 + extra_cost->fp[0].div);
4811 return false; /* All arguments need to be in registers. */
4813 case DIV:
4814 case UDIV:
4815 *cost = COSTS_N_INSNS (1);
4816 if (speed)
4818 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4819 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4820 else if (GET_MODE (x) == DFmode)
4821 *cost += extra_cost->fp[1].div;
4822 else if (GET_MODE (x) == SFmode)
4823 *cost += extra_cost->fp[0].div;
4825 return false; /* All arguments need to be in registers. */
4827 default:
4828 break;
4830 return false;
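/* Illustrative summary (added comment, not in the original source) of how
   aarch64_address_cost below classifies addresses via the tuning table's
   addr_cost fields:

     [Xn, #imm]                 -> imm_offset
     [Xn, Xm]                   -> register_offset
     [Xn, Wm, sxtw] / scaled    -> register_extend
     pre/post increment forms   -> pre_modify / post_modify
     bare symbol or label       -> imm_offset  */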
4833 static int
4834 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4835 enum machine_mode mode ATTRIBUTE_UNUSED,
4836 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4838 enum rtx_code c = GET_CODE (x);
4839 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4841 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4842 return addr_cost->pre_modify;
4844 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4845 return addr_cost->post_modify;
4847 if (c == PLUS)
4849 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4850 return addr_cost->imm_offset;
4851 else if (GET_CODE (XEXP (x, 0)) == MULT
4852 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4853 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4854 return addr_cost->register_extend;
4856 return addr_cost->register_offset;
4858 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4859 return addr_cost->imm_offset;
4861 return 0;
4864 static int
4865 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4866 reg_class_t from, reg_class_t to)
4868 const struct cpu_regmove_cost *regmove_cost
4869 = aarch64_tune_params->regmove_cost;
4871 if (from == GENERAL_REGS && to == GENERAL_REGS)
4872 return regmove_cost->GP2GP;
4873 else if (from == GENERAL_REGS)
4874 return regmove_cost->GP2FP;
4875 else if (to == GENERAL_REGS)
4876 return regmove_cost->FP2GP;
4878 /* When AdvSIMD instructions are disabled it is not possible to move
4879 a 128-bit value directly between Q registers. This is handled in
4880 secondary reload. A general register is used as a scratch to move
4881 the upper DI value and the lower DI value is moved directly,
4882 hence the cost is the sum of three moves. */
4884 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4885 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4887 return regmove_cost->FP2FP;
4890 static int
4891 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4892 reg_class_t rclass ATTRIBUTE_UNUSED,
4893 bool in ATTRIBUTE_UNUSED)
4895 return aarch64_tune_params->memmov_cost;
4898 /* Vectorizer cost model target hooks. */
4900 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4901 static int
4902 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4903 tree vectype,
4904 int misalign ATTRIBUTE_UNUSED)
4906 unsigned elements;
4908 switch (type_of_cost)
4910 case scalar_stmt:
4911 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4913 case scalar_load:
4914 return aarch64_tune_params->vec_costs->scalar_load_cost;
4916 case scalar_store:
4917 return aarch64_tune_params->vec_costs->scalar_store_cost;
4919 case vector_stmt:
4920 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4922 case vector_load:
4923 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4925 case vector_store:
4926 return aarch64_tune_params->vec_costs->vec_store_cost;
4928 case vec_to_scalar:
4929 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4931 case scalar_to_vec:
4932 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4934 case unaligned_load:
4935 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4937 case unaligned_store:
4938 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4940 case cond_branch_taken:
4941 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4943 case cond_branch_not_taken:
4944 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4946 case vec_perm:
4947 case vec_promote_demote:
4948 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4950 case vec_construct:
4951 elements = TYPE_VECTOR_SUBPARTS (vectype);
4952 return elements / 2 + 1;
4954 default:
4955 gcc_unreachable ();
4959 /* Implement targetm.vectorize.add_stmt_cost. */
4960 static unsigned
4961 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4962 struct _stmt_vec_info *stmt_info, int misalign,
4963 enum vect_cost_model_location where)
4965 unsigned *cost = (unsigned *) data;
4966 unsigned retval = 0;
4968 if (flag_vect_cost_model)
4970 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4971 int stmt_cost =
4972 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4974 /* Statements in an inner loop relative to the loop being
4975 vectorized are weighted more heavily. The value here is
4976 a function (linear for now) of the loop nest level. */
4977 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4979 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4980 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4981 unsigned nest_level = loop_depth (loop);
4983 count *= nest_level;
4986 retval = (unsigned) (count * stmt_cost);
4987 cost[where] += retval;
4990 return retval;
4993 static void initialize_aarch64_code_model (void);
4995 /* Parse the architecture extension string. */
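/* Illustrative example (added comment, not in the original source): given
   something like -march=armv8-a+crc+nocrypto, the string passed here is
   "+crc+nocrypto"; "+crc" sets that extension's flags_on bits in
   aarch64_isa_flags and "+nocrypto" clears its flags_off bits.  The exact
   extension names come from the all_extensions table.  */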
4997 static void
4998 aarch64_parse_extension (char *str)
5000 /* The extension string is parsed left to right. */
5001 const struct aarch64_option_extension *opt = NULL;
5003 /* Flag to say whether we are adding or removing an extension. */
5004 int adding_ext = -1;
5006 while (str != NULL && *str != 0)
5008 char *ext;
5009 size_t len;
5011 str++;
5012 ext = strchr (str, '+');
5014 if (ext != NULL)
5015 len = ext - str;
5016 else
5017 len = strlen (str);
5019 if (len >= 2 && strncmp (str, "no", 2) == 0)
5021 adding_ext = 0;
5022 len -= 2;
5023 str += 2;
5025 else if (len > 0)
5026 adding_ext = 1;
5028 if (len == 0)
5030 error ("missing feature modifier after %qs", "+no");
5031 return;
5034 /* Scan over the extensions table trying to find an exact match. */
5035 for (opt = all_extensions; opt->name != NULL; opt++)
5037 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5039 /* Add or remove the extension. */
5040 if (adding_ext)
5041 aarch64_isa_flags |= opt->flags_on;
5042 else
5043 aarch64_isa_flags &= ~(opt->flags_off);
5044 break;
5048 if (opt->name == NULL)
5050 /* Extension not found in list. */
5051 error ("unknown feature modifier %qs", str);
5052 return;
5055 str = ext;
5058 return;
5061 /* Parse the ARCH string. */
5063 static void
5064 aarch64_parse_arch (void)
5066 char *ext;
5067 const struct processor *arch;
5068 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5069 size_t len;
5071 strcpy (str, aarch64_arch_string);
5073 ext = strchr (str, '+');
5075 if (ext != NULL)
5076 len = ext - str;
5077 else
5078 len = strlen (str);
5080 if (len == 0)
5082 error ("missing arch name in -march=%qs", str);
5083 return;
5086 /* Loop through the list of supported ARCHs to find a match. */
5087 for (arch = all_architectures; arch->name != NULL; arch++)
5089 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5091 selected_arch = arch;
5092 aarch64_isa_flags = selected_arch->flags;
5093 selected_cpu = &all_cores[selected_arch->core];
5095 if (ext != NULL)
5097 /* ARCH string contains at least one extension. */
5098 aarch64_parse_extension (ext);
5101 return;
5105 /* ARCH name not found in list. */
5106 error ("unknown value %qs for -march", str);
5107 return;
5110 /* Parse the CPU string. */
5112 static void
5113 aarch64_parse_cpu (void)
5115 char *ext;
5116 const struct processor *cpu;
5117 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5118 size_t len;
5120 strcpy (str, aarch64_cpu_string);
5122 ext = strchr (str, '+');
5124 if (ext != NULL)
5125 len = ext - str;
5126 else
5127 len = strlen (str);
5129 if (len == 0)
5131 error ("missing cpu name in -mcpu=%qs", str);
5132 return;
5135 /* Loop through the list of supported CPUs to find a match. */
5136 for (cpu = all_cores; cpu->name != NULL; cpu++)
5138 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5140 selected_cpu = cpu;
5141 selected_tune = cpu;
5142 aarch64_isa_flags = selected_cpu->flags;
5144 if (ext != NULL)
5146 /* CPU string contains at least one extension. */
5147 aarch64_parse_extension (ext);
5150 return;
5154 /* CPU name not found in list. */
5155 error ("unknown value %qs for -mcpu", str);
5156 return;
5159 /* Parse the TUNE string. */
5161 static void
5162 aarch64_parse_tune (void)
5164 const struct processor *cpu;
5165 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5166 strcpy (str, aarch64_tune_string);
5168 /* Loop through the list of supported CPUs to find a match. */
5169 for (cpu = all_cores; cpu->name != NULL; cpu++)
5171 if (strcmp (cpu->name, str) == 0)
5173 selected_tune = cpu;
5174 return;
5178 /* CPU name not found in list. */
5179 error ("unknown value %qs for -mtune", str);
5180 return;
5184 /* Implement TARGET_OPTION_OVERRIDE. */
5186 static void
5187 aarch64_override_options (void)
5189 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
5190 otherwise march remains undefined. mtune can be used with either march or
5191 mcpu. */
5193 if (aarch64_arch_string)
5195 aarch64_parse_arch ();
5196 aarch64_cpu_string = NULL;
5199 if (aarch64_cpu_string)
5201 aarch64_parse_cpu ();
5202 selected_arch = NULL;
5205 if (aarch64_tune_string)
5207 aarch64_parse_tune ();
5210 #ifndef HAVE_AS_MABI_OPTION
5211 /* The compiler may have been configured with 2.23.* binutils, which does
5212 not have support for ILP32. */
5213 if (TARGET_ILP32)
5214 error ("Assembler does not support -mabi=ilp32");
5215 #endif
5217 initialize_aarch64_code_model ();
5219 aarch64_build_bitmask_table ();
5221 /* This target defaults to strict volatile bitfields. */
5222 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5223 flag_strict_volatile_bitfields = 1;
5225 /* If the user did not specify a processor, choose the default
5226 one for them. This will be the CPU set during configuration using
5227      --with-cpu, otherwise it is "cortex-a53".  */
5228 if (!selected_cpu)
5230 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5231 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5234 gcc_assert (selected_cpu);
5236   /* The selected cpu may be an architecture, so look up tuning by core ID.  */
5237 if (!selected_tune)
5238 selected_tune = &all_cores[selected_cpu->core];
5240 aarch64_tune_flags = selected_tune->flags;
5241 aarch64_tune = selected_tune->core;
5242 aarch64_tune_params = selected_tune->tune;
5244 aarch64_override_options_after_change ();
5247 /* Implement targetm.override_options_after_change. */
5249 static void
5250 aarch64_override_options_after_change (void)
5252 faked_omit_frame_pointer = false;
5254 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5255 that aarch64_frame_pointer_required will be called. We need to remember
5256 whether flag_omit_frame_pointer was turned on normally or just faked. */
5258 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5260 flag_omit_frame_pointer = true;
5261 faked_omit_frame_pointer = true;
5265 static struct machine_function *
5266 aarch64_init_machine_status (void)
5268 struct machine_function *machine;
5269 machine = ggc_alloc_cleared_machine_function ();
5270 return machine;
5273 void
5274 aarch64_init_expanders (void)
5276 init_machine_status = aarch64_init_machine_status;
5279 /* A checking mechanism for the implementation of the various code models. */
5280 static void
5281 initialize_aarch64_code_model (void)
5283 if (flag_pic)
5285 switch (aarch64_cmodel_var)
5287 case AARCH64_CMODEL_TINY:
5288 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5289 break;
5290 case AARCH64_CMODEL_SMALL:
5291 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5292 break;
5293 case AARCH64_CMODEL_LARGE:
5294 sorry ("code model %qs with -f%s", "large",
5295 flag_pic > 1 ? "PIC" : "pic");
5296 default:
5297 gcc_unreachable ();
5300 else
5301 aarch64_cmodel = aarch64_cmodel_var;
5304 /* Return true if SYMBOL_REF X binds locally. */
5306 static bool
5307 aarch64_symbol_binds_local_p (const_rtx x)
5309 return (SYMBOL_REF_DECL (x)
5310 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5311 : SYMBOL_REF_LOCAL_P (x));
5314 /* Return true if SYMBOL_REF X is thread-local.  */
5315 static bool
5316 aarch64_tls_symbol_p (rtx x)
5318 if (! TARGET_HAVE_TLS)
5319 return false;
5321 if (GET_CODE (x) != SYMBOL_REF)
5322 return false;
5324 return SYMBOL_REF_TLS_MODEL (x) != 0;
5327 /* Classify a TLS symbol into one of the TLS kinds. */
5328 enum aarch64_symbol_type
5329 aarch64_classify_tls_symbol (rtx x)
5331 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5333 switch (tls_kind)
5335 case TLS_MODEL_GLOBAL_DYNAMIC:
5336 case TLS_MODEL_LOCAL_DYNAMIC:
5337 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5339 case TLS_MODEL_INITIAL_EXEC:
5340 return SYMBOL_SMALL_GOTTPREL;
5342 case TLS_MODEL_LOCAL_EXEC:
5343 return SYMBOL_SMALL_TPREL;
5345 case TLS_MODEL_EMULATED:
5346 case TLS_MODEL_NONE:
5347 return SYMBOL_FORCE_TO_MEM;
5349 default:
5350 gcc_unreachable ();
5354 /* Return the method that should be used to access SYMBOL_REF or
5355 LABEL_REF X in context CONTEXT. */
5357 enum aarch64_symbol_type
5358 aarch64_classify_symbol (rtx x,
5359 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5361 if (GET_CODE (x) == LABEL_REF)
5363 switch (aarch64_cmodel)
5365 case AARCH64_CMODEL_LARGE:
5366 return SYMBOL_FORCE_TO_MEM;
5368 case AARCH64_CMODEL_TINY_PIC:
5369 case AARCH64_CMODEL_TINY:
5370 return SYMBOL_TINY_ABSOLUTE;
5372 case AARCH64_CMODEL_SMALL_PIC:
5373 case AARCH64_CMODEL_SMALL:
5374 return SYMBOL_SMALL_ABSOLUTE;
5376 default:
5377 gcc_unreachable ();
5381 if (GET_CODE (x) == SYMBOL_REF)
5383 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5384 || CONSTANT_POOL_ADDRESS_P (x))
5385 return SYMBOL_FORCE_TO_MEM;
5387 if (aarch64_tls_symbol_p (x))
5388 return aarch64_classify_tls_symbol (x);
5390 switch (aarch64_cmodel)
5392 case AARCH64_CMODEL_TINY:
5393 if (SYMBOL_REF_WEAK (x))
5394 return SYMBOL_FORCE_TO_MEM;
5395 return SYMBOL_TINY_ABSOLUTE;
5397 case AARCH64_CMODEL_SMALL:
5398 if (SYMBOL_REF_WEAK (x))
5399 return SYMBOL_FORCE_TO_MEM;
5400 return SYMBOL_SMALL_ABSOLUTE;
5402 case AARCH64_CMODEL_TINY_PIC:
5403 if (!aarch64_symbol_binds_local_p (x))
5404 return SYMBOL_TINY_GOT;
5405 return SYMBOL_TINY_ABSOLUTE;
5407 case AARCH64_CMODEL_SMALL_PIC:
5408 if (!aarch64_symbol_binds_local_p (x))
5409 return SYMBOL_SMALL_GOT;
5410 return SYMBOL_SMALL_ABSOLUTE;
5412 default:
5413 gcc_unreachable ();
5417 /* By default push everything into the constant pool. */
5418 return SYMBOL_FORCE_TO_MEM;
5421 bool
5422 aarch64_constant_address_p (rtx x)
5424 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5427 bool
5428 aarch64_legitimate_pic_operand_p (rtx x)
5430 if (GET_CODE (x) == SYMBOL_REF
5431 || (GET_CODE (x) == CONST
5432 && GET_CODE (XEXP (x, 0)) == PLUS
5433 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5434 return false;
5436 return true;
5439 /* Return true if X holds either a quarter-precision or
5440 floating-point +0.0 constant. */
5441 static bool
5442 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5444 if (!CONST_DOUBLE_P (x))
5445 return false;
5447   /* TODO: We could handle moving 0.0 to a TFmode register,
5448      but first we would like to refactor the movtf_aarch64
5449      pattern to be more amenable to splitting moves properly and
5450      to gating correctly on TARGET_SIMD.  For now, reject all
5451      constants that are not destined for SFmode or DFmode registers.  */
5452 if (!(mode == SFmode || mode == DFmode))
5453 return false;
5455 if (aarch64_float_const_zero_rtx_p (x))
5456 return true;
5457 return aarch64_float_const_representable_p (x);
5460 static bool
5461 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5463 /* Do not allow vector struct mode constants. We could support
5464 0 and -1 easily, but they need support in aarch64-simd.md. */
5465 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5466 return false;
5468 /* This could probably go away because
5469 we now decompose CONST_INTs according to expand_mov_immediate. */
5470 if ((GET_CODE (x) == CONST_VECTOR
5471 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5472 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5473 return !targetm.cannot_force_const_mem (mode, x);
5475 if (GET_CODE (x) == HIGH
5476 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5477 return true;
5479 return aarch64_constant_address_p (x);
5483 aarch64_load_tp (rtx target)
5485 if (!target
5486 || GET_MODE (target) != Pmode
5487 || !register_operand (target, Pmode))
5488 target = gen_reg_rtx (Pmode);
5490 /* Can return in any reg. */
5491 emit_insn (gen_aarch64_load_tp_hard (target));
5492 return target;
5495 /* On AAPCS systems, this is the "struct __va_list". */
5496 static GTY(()) tree va_list_type;
5498 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5499 Return the type to use as __builtin_va_list.
5501 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5503 struct __va_list
5505 void *__stack;
5506 void *__gr_top;
5507 void *__vr_top;
5508 int __gr_offs;
5509 int __vr_offs;
5510 }; */
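/* Worked example (added for illustration, not in the original source),
   assuming the LP64 ABI: in a variadic function whose named arguments use
   two of the eight general argument registers and none of the eight vector
   registers, va_start below leaves __gr_offs == -(8 - 2) * 8 == -48 and
   __vr_offs == -8 * 16 == -128, i.e. the (negative) offsets count upwards
   towards __gr_top / __vr_top as argument registers are consumed.  */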
5512 static tree
5513 aarch64_build_builtin_va_list (void)
5515 tree va_list_name;
5516 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5518 /* Create the type. */
5519 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5520 /* Give it the required name. */
5521 va_list_name = build_decl (BUILTINS_LOCATION,
5522 TYPE_DECL,
5523 get_identifier ("__va_list"),
5524 va_list_type);
5525 DECL_ARTIFICIAL (va_list_name) = 1;
5526 TYPE_NAME (va_list_type) = va_list_name;
5527 TYPE_STUB_DECL (va_list_type) = va_list_name;
5529 /* Create the fields. */
5530 f_stack = build_decl (BUILTINS_LOCATION,
5531 FIELD_DECL, get_identifier ("__stack"),
5532 ptr_type_node);
5533 f_grtop = build_decl (BUILTINS_LOCATION,
5534 FIELD_DECL, get_identifier ("__gr_top"),
5535 ptr_type_node);
5536 f_vrtop = build_decl (BUILTINS_LOCATION,
5537 FIELD_DECL, get_identifier ("__vr_top"),
5538 ptr_type_node);
5539 f_groff = build_decl (BUILTINS_LOCATION,
5540 FIELD_DECL, get_identifier ("__gr_offs"),
5541 integer_type_node);
5542 f_vroff = build_decl (BUILTINS_LOCATION,
5543 FIELD_DECL, get_identifier ("__vr_offs"),
5544 integer_type_node);
5546 DECL_ARTIFICIAL (f_stack) = 1;
5547 DECL_ARTIFICIAL (f_grtop) = 1;
5548 DECL_ARTIFICIAL (f_vrtop) = 1;
5549 DECL_ARTIFICIAL (f_groff) = 1;
5550 DECL_ARTIFICIAL (f_vroff) = 1;
5552 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5553 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5554 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5555 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5556 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5558 TYPE_FIELDS (va_list_type) = f_stack;
5559 DECL_CHAIN (f_stack) = f_grtop;
5560 DECL_CHAIN (f_grtop) = f_vrtop;
5561 DECL_CHAIN (f_vrtop) = f_groff;
5562 DECL_CHAIN (f_groff) = f_vroff;
5564 /* Compute its layout. */
5565 layout_type (va_list_type);
5567 return va_list_type;
5570 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5571 static void
5572 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5574 const CUMULATIVE_ARGS *cum;
5575 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5576 tree stack, grtop, vrtop, groff, vroff;
5577 tree t;
5578 int gr_save_area_size;
5579 int vr_save_area_size;
5580 int vr_offset;
5582 cum = &crtl->args.info;
5583 gr_save_area_size
5584 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5585 vr_save_area_size
5586 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5588 if (TARGET_GENERAL_REGS_ONLY)
5590 if (cum->aapcs_nvrn > 0)
5591 sorry ("%qs and floating point or vector arguments",
5592 "-mgeneral-regs-only");
5593 vr_save_area_size = 0;
5596 f_stack = TYPE_FIELDS (va_list_type_node);
5597 f_grtop = DECL_CHAIN (f_stack);
5598 f_vrtop = DECL_CHAIN (f_grtop);
5599 f_groff = DECL_CHAIN (f_vrtop);
5600 f_vroff = DECL_CHAIN (f_groff);
5602 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5603 NULL_TREE);
5604 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5605 NULL_TREE);
5606 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5607 NULL_TREE);
5608 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5609 NULL_TREE);
5610 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5611 NULL_TREE);
5613 /* Emit code to initialize STACK, which points to the next varargs stack
5614 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5615 by named arguments. STACK is 8-byte aligned. */
5616 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5617 if (cum->aapcs_stack_size > 0)
5618 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5619 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5620 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5622 /* Emit code to initialize GRTOP, the top of the GR save area.
5623 virtual_incoming_args_rtx should have been 16 byte aligned. */
5624 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5625 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5626 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5628 /* Emit code to initialize VRTOP, the top of the VR save area.
5629 This address is gr_save_area_bytes below GRTOP, rounded
5630 down to the next 16-byte boundary. */
5631 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5632 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5633 STACK_BOUNDARY / BITS_PER_UNIT);
5635 if (vr_offset)
5636 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5637 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5640 /* Emit code to initialize GROFF, the offset from GRTOP of the
5641 next GPR argument. */
5642 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5643 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5644 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5646   /* Likewise emit code to initialize VROFF, the offset from VRTOP
5647 of the next VR argument. */
5648 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5649 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5653 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5655 static tree
5656 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5657 gimple_seq *post_p ATTRIBUTE_UNUSED)
5659 tree addr;
5660 bool indirect_p;
5661 bool is_ha; /* is HFA or HVA. */
5662 bool dw_align; /* double-word align. */
5663 enum machine_mode ag_mode = VOIDmode;
5664 int nregs;
5665 enum machine_mode mode;
5667 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5668 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5669 HOST_WIDE_INT size, rsize, adjust, align;
5670 tree t, u, cond1, cond2;
5672 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5673 if (indirect_p)
5674 type = build_pointer_type (type);
5676 mode = TYPE_MODE (type);
5678 f_stack = TYPE_FIELDS (va_list_type_node);
5679 f_grtop = DECL_CHAIN (f_stack);
5680 f_vrtop = DECL_CHAIN (f_grtop);
5681 f_groff = DECL_CHAIN (f_vrtop);
5682 f_vroff = DECL_CHAIN (f_groff);
5684 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5685 f_stack, NULL_TREE);
5686 size = int_size_in_bytes (type);
5687 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5689 dw_align = false;
5690 adjust = 0;
5691 if (aarch64_vfp_is_call_or_return_candidate (mode,
5692 type,
5693 &ag_mode,
5694 &nregs,
5695 &is_ha))
5697 /* TYPE passed in fp/simd registers. */
5698 if (TARGET_GENERAL_REGS_ONLY)
5699 sorry ("%qs and floating point or vector arguments",
5700 "-mgeneral-regs-only");
5702 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5703 unshare_expr (valist), f_vrtop, NULL_TREE);
5704 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5705 unshare_expr (valist), f_vroff, NULL_TREE);
5707 rsize = nregs * UNITS_PER_VREG;
5709 if (is_ha)
5711 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5712 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5714 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5715 && size < UNITS_PER_VREG)
5717 adjust = UNITS_PER_VREG - size;
5720 else
5722 /* TYPE passed in general registers. */
5723 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5724 unshare_expr (valist), f_grtop, NULL_TREE);
5725 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5726 unshare_expr (valist), f_groff, NULL_TREE);
5727 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5728 nregs = rsize / UNITS_PER_WORD;
5730 if (align > 8)
5731 dw_align = true;
5733 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5734 && size < UNITS_PER_WORD)
5736 adjust = UNITS_PER_WORD - size;
5740 /* Get a local temporary for the field value. */
5741 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5743 /* Emit code to branch if off >= 0. */
5744 t = build2 (GE_EXPR, boolean_type_node, off,
5745 build_int_cst (TREE_TYPE (off), 0));
5746 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5748 if (dw_align)
5750 /* Emit: offs = (offs + 15) & -16. */
5751 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5752 build_int_cst (TREE_TYPE (off), 15));
5753 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5754 build_int_cst (TREE_TYPE (off), -16));
5755 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5757 else
5758 roundup = NULL;
5760 /* Update ap.__[g|v]r_offs */
5761 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5762 build_int_cst (TREE_TYPE (off), rsize));
5763 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5765 /* String up. */
5766 if (roundup)
5767 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5769 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5770 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5771 build_int_cst (TREE_TYPE (f_off), 0));
5772 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5774 /* String up: make sure the assignment happens before the use. */
5775 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5776 COND_EXPR_ELSE (cond1) = t;
5778 /* Prepare the trees handling the argument that is passed on the stack;
5779 the top level node will store in ON_STACK. */
5780 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5781 if (align > 8)
5783 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5784 t = fold_convert (intDI_type_node, arg);
5785 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5786 build_int_cst (TREE_TYPE (t), 15));
5787 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5788 build_int_cst (TREE_TYPE (t), -16));
5789 t = fold_convert (TREE_TYPE (arg), t);
5790 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5792 else
5793 roundup = NULL;
5794 /* Advance ap.__stack */
5795 t = fold_convert (intDI_type_node, arg);
5796 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5797 build_int_cst (TREE_TYPE (t), size + 7));
5798 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5799 build_int_cst (TREE_TYPE (t), -8));
5800 t = fold_convert (TREE_TYPE (arg), t);
5801 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5802 /* String up roundup and advance. */
5803 if (roundup)
5804 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5805 /* String up with arg */
5806 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5807 /* Big-endianness related address adjustment. */
5808 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5809 && size < UNITS_PER_WORD)
5811 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5812 size_int (UNITS_PER_WORD - size));
5813 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5816 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5817 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5819 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5820 t = off;
5821 if (adjust)
5822 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5823 build_int_cst (TREE_TYPE (off), adjust));
5825 t = fold_convert (sizetype, t);
5826 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5828 if (is_ha)
5830 /* type ha; // treat as "struct {ftype field[n];}"
5831 ... [computing offs]
5832 	 for (i = 0; i < nregs; ++i, offs += 16)
5833 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5834 return ha; */
5835 int i;
5836 tree tmp_ha, field_t, field_ptr_t;
5838 /* Declare a local variable. */
5839 tmp_ha = create_tmp_var_raw (type, "ha");
5840 gimple_add_tmp_var (tmp_ha);
5842 /* Establish the base type. */
5843 switch (ag_mode)
5845 case SFmode:
5846 field_t = float_type_node;
5847 field_ptr_t = float_ptr_type_node;
5848 break;
5849 case DFmode:
5850 field_t = double_type_node;
5851 field_ptr_t = double_ptr_type_node;
5852 break;
5853 case TFmode:
5854 field_t = long_double_type_node;
5855 field_ptr_t = long_double_ptr_type_node;
5856 break;
5857 	/* Half-precision and quad-precision floating point are not fully
5858 	   supported yet.  Enable the following code once that support is
5859 	   complete; we still need to find the correct type node for __fp16 *.  */
5860 #if 0
5861 case HFmode:
5862 field_t = float_type_node;
5863 field_ptr_t = float_ptr_type_node;
5864 break;
5865 #endif
5866 case V2SImode:
5867 case V4SImode:
5869 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5870 field_t = build_vector_type_for_mode (innertype, ag_mode);
5871 field_ptr_t = build_pointer_type (field_t);
5873 break;
5874 default:
5875 gcc_assert (0);
5878       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
5879 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5880 addr = t;
5881 t = fold_convert (field_ptr_t, addr);
5882 t = build2 (MODIFY_EXPR, field_t,
5883 build1 (INDIRECT_REF, field_t, tmp_ha),
5884 build1 (INDIRECT_REF, field_t, t));
5886 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5887 for (i = 1; i < nregs; ++i)
5889 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5890 u = fold_convert (field_ptr_t, addr);
5891 u = build2 (MODIFY_EXPR, field_t,
5892 build2 (MEM_REF, field_t, tmp_ha,
5893 build_int_cst (field_ptr_t,
5894 (i *
5895 int_size_in_bytes (field_t)))),
5896 build1 (INDIRECT_REF, field_t, u));
5897 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5900 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5901 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5904 COND_EXPR_ELSE (cond2) = t;
5905 addr = fold_convert (build_pointer_type (type), cond1);
5906 addr = build_va_arg_indirect_ref (addr);
5908 if (indirect_p)
5909 addr = build_va_arg_indirect_ref (addr);
5911 return addr;
5914 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5916 static void
5917 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5918 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5919 int no_rtl)
5921 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5922 CUMULATIVE_ARGS local_cum;
5923 int gr_saved, vr_saved;
5925 /* The caller has advanced CUM up to, but not beyond, the last named
5926 argument. Advance a local copy of CUM past the last "real" named
5927 argument, to find out how many registers are left over. */
5928 local_cum = *cum;
5929 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5931   /* Find out how many registers we need to save.  */
5932 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5933 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5935 if (TARGET_GENERAL_REGS_ONLY)
5937 if (local_cum.aapcs_nvrn > 0)
5938 sorry ("%qs and floating point or vector arguments",
5939 "-mgeneral-regs-only");
5940 vr_saved = 0;
5943 if (!no_rtl)
5945 if (gr_saved > 0)
5947 rtx ptr, mem;
5949 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5950 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5951 - gr_saved * UNITS_PER_WORD);
5952 mem = gen_frame_mem (BLKmode, ptr);
5953 set_mem_alias_set (mem, get_varargs_alias_set ());
5955 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5956 mem, gr_saved);
5958 if (vr_saved > 0)
5960 /* We can't use move_block_from_reg, because it will use
5961 the wrong mode, storing D regs only. */
5962 enum machine_mode mode = TImode;
5963 int off, i;
5965 /* Set OFF to the offset from virtual_incoming_args_rtx of
5966 the first vector register. The VR save area lies below
5967 the GR one, and is aligned to 16 bytes. */
5968 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5969 STACK_BOUNDARY / BITS_PER_UNIT);
5970 off -= vr_saved * UNITS_PER_VREG;
5972 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5974 rtx ptr, mem;
5976 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5977 mem = gen_frame_mem (mode, ptr);
5978 set_mem_alias_set (mem, get_varargs_alias_set ());
5979 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5980 off += UNITS_PER_VREG;
5985 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5986 any complication of having crtl->args.pretend_args_size changed. */
5987 cfun->machine->saved_varargs_size
5988 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5989 STACK_BOUNDARY / BITS_PER_UNIT)
5990 + vr_saved * UNITS_PER_VREG);
5993 static void
5994 aarch64_conditional_register_usage (void)
5996 int i;
5997 if (!TARGET_FLOAT)
5999 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6001 fixed_regs[i] = 1;
6002 call_used_regs[i] = 1;
6007 /* Walk down the type tree of TYPE counting consecutive base elements.
6008 If *MODEP is VOIDmode, then set it to the first valid floating point
6009 type. If a non-floating point type is found, or if a floating point
6010 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6011 otherwise return the count in the sub-tree. */
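/* For example (illustrative, not in the original source):
     struct { double x; double y; }    -> returns 2, *modep == DFmode
     struct { float v[3]; }            -> returns 3, *modep == SFmode
     struct { double d; float f; }     -> returns -1 (mixed base types)
   matching the AAPCS64 notion of a homogeneous floating-point aggregate.  */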
6012 static int
6013 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6015 enum machine_mode mode;
6016 HOST_WIDE_INT size;
6018 switch (TREE_CODE (type))
6020 case REAL_TYPE:
6021 mode = TYPE_MODE (type);
6022 if (mode != DFmode && mode != SFmode && mode != TFmode)
6023 return -1;
6025 if (*modep == VOIDmode)
6026 *modep = mode;
6028 if (*modep == mode)
6029 return 1;
6031 break;
6033 case COMPLEX_TYPE:
6034 mode = TYPE_MODE (TREE_TYPE (type));
6035 if (mode != DFmode && mode != SFmode && mode != TFmode)
6036 return -1;
6038 if (*modep == VOIDmode)
6039 *modep = mode;
6041 if (*modep == mode)
6042 return 2;
6044 break;
6046 case VECTOR_TYPE:
6047 /* Use V2SImode and V4SImode as representatives of all 64-bit
6048 and 128-bit vector types. */
6049 size = int_size_in_bytes (type);
6050 switch (size)
6052 case 8:
6053 mode = V2SImode;
6054 break;
6055 case 16:
6056 mode = V4SImode;
6057 break;
6058 default:
6059 return -1;
6062 if (*modep == VOIDmode)
6063 *modep = mode;
6065 /* Vector modes are considered to be opaque: two vectors are
6066 equivalent for the purposes of being homogeneous aggregates
6067 if they are the same size. */
6068 if (*modep == mode)
6069 return 1;
6071 break;
6073 case ARRAY_TYPE:
6075 int count;
6076 tree index = TYPE_DOMAIN (type);
6078 /* Can't handle incomplete types. */
6079 if (!COMPLETE_TYPE_P (type))
6080 return -1;
6082 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6083 if (count == -1
6084 || !index
6085 || !TYPE_MAX_VALUE (index)
6086 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6087 || !TYPE_MIN_VALUE (index)
6088 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6089 || count < 0)
6090 return -1;
6092 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6093 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6095 /* There must be no padding. */
6096 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6097 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6098 != count * GET_MODE_BITSIZE (*modep)))
6099 return -1;
6101 return count;
6104 case RECORD_TYPE:
6106 int count = 0;
6107 int sub_count;
6108 tree field;
6110 /* Can't handle incomplete types. */
6111 if (!COMPLETE_TYPE_P (type))
6112 return -1;
6114 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6116 if (TREE_CODE (field) != FIELD_DECL)
6117 continue;
6119 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6120 if (sub_count < 0)
6121 return -1;
6122 count += sub_count;
6125 /* There must be no padding. */
6126 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6127 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6128 != count * GET_MODE_BITSIZE (*modep)))
6129 return -1;
6131 return count;
6134 case UNION_TYPE:
6135 case QUAL_UNION_TYPE:
6137 /* These aren't very interesting except in a degenerate case. */
6138 int count = 0;
6139 int sub_count;
6140 tree field;
6142 /* Can't handle incomplete types. */
6143 if (!COMPLETE_TYPE_P (type))
6144 return -1;
6146 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6148 if (TREE_CODE (field) != FIELD_DECL)
6149 continue;
6151 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6152 if (sub_count < 0)
6153 return -1;
6154 count = count > sub_count ? count : sub_count;
6157 /* There must be no padding. */
6158 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6159 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6160 != count * GET_MODE_BITSIZE (*modep)))
6161 return -1;
6163 return count;
6166 default:
6167 break;
6170 return -1;
6173 /* Return true if we use LRA instead of reload pass. */
6174 static bool
6175 aarch64_lra_p (void)
6177 return aarch64_lra_flag;
6180 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6181 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6182 array types. The C99 floating-point complex types are also considered
6183 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6184 types, which are GCC extensions and out of the scope of AAPCS64, are
6185 treated as composite types here as well.
6187 Note that MODE itself is not sufficient in determining whether a type
6188 is such a composite type or not. This is because
6189 stor-layout.c:compute_record_mode may have already changed the MODE
6190 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6191 structure with only one field may have its MODE set to the mode of the
6192 field. Also an integer mode whose size matches the size of the
6193 RECORD_TYPE type may be used to substitute the original mode
6194 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6195 solely relied on. */
6197 static bool
6198 aarch64_composite_type_p (const_tree type,
6199 enum machine_mode mode)
6201 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6202 return true;
6204 if (mode == BLKmode
6205 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6206 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6207 return true;
6209 return false;
6212 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6213 type as described in AAPCS64 \S 4.1.2.
6215 See the comment above aarch64_composite_type_p for the notes on MODE. */
6217 static bool
6218 aarch64_short_vector_p (const_tree type,
6219 enum machine_mode mode)
6221 HOST_WIDE_INT size = -1;
6223 if (type && TREE_CODE (type) == VECTOR_TYPE)
6224 size = int_size_in_bytes (type);
6225 else if (!aarch64_composite_type_p (type, mode)
6226 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6227 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6228 size = GET_MODE_SIZE (mode);
6230   return (size == 8 || size == 16);
6233 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6234 shall be passed or returned in simd/fp register(s) (providing these
6235 parameter passing registers are available).
6237 Upon successful return, *COUNT returns the number of needed registers,
6238    *BASE_MODE returns the mode of the individual register and, when IS_HA
6239    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6240 floating-point aggregate or a homogeneous short-vector aggregate. */
6242 static bool
6243 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6244 const_tree type,
6245 enum machine_mode *base_mode,
6246 int *count,
6247 bool *is_ha)
6249 enum machine_mode new_mode = VOIDmode;
6250 bool composite_p = aarch64_composite_type_p (type, mode);
6252 if (is_ha != NULL) *is_ha = false;
6254 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6255 || aarch64_short_vector_p (type, mode))
6257 *count = 1;
6258 new_mode = mode;
6260 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6262 if (is_ha != NULL) *is_ha = true;
6263 *count = 2;
6264 new_mode = GET_MODE_INNER (mode);
6266 else if (type && composite_p)
6268 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6270 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6272 if (is_ha != NULL) *is_ha = true;
6273 *count = ag_count;
6275 else
6276 return false;
6278 else
6279 return false;
6281 *base_mode = new_mode;
6282 return true;
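/* Worked example: for an argument of type struct { double x; double y; },
   aapcs_vfp_sub_candidate returns 2 with the mode set to DFmode, so this
   function reports *COUNT == 2, *BASE_MODE == DFmode and *IS_HA == true;
   with enough free registers such an argument is then passed in two D
   registers.  */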
6285 /* Implement TARGET_STRUCT_VALUE_RTX. */
6287 static rtx
6288 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6289 int incoming ATTRIBUTE_UNUSED)
6291 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6294 /* Implements target hook vector_mode_supported_p. */
6295 static bool
6296 aarch64_vector_mode_supported_p (enum machine_mode mode)
6298 if (TARGET_SIMD
6299 && (mode == V4SImode || mode == V8HImode
6300 || mode == V16QImode || mode == V2DImode
6301 || mode == V2SImode || mode == V4HImode
6302 || mode == V8QImode || mode == V2SFmode
6303 || mode == V4SFmode || mode == V2DFmode))
6304 return true;
6306 return false;
6309 /* Return appropriate SIMD container
6310 for MODE within a vector of WIDTH bits. */
6311 static enum machine_mode
6312 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6314 gcc_assert (width == 64 || width == 128);
6315 if (TARGET_SIMD)
6317 if (width == 128)
6318 switch (mode)
6320 case DFmode:
6321 return V2DFmode;
6322 case SFmode:
6323 return V4SFmode;
6324 case SImode:
6325 return V4SImode;
6326 case HImode:
6327 return V8HImode;
6328 case QImode:
6329 return V16QImode;
6330 case DImode:
6331 return V2DImode;
6332 default:
6333 break;
6335 else
6336 switch (mode)
6338 case SFmode:
6339 return V2SFmode;
6340 case SImode:
6341 return V2SImode;
6342 case HImode:
6343 return V4HImode;
6344 case QImode:
6345 return V8QImode;
6346 default:
6347 break;
6350 return word_mode;
6353 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6354 static enum machine_mode
6355 aarch64_preferred_simd_mode (enum machine_mode mode)
6357 return aarch64_simd_container_mode (mode, 128);
6360 /* Return the bitmask of possible vector sizes for the vectorizer
6361 to iterate over. */
6362 static unsigned int
6363 aarch64_autovectorize_vector_sizes (void)
6365 return (16 | 8);
6368 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6369 vector types in order to conform to the AAPCS64 (see "Procedure
6370 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6371 qualify for emission with the mangled names defined in that document,
6372 a vector type must not only be of the correct mode but also be
6373 composed of AdvSIMD vector element types (e.g.
6374    __builtin_aarch64_simd_qi); these types are registered by
6375 aarch64_init_simd_builtins (). In other words, vector types defined
6376 in other ways e.g. via vector_size attribute will get default
6377 mangled names. */
6378 typedef struct
6380 enum machine_mode mode;
6381 const char *element_type_name;
6382 const char *mangled_name;
6383 } aarch64_simd_mangle_map_entry;
6385 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6386 /* 64-bit containerized types. */
6387 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6388 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6389 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6390 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6391 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6392 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6393 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6394 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6395 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6396 /* 128-bit containerized types. */
6397 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6398 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6399 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6400 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6401 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6402 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6403 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6404 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6405 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6406 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6407 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6408 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6409 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6410 { VOIDmode, NULL, NULL }
6413 /* Implement TARGET_MANGLE_TYPE. */
6415 static const char *
6416 aarch64_mangle_type (const_tree type)
6418 /* The AArch64 ABI documents say that "__va_list" has to be
6419     mangled as if it is in the "std" namespace.  */
6420 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6421 return "St9__va_list";
6423 /* Check the mode of the vector type, and the name of the vector
6424 element type, against the table. */
6425 if (TREE_CODE (type) == VECTOR_TYPE)
6427 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6429 while (pos->mode != VOIDmode)
6431 tree elt_type = TREE_TYPE (type);
6433 if (pos->mode == TYPE_MODE (type)
6434 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6435 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6436 pos->element_type_name))
6437 return pos->mangled_name;
6439 pos++;
6443 /* Use the default mangling. */
6444 return NULL;
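/* For example, with the table above a C++ function declared as
   void f (int32x4_t) has its parameter encoded via the "11__Int32x4_t"
   entry, giving a mangled symbol along the lines of _Z1f11__Int32x4_t,
   whereas a vector type created only via the vector_size attribute falls
   through to the default mangling.  */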
6447 /* Return the equivalent letter for size. */
6448 static char
6449 sizetochar (int size)
6451 switch (size)
6453 case 64: return 'd';
6454 case 32: return 's';
6455 case 16: return 'h';
6456 case 8 : return 'b';
6457 default: gcc_unreachable ();
6461 /* Return true iff x is a uniform vector of floating-point
6462 constants, and the constant can be represented in
6463    quarter-precision form.  Note that, as aarch64_float_const_representable_p
6464    rejects both +0.0 and -0.0, this function rejects them as well.  */
6465 static bool
6466 aarch64_vect_float_const_representable_p (rtx x)
6468 int i = 0;
6469 REAL_VALUE_TYPE r0, ri;
6470 rtx x0, xi;
6472 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6473 return false;
6475 x0 = CONST_VECTOR_ELT (x, 0);
6476 if (!CONST_DOUBLE_P (x0))
6477 return false;
6479 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6481 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6483 xi = CONST_VECTOR_ELT (x, i);
6484 if (!CONST_DOUBLE_P (xi))
6485 return false;
6487 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6488 if (!REAL_VALUES_EQUAL (r0, ri))
6489 return false;
6492 return aarch64_float_const_representable_p (x0);
6495 /* Return true for valid and false for invalid. */
6496 bool
6497 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6498 struct simd_immediate_info *info)
6500 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6501 matches = 1; \
6502 for (i = 0; i < idx; i += (STRIDE)) \
6503 if (!(TEST)) \
6504 matches = 0; \
6505 if (matches) \
6507 immtype = (CLASS); \
6508 elsize = (ELSIZE); \
6509 eshift = (SHIFT); \
6510 emvn = (NEG); \
6511 break; \
6514 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6515 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6516 unsigned char bytes[16];
6517 int immtype = -1, matches;
6518 unsigned int invmask = inverse ? 0xff : 0;
6519 int eshift, emvn;
6521 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6523 if (! (aarch64_simd_imm_zero_p (op, mode)
6524 || aarch64_vect_float_const_representable_p (op)))
6525 return false;
6527 if (info)
6529 info->value = CONST_VECTOR_ELT (op, 0);
6530 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6531 info->mvn = false;
6532 info->shift = 0;
6535 return true;
6538 /* Splat vector constant out into a byte vector. */
6539 for (i = 0; i < n_elts; i++)
6541 rtx el = CONST_VECTOR_ELT (op, i);
6542 unsigned HOST_WIDE_INT elpart;
6543 unsigned int part, parts;
6545 if (GET_CODE (el) == CONST_INT)
6547 elpart = INTVAL (el);
6548 parts = 1;
6550 else if (GET_CODE (el) == CONST_DOUBLE)
6552 elpart = CONST_DOUBLE_LOW (el);
6553 parts = 2;
6555 else
6556 gcc_unreachable ();
6558 for (part = 0; part < parts; part++)
6560 unsigned int byte;
6561 for (byte = 0; byte < innersize; byte++)
6563 bytes[idx++] = (elpart & 0xff) ^ invmask;
6564 elpart >>= BITS_PER_UNIT;
6566 if (GET_CODE (el) == CONST_DOUBLE)
6567 elpart = CONST_DOUBLE_HIGH (el);
6571 /* Sanity check. */
6572 gcc_assert (idx == GET_MODE_SIZE (mode));
6576 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6577 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6579 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6580 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6582 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6583 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6585 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6586 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6588 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6590 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6592 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6593 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6595 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6596 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6598 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6599 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6601 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6602 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6604 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6606 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6608 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6609 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6611 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6612 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6614 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6615 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6617 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6618 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6620 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6622 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6623 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6625 while (0);
6627 if (immtype == -1)
6628 return false;
6630 if (info)
6632 info->element_width = elsize;
6633 info->mvn = emvn != 0;
6634 info->shift = eshift;
6636 unsigned HOST_WIDE_INT imm = 0;
6638 if (immtype >= 12 && immtype <= 15)
6639 info->msl = true;
6641 /* Un-invert bytes of recognized vector, if necessary. */
6642 if (invmask != 0)
6643 for (i = 0; i < idx; i++)
6644 bytes[i] ^= invmask;
6646 if (immtype == 17)
6648 /* FIXME: Broken on 32-bit H_W_I hosts. */
6649 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6651 for (i = 0; i < 8; i++)
6652 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6653 << (i * BITS_PER_UNIT);
6656 info->value = GEN_INT (imm);
6658 else
6660 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6661 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6663 /* Construct 'abcdefgh' because the assembler cannot handle
6664 generic constants. */
6665 if (info->mvn)
6666 imm = ~imm;
6667 imm = (imm >> info->shift) & 0xff;
6668 info->value = GEN_INT (imm);
6672 return true;
6673 #undef CHECK
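/* Example: a V4SImode vector whose four elements are all 0x4500 splats to
   the byte pattern { 00 45 00 00, ... }, which matches the ELSIZE == 32,
   SHIFT == 8 CHECK above.  INFO is then filled in with element_width 32,
   shift 8, mvn false and value 0x45, which
   aarch64_output_simd_mov_immediate renders roughly as
   "movi v0.4s, 0x45, lsl 8".  */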
6676 static bool
6677 aarch64_const_vec_all_same_int_p (rtx x,
6678 HOST_WIDE_INT minval,
6679 HOST_WIDE_INT maxval)
6681 HOST_WIDE_INT firstval;
6682 int count, i;
6684 if (GET_CODE (x) != CONST_VECTOR
6685 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6686 return false;
6688 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6689 if (firstval < minval || firstval > maxval)
6690 return false;
6692 count = CONST_VECTOR_NUNITS (x);
6693 for (i = 1; i < count; i++)
6694 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6695 return false;
6697 return true;
6700 /* Check whether immediate shift constants are within range.  */
6701 bool
6702 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6704 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6705 if (left)
6706 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6707 else
6708 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
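/* For instance, with V4SImode the element width is 32 bits, so an immediate
   left-shift count must be a splat of a value in [0, 31] while an immediate
   right-shift count must lie in [1, 32], matching the instruction
   encodings.  */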
6711 /* Return true if X is a uniform vector where all elements
6712 are either the floating-point constant 0.0 or the
6713 integer constant 0. */
6714 bool
6715 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6717 return x == CONST0_RTX (mode);
6720 bool
6721 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6723 HOST_WIDE_INT imm = INTVAL (x);
6724 int i;
6726 for (i = 0; i < 8; i++)
6728 unsigned int byte = imm & 0xff;
6729 if (byte != 0xff && byte != 0)
6730 return false;
6731 imm >>= 8;
6734 return true;
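/* Example: the loop above accepts a constant such as 0xff00ffff00000000
   (every byte is either 0x00 or 0xff) and rejects one such as 0x0102,
   mirroring the 64-bit byte-mask form of immediate that MOVI can
   materialise.  */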
6737 bool
6738 aarch64_mov_operand_p (rtx x,
6739 enum aarch64_symbol_context context,
6740 enum machine_mode mode)
6742 if (GET_CODE (x) == HIGH
6743 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6744 return true;
6746 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6747 return true;
6749 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6750 return true;
6752 return aarch64_classify_symbolic_expression (x, context)
6753 == SYMBOL_TINY_ABSOLUTE;
6756 /* Return a const_int vector of VAL. */
6757 rtx
6758 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6760 int nunits = GET_MODE_NUNITS (mode);
6761 rtvec v = rtvec_alloc (nunits);
6762 int i;
6764 for (i=0; i < nunits; i++)
6765 RTVEC_ELT (v, i) = GEN_INT (val);
6767 return gen_rtx_CONST_VECTOR (mode, v);
6770 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6772 bool
6773 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6775 enum machine_mode vmode;
6777 gcc_assert (!VECTOR_MODE_P (mode));
6778 vmode = aarch64_preferred_simd_mode (mode);
6779 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6780 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6783 /* Construct and return a PARALLEL RTX vector. */
6784 rtx
6785 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6787 int nunits = GET_MODE_NUNITS (mode);
6788 rtvec v = rtvec_alloc (nunits / 2);
6789 int base = high ? nunits / 2 : 0;
6790 rtx t1;
6791 int i;
6793 for (i=0; i < nunits / 2; i++)
6794 RTVEC_ELT (v, i) = GEN_INT (base + i);
6796 t1 = gen_rtx_PARALLEL (mode, v);
6797 return t1;
6800 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6801 HIGH (exclusive). */
6802 void
6803 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6805 HOST_WIDE_INT lane;
6806 gcc_assert (GET_CODE (operand) == CONST_INT);
6807 lane = INTVAL (operand);
6809 if (lane < low || lane >= high)
6810 error ("lane out of range");
6813 void
6814 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6816 gcc_assert (GET_CODE (operand) == CONST_INT);
6817 HOST_WIDE_INT lane = INTVAL (operand);
6819 if (lane < low || lane >= high)
6820 error ("constant out of range");
6823 /* Emit code to reinterpret one AdvSIMD type as another,
6824 without altering bits. */
6825 void
6826 aarch64_simd_reinterpret (rtx dest, rtx src)
6828 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6831 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6832 registers). */
6833 void
6834 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6835 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6836 rtx op1)
6838 rtx mem = gen_rtx_MEM (mode, destaddr);
6839 rtx tmp1 = gen_reg_rtx (mode);
6840 rtx tmp2 = gen_reg_rtx (mode);
6842 emit_insn (intfn (tmp1, op1, tmp2));
6844 emit_move_insn (mem, tmp1);
6845 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6846 emit_move_insn (mem, tmp2);
6849 /* Return TRUE if OP is a valid vector addressing mode. */
6850 bool
6851 aarch64_simd_mem_operand_p (rtx op)
6853 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6854 || GET_CODE (XEXP (op, 0)) == REG);
6857 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6858 not to early-clobber SRC registers in the process.
6860 We assume that the operands described by SRC and DEST represent a
6861 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6862 number of components into which the copy has been decomposed. */
6863 void
6864 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6865 rtx *src, unsigned int count)
6867 unsigned int i;
6869 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6870 || REGNO (operands[0]) < REGNO (operands[1]))
6872 for (i = 0; i < count; i++)
6874 operands[2 * i] = dest[i];
6875 operands[2 * i + 1] = src[i];
6878 else
6880 for (i = 0; i < count; i++)
6882 operands[2 * i] = dest[count - i - 1];
6883 operands[2 * i + 1] = src[count - i - 1];
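/* For instance, when a two-component value is copied from the register pair
   (v1, v2) to (v2, v3), the destination overlaps the source and has the
   higher register number, so the second branch above emits the components in
   reverse order (v3 <- v2 before v2 <- v1) and v2 is read before it is
   overwritten.  */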
6888 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6889 one of VSTRUCT modes: OI, CI or XI. */
6890 int
6891 aarch64_simd_attr_length_move (rtx insn)
6893 enum machine_mode mode;
6895 extract_insn_cached (insn);
6897 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6899 mode = GET_MODE (recog_data.operand[0]);
6900 switch (mode)
6902 case OImode:
6903 return 8;
6904 case CImode:
6905 return 12;
6906 case XImode:
6907 return 16;
6908 default:
6909 gcc_unreachable ();
6912 return 4;
6915 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6916 alignment of a vector to 128 bits. */
6917 static HOST_WIDE_INT
6918 aarch64_simd_vector_alignment (const_tree type)
6920 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6921 return MIN (align, 128);
6924 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6925 static bool
6926 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6928 if (is_packed)
6929 return false;
6931 /* We guarantee alignment for vectors up to 128-bits. */
6932 if (tree_int_cst_compare (TYPE_SIZE (type),
6933 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6934 return false;
6936 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6937 return true;
6940 /* If VALS is a vector constant that can be loaded into a register
6941 using DUP, generate instructions to do so and return an RTX to
6942 assign to the register. Otherwise return NULL_RTX. */
6943 static rtx
6944 aarch64_simd_dup_constant (rtx vals)
6946 enum machine_mode mode = GET_MODE (vals);
6947 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6948 int n_elts = GET_MODE_NUNITS (mode);
6949 bool all_same = true;
6950 rtx x;
6951 int i;
6953 if (GET_CODE (vals) != CONST_VECTOR)
6954 return NULL_RTX;
6956 for (i = 1; i < n_elts; ++i)
6958 x = CONST_VECTOR_ELT (vals, i);
6959 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6960 all_same = false;
6963 if (!all_same)
6964 return NULL_RTX;
6966 /* We can load this constant by using DUP and a constant in a
6967 single ARM register. This will be cheaper than a vector
6968 load. */
6969 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6970 return gen_rtx_VEC_DUPLICATE (mode, x);
6974 /* Generate code to load VALS, which is a PARALLEL containing only
6975 constants (for vec_init) or CONST_VECTOR, efficiently into a
6976 register. Returns an RTX to copy into the register, or NULL_RTX
6977 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6978 static rtx
6979 aarch64_simd_make_constant (rtx vals)
6981 enum machine_mode mode = GET_MODE (vals);
6982 rtx const_dup;
6983 rtx const_vec = NULL_RTX;
6984 int n_elts = GET_MODE_NUNITS (mode);
6985 int n_const = 0;
6986 int i;
6988 if (GET_CODE (vals) == CONST_VECTOR)
6989 const_vec = vals;
6990 else if (GET_CODE (vals) == PARALLEL)
6992 /* A CONST_VECTOR must contain only CONST_INTs and
6993 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6994 Only store valid constants in a CONST_VECTOR. */
6995 for (i = 0; i < n_elts; ++i)
6997 rtx x = XVECEXP (vals, 0, i);
6998 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6999 n_const++;
7001 if (n_const == n_elts)
7002 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7004 else
7005 gcc_unreachable ();
7007 if (const_vec != NULL_RTX
7008 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7009 /* Load using MOVI/MVNI. */
7010 return const_vec;
7011 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7012 /* Loaded using DUP. */
7013 return const_dup;
7014 else if (const_vec != NULL_RTX)
7015 /* Load from constant pool. We can not take advantage of single-cycle
7016 LD1 because we need a PC-relative addressing mode. */
7017 return const_vec;
7018 else
7019 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7020 We can not construct an initializer. */
7021 return NULL_RTX;
7024 void
7025 aarch64_expand_vector_init (rtx target, rtx vals)
7027 enum machine_mode mode = GET_MODE (target);
7028 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7029 int n_elts = GET_MODE_NUNITS (mode);
7030 int n_var = 0, one_var = -1;
7031 bool all_same = true;
7032 rtx x, mem;
7033 int i;
7035 x = XVECEXP (vals, 0, 0);
7036 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7037 n_var = 1, one_var = 0;
7039 for (i = 1; i < n_elts; ++i)
7041 x = XVECEXP (vals, 0, i);
7042 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7043 ++n_var, one_var = i;
7045 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7046 all_same = false;
7049 if (n_var == 0)
7051 rtx constant = aarch64_simd_make_constant (vals);
7052 if (constant != NULL_RTX)
7054 emit_move_insn (target, constant);
7055 return;
7059 /* Splat a single non-constant element if we can. */
7060 if (all_same)
7062 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7063 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7064 return;
7067 /* One field is non-constant. Load constant then overwrite varying
7068 field. This is more efficient than using the stack. */
7069 if (n_var == 1)
7071 rtx copy = copy_rtx (vals);
7072 rtx index = GEN_INT (one_var);
7073 enum insn_code icode;
7075 /* Load constant part of vector, substitute neighboring value for
7076 varying element. */
7077 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7078 aarch64_expand_vector_init (target, copy);
7080 /* Insert variable. */
7081 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7082 icode = optab_handler (vec_set_optab, mode);
7083 gcc_assert (icode != CODE_FOR_nothing);
7084 emit_insn (GEN_FCN (icode) (target, x, index));
7085 return;
7088 /* Construct the vector in memory one field at a time
7089 and load the whole vector. */
7090 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7091 for (i = 0; i < n_elts; i++)
7092 emit_move_insn (adjust_address_nv (mem, inner_mode,
7093 i * GET_MODE_SIZE (inner_mode)),
7094 XVECEXP (vals, 0, i));
7095 emit_move_insn (target, mem);
7099 static unsigned HOST_WIDE_INT
7100 aarch64_shift_truncation_mask (enum machine_mode mode)
7102 return
7103 (aarch64_vector_mode_supported_p (mode)
7104 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7107 #ifndef TLS_SECTION_ASM_FLAG
7108 #define TLS_SECTION_ASM_FLAG 'T'
7109 #endif
7111 void
7112 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7113 tree decl ATTRIBUTE_UNUSED)
7115 char flagchars[10], *f = flagchars;
7117 /* If we have already declared this section, we can use an
7118 abbreviated form to switch back to it -- unless this section is
7119 part of a COMDAT groups, in which case GAS requires the full
7120 declaration every time. */
7121 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7122 && (flags & SECTION_DECLARED))
7124 fprintf (asm_out_file, "\t.section\t%s\n", name);
7125 return;
7128 if (!(flags & SECTION_DEBUG))
7129 *f++ = 'a';
7130 if (flags & SECTION_WRITE)
7131 *f++ = 'w';
7132 if (flags & SECTION_CODE)
7133 *f++ = 'x';
7134 if (flags & SECTION_SMALL)
7135 *f++ = 's';
7136 if (flags & SECTION_MERGE)
7137 *f++ = 'M';
7138 if (flags & SECTION_STRINGS)
7139 *f++ = 'S';
7140 if (flags & SECTION_TLS)
7141 *f++ = TLS_SECTION_ASM_FLAG;
7142 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7143 *f++ = 'G';
7144 *f = '\0';
7146 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7148 if (!(flags & SECTION_NOTYPE))
7150 const char *type;
7151 const char *format;
7153 if (flags & SECTION_BSS)
7154 type = "nobits";
7155 else
7156 type = "progbits";
7158 #ifdef TYPE_OPERAND_FMT
7159 format = "," TYPE_OPERAND_FMT;
7160 #else
7161 format = ",@%s";
7162 #endif
7164 fprintf (asm_out_file, format, type);
7166 if (flags & SECTION_ENTSIZE)
7167 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7168 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7170 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7171 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7172 else
7173 fprintf (asm_out_file, ",%s,comdat",
7174 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7178 putc ('\n', asm_out_file);
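/* Example output: for a thread-local BSS section the flags computed above
   yield something like

        .section        .tbss,"awT",@nobits

   assuming the default ",@%s" TYPE_OPERAND_FMT and no COMDAT group.  */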
7181 /* Select a format to encode pointers in exception handling data. */
7182 int
7183 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7185 int type;
7186 switch (aarch64_cmodel)
7188 case AARCH64_CMODEL_TINY:
7189 case AARCH64_CMODEL_TINY_PIC:
7190 case AARCH64_CMODEL_SMALL:
7191 case AARCH64_CMODEL_SMALL_PIC:
7192 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7193 for everything. */
7194 type = DW_EH_PE_sdata4;
7195 break;
7196 default:
7197 /* No assumptions here. 8-byte relocs required. */
7198 type = DW_EH_PE_sdata8;
7199 break;
7201 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7204 /* Emit load exclusive. */
7206 static void
7207 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7208 rtx mem, rtx model_rtx)
7210 rtx (*gen) (rtx, rtx, rtx);
7212 switch (mode)
7214 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7215 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7216 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7217 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7218 default:
7219 gcc_unreachable ();
7222 emit_insn (gen (rval, mem, model_rtx));
7225 /* Emit store exclusive. */
7227 static void
7228 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7229 rtx rval, rtx mem, rtx model_rtx)
7231 rtx (*gen) (rtx, rtx, rtx, rtx);
7233 switch (mode)
7235 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7236 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7237 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7238 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7239 default:
7240 gcc_unreachable ();
7243 emit_insn (gen (bval, rval, mem, model_rtx));
7246 /* Mark the previous jump instruction as unlikely. */
7248 static void
7249 aarch64_emit_unlikely_jump (rtx insn)
7251 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7253 insn = emit_jump_insn (insn);
7254 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7257 /* Expand a compare and swap pattern. */
7259 void
7260 aarch64_expand_compare_and_swap (rtx operands[])
7262 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7263 enum machine_mode mode, cmp_mode;
7264 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7266 bval = operands[0];
7267 rval = operands[1];
7268 mem = operands[2];
7269 oldval = operands[3];
7270 newval = operands[4];
7271 is_weak = operands[5];
7272 mod_s = operands[6];
7273 mod_f = operands[7];
7274 mode = GET_MODE (mem);
7275 cmp_mode = mode;
7277 /* Normally the succ memory model must be stronger than fail, but in the
7278 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7279 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7281 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7282 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7283 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
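  /* This corresponds to, e.g., a call such as
     __atomic_compare_exchange_n (p, &expected, desired, 0,
                                  __ATOMIC_RELEASE, __ATOMIC_ACQUIRE),
     where using the success order alone would drop the acquire semantics
     needed on the failure path.  */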
7285 switch (mode)
7287 case QImode:
7288 case HImode:
7289 /* For short modes, we're going to perform the comparison in SImode,
7290 so do the zero-extension now. */
7291 cmp_mode = SImode;
7292 rval = gen_reg_rtx (SImode);
7293 oldval = convert_modes (SImode, mode, oldval, true);
7294 /* Fall through. */
7296 case SImode:
7297 case DImode:
7298 /* Force the value into a register if needed. */
7299 if (!aarch64_plus_operand (oldval, mode))
7300 oldval = force_reg (cmp_mode, oldval);
7301 break;
7303 default:
7304 gcc_unreachable ();
7307 switch (mode)
7309 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7310 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7311 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7312 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7313 default:
7314 gcc_unreachable ();
7317 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7319 if (mode == QImode || mode == HImode)
7320 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7322 x = gen_rtx_REG (CCmode, CC_REGNUM);
7323 x = gen_rtx_EQ (SImode, x, const0_rtx);
7324 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7327 /* Split a compare and swap pattern. */
7329 void
7330 aarch64_split_compare_and_swap (rtx operands[])
7332 rtx rval, mem, oldval, newval, scratch;
7333 enum machine_mode mode;
7334 bool is_weak;
7335 rtx label1, label2, x, cond;
7337 rval = operands[0];
7338 mem = operands[1];
7339 oldval = operands[2];
7340 newval = operands[3];
7341 is_weak = (operands[4] != const0_rtx);
7342 scratch = operands[7];
7343 mode = GET_MODE (mem);
7345 label1 = NULL_RTX;
7346 if (!is_weak)
7348 label1 = gen_label_rtx ();
7349 emit_label (label1);
7351 label2 = gen_label_rtx ();
7353 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7355 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7356 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7357 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7358 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7359 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7361 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7363 if (!is_weak)
7365 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7366 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7367 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7368 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7370 else
7372 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7373 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7374 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7377 emit_label (label2);
7380 /* Split an atomic operation. */
7382 void
7383 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7384 rtx value, rtx model_rtx, rtx cond)
7386 enum machine_mode mode = GET_MODE (mem);
7387 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7388 rtx label, x;
7390 label = gen_label_rtx ();
7391 emit_label (label);
7393 if (new_out)
7394 new_out = gen_lowpart (wmode, new_out);
7395 if (old_out)
7396 old_out = gen_lowpart (wmode, old_out);
7397 else
7398 old_out = new_out;
7399 value = simplify_gen_subreg (wmode, value, mode, 0);
7401 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7403 switch (code)
7405 case SET:
7406 new_out = value;
7407 break;
7409 case NOT:
7410 x = gen_rtx_AND (wmode, old_out, value);
7411 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7412 x = gen_rtx_NOT (wmode, new_out);
7413 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7414 break;
7416 case MINUS:
7417 if (CONST_INT_P (value))
7419 value = GEN_INT (-INTVAL (value));
7420 code = PLUS;
7422 /* Fall through. */
7424 default:
7425 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7426 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7427 break;
7430 aarch64_emit_store_exclusive (mode, cond, mem,
7431 gen_lowpart (mode, new_out), model_rtx);
7433 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7434 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7435 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7436 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7439 static void
7440 aarch64_print_extension (void)
7442 const struct aarch64_option_extension *opt = NULL;
7444 for (opt = all_extensions; opt->name != NULL; opt++)
7445 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7446 asm_fprintf (asm_out_file, "+%s", opt->name);
7448 asm_fprintf (asm_out_file, "\n");
7451 static void
7452 aarch64_start_file (void)
7454 if (selected_arch)
7456 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7457 aarch64_print_extension ();
7459 else if (selected_cpu)
7461 const char *truncated_name
7462 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7463 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7464 aarch64_print_extension ();
7466 default_file_start();
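/* Example output: this emits a directive of the form ".arch <name>" or
   ".cpu <name>" followed by "+<ext>" for each enabled extension, e.g.
   something like

        .arch armv8-a+crypto

   (the exact extension list depends on the selected cpu/arch and flags),
   before the usual file-start boilerplate.  */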
7469 /* Target hook for c_mode_for_suffix. */
7470 static enum machine_mode
7471 aarch64_c_mode_for_suffix (char suffix)
7473 if (suffix == 'q')
7474 return TFmode;
7476 return VOIDmode;
7479 /* We can only represent floating point constants which will fit in
7480 "quarter-precision" values. These values are characterised by
7481    a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7484 (-1)^s * (n/16) * 2^r
7486 Where:
7487 's' is the sign bit.
7488 'n' is an integer in the range 16 <= n <= 31.
7489 'r' is an integer in the range -3 <= r <= 4. */
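/* Worked example: 1.5 = (-1)^0 * (24/16) * 2^0, i.e. s = 0, n = 24, r = 0,
   so it is representable.  The representable magnitudes therefore range from
   16/16 * 2^-3 = 0.125 up to 31/16 * 2^4 = 31.0, and a value such as 0.1 has
   no such encoding.  */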
7491 /* Return true iff X can be represented by a quarter-precision
7492    floating point immediate operand.  Note, we cannot represent 0.0.  */
7493 bool
7494 aarch64_float_const_representable_p (rtx x)
7496 /* This represents our current view of how many bits
7497 make up the mantissa. */
7498 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7499 int exponent;
7500 unsigned HOST_WIDE_INT mantissa, mask;
7501 HOST_WIDE_INT m1, m2;
7502 REAL_VALUE_TYPE r, m;
7504 if (!CONST_DOUBLE_P (x))
7505 return false;
7507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7509 /* We cannot represent infinities, NaNs or +/-zero. We won't
7510 know if we have +zero until we analyse the mantissa, but we
7511 can reject the other invalid values. */
7512 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7513 || REAL_VALUE_MINUS_ZERO (r))
7514 return false;
7516 /* Extract exponent. */
7517 r = real_value_abs (&r);
7518 exponent = REAL_EXP (&r);
7520 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7521 highest (sign) bit, with a fixed binary point at bit point_pos.
7522 m1 holds the low part of the mantissa, m2 the high part.
7523 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7524 bits for the mantissa, this can fail (low bits will be lost). */
7525 real_ldexp (&m, &r, point_pos - exponent);
7526 REAL_VALUE_TO_INT (&m1, &m2, m);
7528 /* If the low part of the mantissa has bits set we cannot represent
7529 the value. */
7530 if (m1 != 0)
7531 return false;
7532 /* We have rejected the lower HOST_WIDE_INT, so update our
7533 understanding of how many bits lie in the mantissa and
7534 look only at the high HOST_WIDE_INT. */
7535 mantissa = m2;
7536 point_pos -= HOST_BITS_PER_WIDE_INT;
7538 /* We can only represent values with a mantissa of the form 1.xxxx. */
7539 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7540 if ((mantissa & mask) != 0)
7541 return false;
7543 /* Having filtered unrepresentable values, we may now remove all
7544 but the highest 5 bits. */
7545 mantissa >>= point_pos - 5;
7547 /* We cannot represent the value 0.0, so reject it. This is handled
7548 elsewhere. */
7549 if (mantissa == 0)
7550 return false;
7552 /* Then, as bit 4 is always set, we can mask it off, leaving
7553 the mantissa in the range [0, 15]. */
7554 mantissa &= ~(1 << 4);
7555 gcc_assert (mantissa <= 15);
7557 /* GCC internally does not use IEEE754-like encoding (where normalized
7558    significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
7559 Our mantissa values are shifted 4 places to the left relative to
7560 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7561 by 5 places to correct for GCC's representation. */
7562 exponent = 5 - exponent;
7564 return (exponent >= 0 && exponent <= 7);
7567 char*
7568 aarch64_output_simd_mov_immediate (rtx const_vector,
7569 enum machine_mode mode,
7570 unsigned width)
7572 bool is_valid;
7573 static char templ[40];
7574 const char *mnemonic;
7575 const char *shift_op;
7576 unsigned int lane_count = 0;
7577 char element_char;
7579 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7581   /* This will return true to show const_vector is legal for use as
7582      an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
7583 also update INFO to show how the immediate should be generated. */
7584 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7585 gcc_assert (is_valid);
7587 element_char = sizetochar (info.element_width);
7588 lane_count = width / info.element_width;
7590 mode = GET_MODE_INNER (mode);
7591 if (mode == SFmode || mode == DFmode)
7593 gcc_assert (info.shift == 0 && ! info.mvn);
7594 if (aarch64_float_const_zero_rtx_p (info.value))
7595 info.value = GEN_INT (0);
7596 else
7598 #define buf_size 20
7599 REAL_VALUE_TYPE r;
7600 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7601 char float_buf[buf_size] = {'\0'};
7602 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7603 #undef buf_size
7605 if (lane_count == 1)
7606 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7607 else
7608 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7609 lane_count, element_char, float_buf);
7610 return templ;
7614 mnemonic = info.mvn ? "mvni" : "movi";
7615 shift_op = info.msl ? "msl" : "lsl";
7617 if (lane_count == 1)
7618 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7619 mnemonic, UINTVAL (info.value));
7620 else if (info.shift)
7621 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7622 ", %s %d", mnemonic, lane_count, element_char,
7623 UINTVAL (info.value), shift_op, info.shift);
7624 else
7625 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7626 mnemonic, lane_count, element_char, UINTVAL (info.value));
7627 return templ;
7630 char*
7631 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7632 enum machine_mode mode)
7634 enum machine_mode vmode;
7636 gcc_assert (!VECTOR_MODE_P (mode));
7637 vmode = aarch64_simd_container_mode (mode, 64);
7638 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7639 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7642 /* Split operands into moves from op[1] + op[2] into op[0]. */
7644 void
7645 aarch64_split_combinev16qi (rtx operands[3])
7647 unsigned int dest = REGNO (operands[0]);
7648 unsigned int src1 = REGNO (operands[1]);
7649 unsigned int src2 = REGNO (operands[2]);
7650 enum machine_mode halfmode = GET_MODE (operands[1]);
7651 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7652 rtx destlo, desthi;
7654 gcc_assert (halfmode == V16QImode);
7656 if (src1 == dest && src2 == dest + halfregs)
7658 /* No-op move. Can't split to nothing; emit something. */
7659 emit_note (NOTE_INSN_DELETED);
7660 return;
7663 /* Preserve register attributes for variable tracking. */
7664 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7665 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7666 GET_MODE_SIZE (halfmode));
7668 /* Special case of reversed high/low parts. */
7669 if (reg_overlap_mentioned_p (operands[2], destlo)
7670 && reg_overlap_mentioned_p (operands[1], desthi))
7672 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7673 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7674 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7676 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7678 /* Try to avoid unnecessary moves if part of the result
7679 is in the right place already. */
7680 if (src1 != dest)
7681 emit_move_insn (destlo, operands[1]);
7682 if (src2 != dest + halfregs)
7683 emit_move_insn (desthi, operands[2]);
7685 else
7687 if (src2 != dest + halfregs)
7688 emit_move_insn (desthi, operands[2]);
7689 if (src1 != dest)
7690 emit_move_insn (destlo, operands[1]);
7694 /* vec_perm support. */
7696 #define MAX_VECT_LEN 16
7698 struct expand_vec_perm_d
7700 rtx target, op0, op1;
7701 unsigned char perm[MAX_VECT_LEN];
7702 enum machine_mode vmode;
7703 unsigned char nelt;
7704 bool one_vector_p;
7705 bool testing_p;
7708 /* Generate a variable permutation. */
7710 static void
7711 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7713 enum machine_mode vmode = GET_MODE (target);
7714 bool one_vector_p = rtx_equal_p (op0, op1);
7716 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7717 gcc_checking_assert (GET_MODE (op0) == vmode);
7718 gcc_checking_assert (GET_MODE (op1) == vmode);
7719 gcc_checking_assert (GET_MODE (sel) == vmode);
7720 gcc_checking_assert (TARGET_SIMD);
7722 if (one_vector_p)
7724 if (vmode == V8QImode)
7726 /* Expand the argument to a V16QI mode by duplicating it. */
7727 rtx pair = gen_reg_rtx (V16QImode);
7728 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7729 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7731 else
7733 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7736 else
7738 rtx pair;
7740 if (vmode == V8QImode)
7742 pair = gen_reg_rtx (V16QImode);
7743 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7744 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7746 else
7748 pair = gen_reg_rtx (OImode);
7749 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7750 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7755 void
7756 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7758 enum machine_mode vmode = GET_MODE (target);
7759 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7760 bool one_vector_p = rtx_equal_p (op0, op1);
7761 rtx rmask[MAX_VECT_LEN], mask;
7763 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7765 /* The TBL instruction does not use a modulo index, so we must take care
7766 of that ourselves. */
7767 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7768 for (i = 0; i < nelt; ++i)
7769 rmask[i] = mask;
7770 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7771 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7773 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7776 /* Recognize patterns suitable for the TRN instructions. */
7777 static bool
7778 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7780 unsigned int i, odd, mask, nelt = d->nelt;
7781 rtx out, in0, in1, x;
7782 rtx (*gen) (rtx, rtx, rtx);
7783 enum machine_mode vmode = d->vmode;
7785 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7786 return false;
7788 /* Note that these are little-endian tests.
7789 We correct for big-endian later. */
7790 if (d->perm[0] == 0)
7791 odd = 0;
7792 else if (d->perm[0] == 1)
7793 odd = 1;
7794 else
7795 return false;
7796 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7798 for (i = 0; i < nelt; i += 2)
7800 if (d->perm[i] != i + odd)
7801 return false;
7802 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7803 return false;
7806 /* Success! */
7807 if (d->testing_p)
7808 return true;
7810 in0 = d->op0;
7811 in1 = d->op1;
7812 if (BYTES_BIG_ENDIAN)
7814 x = in0, in0 = in1, in1 = x;
7815 odd = !odd;
7817 out = d->target;
7819 if (odd)
7821 switch (vmode)
7823 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7824 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7825 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7826 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7827 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7828 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7829 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7830 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7831 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7832 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7833 default:
7834 return false;
7837 else
7839 switch (vmode)
7841 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7842 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7843 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7844 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7845 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7846 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7847 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7848 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7849 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7850 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7851 default:
7852 return false;
7856 emit_insn (gen (out, in0, in1));
7857 return true;
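/* For instance, with V4SImode and the little-endian numbering tested above,
   TRN1 is selected for the permutation { 0, 4, 2, 6 } and TRN2 for
   { 1, 5, 3, 7 }.  */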
7860 /* Recognize patterns suitable for the UZP instructions. */
7861 static bool
7862 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7864 unsigned int i, odd, mask, nelt = d->nelt;
7865 rtx out, in0, in1, x;
7866 rtx (*gen) (rtx, rtx, rtx);
7867 enum machine_mode vmode = d->vmode;
7869 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7870 return false;
7872 /* Note that these are little-endian tests.
7873 We correct for big-endian later. */
7874 if (d->perm[0] == 0)
7875 odd = 0;
7876 else if (d->perm[0] == 1)
7877 odd = 1;
7878 else
7879 return false;
7880 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7882 for (i = 0; i < nelt; i++)
7884 unsigned elt = (i * 2 + odd) & mask;
7885 if (d->perm[i] != elt)
7886 return false;
7889 /* Success! */
7890 if (d->testing_p)
7891 return true;
7893 in0 = d->op0;
7894 in1 = d->op1;
7895 if (BYTES_BIG_ENDIAN)
7897 x = in0, in0 = in1, in1 = x;
7898 odd = !odd;
7900 out = d->target;
7902 if (odd)
7904 switch (vmode)
7906 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7907 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7908 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7909 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7910 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7911 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7912 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7913 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7914 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7915 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7916 default:
7917 return false;
7920 else
7922 switch (vmode)
7924 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7925 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7926 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7927 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7928 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7929 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7930 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7931 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7932 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7933 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7934 default:
7935 return false;
7939 emit_insn (gen (out, in0, in1));
7940 return true;
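/* For instance, with V4SImode and the little-endian numbering tested above,
   UZP1 matches the permutation { 0, 2, 4, 6 } and UZP2 matches
   { 1, 3, 5, 7 }.  */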
7943 /* Recognize patterns suitable for the ZIP instructions. */
7944 static bool
7945 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7947 unsigned int i, high, mask, nelt = d->nelt;
7948 rtx out, in0, in1, x;
7949 rtx (*gen) (rtx, rtx, rtx);
7950 enum machine_mode vmode = d->vmode;
7952 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7953 return false;
7955 /* Note that these are little-endian tests.
7956 We correct for big-endian later. */
7957 high = nelt / 2;
7958 if (d->perm[0] == high)
7959 /* Do Nothing. */
7961 else if (d->perm[0] == 0)
7962 high = 0;
7963 else
7964 return false;
7965 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7967 for (i = 0; i < nelt / 2; i++)
7969 unsigned elt = (i + high) & mask;
7970 if (d->perm[i * 2] != elt)
7971 return false;
7972 elt = (elt + nelt) & mask;
7973 if (d->perm[i * 2 + 1] != elt)
7974 return false;
7977 /* Success! */
7978 if (d->testing_p)
7979 return true;
7981 in0 = d->op0;
7982 in1 = d->op1;
7983 if (BYTES_BIG_ENDIAN)
7985 x = in0, in0 = in1, in1 = x;
7986 high = !high;
7988 out = d->target;
7990 if (high)
7992 switch (vmode)
7994 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7995 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7996 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7997 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7998 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7999 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8000 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8001 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8002 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8003 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8004 default:
8005 return false;
8008 else
8010 switch (vmode)
8012 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8013 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8014 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8015 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8016 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8017 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8018 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8019 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8020 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8021 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8022 default:
8023 return false;
8027 emit_insn (gen (out, in0, in1));
8028 return true;
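/* For instance, with V4SImode and the little-endian numbering tested above,
   ZIP1 corresponds to the permutation { 0, 4, 1, 5 } and ZIP2 to
   { 2, 6, 3, 7 }.  */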
8031 static bool
8032 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8034 rtx (*gen) (rtx, rtx, rtx);
8035 rtx out = d->target;
8036 rtx in0;
8037 enum machine_mode vmode = d->vmode;
8038 unsigned int i, elt, nelt = d->nelt;
8039 rtx lane;
8041 /* TODO: This may not be big-endian safe. */
8042 if (BYTES_BIG_ENDIAN)
8043 return false;
8045 elt = d->perm[0];
8046 for (i = 1; i < nelt; i++)
8048 if (elt != d->perm[i])
8049 return false;
8052 /* The generic preparation in aarch64_expand_vec_perm_const_1
8053 swaps the operand order and the permute indices if it finds
8054 d->perm[0] to be in the second operand. Thus, we can always
8055 use d->op0 and need not do any extra arithmetic to get the
8056 correct lane number. */
8057 in0 = d->op0;
8058 lane = GEN_INT (elt);
8060 switch (vmode)
8062 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8063 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8064 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8065 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8066 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8067 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8068 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8069 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8070 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8071 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8072 default:
8073 return false;
8076 emit_insn (gen (out, in0, lane));
8077 return true;
8080 static bool
8081 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8083 rtx rperm[MAX_VECT_LEN], sel;
8084 enum machine_mode vmode = d->vmode;
8085 unsigned int i, nelt = d->nelt;
8087 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8088 numbering of elements for big-endian, we must reverse the order. */
8089 if (BYTES_BIG_ENDIAN)
8090 return false;
8092 if (d->testing_p)
8093 return true;
8095 /* Generic code will try constant permutation twice. Once with the
8096 original mode and again with the elements lowered to QImode.
8097 So wait and don't do the selector expansion ourselves. */
8098 if (vmode != V8QImode && vmode != V16QImode)
8099 return false;
8101 for (i = 0; i < nelt; ++i)
8102 rperm[i] = GEN_INT (d->perm[i]);
8103 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8104 sel = force_reg (vmode, sel);
8106 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8107 return true;
8110 static bool
8111 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8113 /* The pattern matching functions above are written to look for a small
8114 number to begin the sequence (0, 1, N/2). If we begin with an index
8115 from the second operand, we can swap the operands. */
8116 if (d->perm[0] >= d->nelt)
8118 unsigned i, nelt = d->nelt;
8119 rtx x;
8121 for (i = 0; i < nelt; ++i)
8122 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8124 x = d->op0;
8125 d->op0 = d->op1;
8126 d->op1 = x;
8129 if (TARGET_SIMD)
8131 if (aarch64_evpc_zip (d))
8132 return true;
8133 else if (aarch64_evpc_uzp (d))
8134 return true;
8135 else if (aarch64_evpc_trn (d))
8136 return true;
8137 else if (aarch64_evpc_dup (d))
8138 return true;
8139 return aarch64_evpc_tbl (d);
8141 return false;
8144 /* Expand a vec_perm_const pattern. */
8146 bool
8147 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8149 struct expand_vec_perm_d d;
8150 int i, nelt, which;
8152 d.target = target;
8153 d.op0 = op0;
8154 d.op1 = op1;
8156 d.vmode = GET_MODE (target);
8157 gcc_assert (VECTOR_MODE_P (d.vmode));
8158 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8159 d.testing_p = false;
8161 for (i = which = 0; i < nelt; ++i)
8163 rtx e = XVECEXP (sel, 0, i);
8164 int ei = INTVAL (e) & (2 * nelt - 1);
8165 which |= (ei < nelt ? 1 : 2);
8166 d.perm[i] = ei;
8169 switch (which)
8171 default:
8172 gcc_unreachable ();
8174 case 3:
8175 d.one_vector_p = false;
8176 if (!rtx_equal_p (op0, op1))
8177 break;
8179 /* The elements of PERM do not suggest that only the first operand
8180 is used, but both operands are identical. Allow easier matching
8181 of the permutation by folding the permutation into the single
8182 input vector. */
8183 /* Fall Through. */
8184 case 2:
8185 for (i = 0; i < nelt; ++i)
8186 d.perm[i] &= nelt - 1;
8187 d.op0 = op1;
8188 d.one_vector_p = true;
8189 break;
8191 case 1:
8192 d.op1 = op0;
8193 d.one_vector_p = true;
8194 break;
8197 return aarch64_expand_vec_perm_const_1 (&d);
8200 static bool
8201 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8202 const unsigned char *sel)
8204 struct expand_vec_perm_d d;
8205 unsigned int i, nelt, which;
8206 bool ret;
8208 d.vmode = vmode;
8209 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8210 d.testing_p = true;
8211 memcpy (d.perm, sel, nelt);
8213 /* Calculate whether all elements are in one vector. */
8214 for (i = which = 0; i < nelt; ++i)
8216 unsigned char e = d.perm[i];
8217 gcc_assert (e < 2 * nelt);
8218 which |= (e < nelt ? 1 : 2);
8221 /* If all elements are from the second vector, reindex as if from the
8222 first vector. */
8223 if (which == 2)
8224 for (i = 0; i < nelt; ++i)
8225 d.perm[i] -= nelt;
8227 /* Check whether the mask can be applied to a single vector. */
8228 d.one_vector_p = (which != 3);
8230 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8231 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8232 if (!d.one_vector_p)
8233 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8235 start_sequence ();
8236 ret = aarch64_expand_vec_perm_const_1 (&d);
8237 end_sequence ();
8239 return ret;
8242 #undef TARGET_ADDRESS_COST
8243 #define TARGET_ADDRESS_COST aarch64_address_cost
8245 /* This hook determines whether unnamed bitfields affect the alignment
8246 of the containing structure. The hook returns true if the structure
8247 should inherit the alignment requirements of an unnamed bitfield's
8248 type. */
8249 #undef TARGET_ALIGN_ANON_BITFIELD
8250 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8252 #undef TARGET_ASM_ALIGNED_DI_OP
8253 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8255 #undef TARGET_ASM_ALIGNED_HI_OP
8256 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8258 #undef TARGET_ASM_ALIGNED_SI_OP
8259 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8261 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8262 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8263 hook_bool_const_tree_hwi_hwi_const_tree_true
8265 #undef TARGET_ASM_FILE_START
8266 #define TARGET_ASM_FILE_START aarch64_start_file
8268 #undef TARGET_ASM_OUTPUT_MI_THUNK
8269 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8271 #undef TARGET_ASM_SELECT_RTX_SECTION
8272 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8274 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8275 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8277 #undef TARGET_BUILD_BUILTIN_VA_LIST
8278 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8280 #undef TARGET_CALLEE_COPIES
8281 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8283 #undef TARGET_CAN_ELIMINATE
8284 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8286 #undef TARGET_CANNOT_FORCE_CONST_MEM
8287 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8289 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8290 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8292 /* Only the least significant bit is used for initialization guard
8293 variables. */
8294 #undef TARGET_CXX_GUARD_MASK_BIT
8295 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
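
/* The guard-mask hook above tells the C++ front end that only bit 0 of a
   static-initialization guard variable needs to be examined to decide
   whether the object has already been constructed.  The sketch below is
   hand-written illustrative C, not the code GCC emits; the guard variable
   and helper names are invented.  */

#include <stdint.h>
#include <stdio.h>

static uint64_t guard_for_static_object;  /* Stand-in for a guard word.  */

static int
needs_construction (void)
{
  /* With the hook returning true, only the least significant bit is
     tested, rather than the whole first byte.  */
  return (guard_for_static_object & 1) == 0;
}

int
main (void)
{
  if (needs_construction ())
    {
      /* ... run the constructor, then mark the object as initialised.  */
      guard_for_static_object |= 1;
    }
  printf ("constructed: %s\n", needs_construction () ? "no" : "yes");
  return 0;
}
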
8297 #undef TARGET_C_MODE_FOR_SUFFIX
8298 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8300 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8301 #undef TARGET_DEFAULT_TARGET_FLAGS
8302 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8303 #endif
8305 #undef TARGET_CLASS_MAX_NREGS
8306 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8308 #undef TARGET_BUILTIN_DECL
8309 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8311 #undef TARGET_EXPAND_BUILTIN
8312 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8314 #undef TARGET_EXPAND_BUILTIN_VA_START
8315 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8317 #undef TARGET_FOLD_BUILTIN
8318 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8320 #undef TARGET_FUNCTION_ARG
8321 #define TARGET_FUNCTION_ARG aarch64_function_arg
8323 #undef TARGET_FUNCTION_ARG_ADVANCE
8324 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8326 #undef TARGET_FUNCTION_ARG_BOUNDARY
8327 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8329 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8330 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8332 #undef TARGET_FUNCTION_VALUE
8333 #define TARGET_FUNCTION_VALUE aarch64_function_value
8335 #undef TARGET_FUNCTION_VALUE_REGNO_P
8336 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8338 #undef TARGET_FRAME_POINTER_REQUIRED
8339 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8341 #undef TARGET_GIMPLE_FOLD_BUILTIN
8342 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8344 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8345 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8347 #undef TARGET_INIT_BUILTINS
8348 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8350 #undef TARGET_LEGITIMATE_ADDRESS_P
8351 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8353 #undef TARGET_LEGITIMATE_CONSTANT_P
8354 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8356 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8357 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8359 #undef TARGET_LRA_P
8360 #define TARGET_LRA_P aarch64_lra_p
8362 #undef TARGET_MANGLE_TYPE
8363 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8365 #undef TARGET_MEMORY_MOVE_COST
8366 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8368 #undef TARGET_MUST_PASS_IN_STACK
8369 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8371 /* This target hook should return true if accesses to volatile bitfields
8372 should use the narrowest mode possible. It should return false if these
8373 accesses should use the bitfield container type. */
8374 #undef TARGET_NARROW_VOLATILE_BITFIELD
8375 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
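
/* With the hook above returning false, a volatile bit-field is expected to
   be accessed through its container type rather than through the narrowest
   machine mode covering the bits.  The snippet below only shows the
   source-level situation the hook governs; the access width is a code
   generation decision, so the comments state the intent rather than
   demonstrate it.  The register layout is invented for the example.  */

#include <stdint.h>

struct device_regs
{
  volatile uint32_t flag : 1;
  volatile uint32_t mode : 3;
  volatile uint32_t      : 28;  /* Pad out the 32-bit container.  */
};

static struct device_regs regs; /* Stand-in for a memory-mapped register.  */

int
main (void)
{
  regs.mode = 5;                /* Intended as a 32-bit read-modify-write.  */
  return regs.flag;             /* Intended as a 32-bit load.  */
}
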
8377 #undef TARGET_OPTION_OVERRIDE
8378 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8380 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8381 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8382 aarch64_override_options_after_change
8384 #undef TARGET_PASS_BY_REFERENCE
8385 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8387 #undef TARGET_PREFERRED_RELOAD_CLASS
8388 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8390 #undef TARGET_SECONDARY_RELOAD
8391 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8393 #undef TARGET_SHIFT_TRUNCATION_MASK
8394 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8396 #undef TARGET_SETUP_INCOMING_VARARGS
8397 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8399 #undef TARGET_STRUCT_VALUE_RTX
8400 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8402 #undef TARGET_REGISTER_MOVE_COST
8403 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8405 #undef TARGET_RETURN_IN_MEMORY
8406 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8408 #undef TARGET_RETURN_IN_MSB
8409 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8411 #undef TARGET_RTX_COSTS
8412 #define TARGET_RTX_COSTS aarch64_rtx_costs
8414 #undef TARGET_TRAMPOLINE_INIT
8415 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8417 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8418 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8420 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8421 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8423 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8424 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8426 #undef TARGET_VECTORIZE_ADD_STMT_COST
8427 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8429 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8430 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8431 aarch64_builtin_vectorization_cost
8433 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8434 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8436 #undef TARGET_VECTORIZE_BUILTINS
8437 #define TARGET_VECTORIZE_BUILTINS
8439 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8440 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8441 aarch64_builtin_vectorized_function
8443 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8444 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8445 aarch64_autovectorize_vector_sizes
8447 /* Section anchor support. */
8449 #undef TARGET_MIN_ANCHOR_OFFSET
8450 #define TARGET_MIN_ANCHOR_OFFSET -256
8452 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8453 byte offset; we can do much more for larger data types, but have no way
8454 to determine the size of the access. We assume accesses are aligned. */
8455 #undef TARGET_MAX_ANCHOR_OFFSET
8456 #define TARGET_MAX_ANCHOR_OFFSET 4095
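
/* Section anchors let several small globals be reached from one anchor
   symbol plus a constant offset, so the window advertised above must fit
   what a load/store immediate can encode: 4095 is the unsigned byte-offset
   limit stated in the comment, and -256 presumably corresponds to the
   signed unscaled-offset range.  The standalone sketch below shows the
   kind of data layout that benefits; the variable names and sizes are
   invented, and whether the objects are actually anchored depends on
   optimisation options such as -fsection-anchors.  */

#include <stdio.h>

static unsigned char table_a[64];
static unsigned char table_b[64];
static int counter;

static int
sum_of_globals (void)
{
  /* All three objects are small enough to sit inside one 4 KiB window,
     so a single anchor could serve every access here.  */
  return table_a[0] + table_b[0] + counter;
}

int
main (void)
{
  counter = 41;
  table_a[0] = 1;
  printf ("%d\n", sum_of_globals ());  /* Prints 42.  */
  return 0;
}
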
8458 #undef TARGET_VECTOR_ALIGNMENT
8459 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8461 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8462 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8463 aarch64_simd_vector_alignment_reachable
8465 /* vec_perm support. */
8467 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8468 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8469 aarch64_vectorize_vec_perm_const_ok
8472 #undef TARGET_FIXED_CONDITION_CODE_REGS
8473 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8475 struct gcc_target targetm = TARGET_INITIALIZER;
8477 #include "gt-aarch64.h"