[official-gcc.git] / gcc / config / aarch64 / aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
70 /* Classifies an address.
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
90 ADDRESS_SYMBOLIC:
91 A constant symbolic address, in pc-relative literal pool. */
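/* Illustrative assembly forms for each class (an addition for exposition,
   not part of the original source), assuming an X-register base and a
   64-bit access:

     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:symbol]
     ADDRESS_SYMBOLIC   ldr x0, .Lliteral_pool_entry  */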
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
111 struct simd_immediate_info
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
157 /* Tuning parameters. */
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
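/* Sketch of how NAMED_PARAM expands (illustrative only): with
   designated-initializer support, NAMED_PARAM (memmov_cost, 4) becomes
   ".memmov_cost = (4)", so each initializer names its field; without it,
   the macro expands to just "(4)" and the initializers below must appear
   in the same order as the fields of the corresponding struct.  */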
165 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166 __extension__
167 #endif
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4)
227 static const struct tune_params cortexa53_tunings =
229 &cortexa53_extra_costs,
230 &generic_addrcost_table,
231 &generic_regmove_cost,
232 &generic_vector_cost,
233 NAMED_PARAM (memmov_cost, 4)
236 /* A processor implementing AArch64. */
237 struct processor
239 const char *const name;
240 enum aarch64_processor core;
241 const char *arch;
242 const unsigned long flags;
243 const struct tune_params *const tune;
246 /* Processor cores implementing AArch64. */
247 static const struct processor all_cores[] =
249 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
250 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
251 #include "aarch64-cores.def"
252 #undef AARCH64_CORE
253 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
254 {NULL, aarch64_none, NULL, 0, NULL}
257 /* Architectures implementing AArch64. */
258 static const struct processor all_architectures[] =
260 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
261 {NAME, CORE, #ARCH, FLAGS, NULL},
262 #include "aarch64-arches.def"
263 #undef AARCH64_ARCH
264 {NULL, aarch64_none, NULL, 0, NULL}
267 /* Target specification. These are populated as command-line arguments
268 are processed, or NULL if not specified. */
269 static const struct processor *selected_arch;
270 static const struct processor *selected_cpu;
271 static const struct processor *selected_tune;
273 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
275 /* An ISA extension in the co-processor and main instruction set space. */
276 struct aarch64_option_extension
278 const char *const name;
279 const unsigned long flags_on;
280 const unsigned long flags_off;
283 /* ISA extensions in AArch64. */
284 static const struct aarch64_option_extension all_extensions[] =
286 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
287 {NAME, FLAGS_ON, FLAGS_OFF},
288 #include "aarch64-option-extensions.def"
289 #undef AARCH64_OPT_EXTENSION
290 {NULL, 0, 0}
293 /* Used to track the size of an address when generating a pre/post
294 increment address. */
295 static enum machine_mode aarch64_memory_reference_mode;
297 /* Used to force GTY into this file. */
298 static GTY(()) int gty_dummy;
300 /* A table of valid AArch64 "bitmask immediate" values for
301 logical instructions. */
303 #define AARCH64_NUM_BITMASKS 5334
304 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
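/* Illustrative note (not in the original source): a "bitmask immediate"
   is, roughly, a 64-bit value made of identical 2-, 4-, 8-, 16-, 32- or
   64-bit elements, where each element is a rotated run of contiguous set
   bits.  For example 0x00ff00ff00ff00ff and 0x0003fffc0003fffc are
   encodable as logical immediates, while 0x0000000000012345 is not.
   There are 5334 such 64-bit values, hence AARCH64_NUM_BITMASKS above.  */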
306 /* Did we set flag_omit_frame_pointer just so
307 aarch64_frame_pointer_required would be called? */
308 static bool faked_omit_frame_pointer;
310 typedef enum aarch64_cond_code
312 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
313 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
314 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
316 aarch64_cc;
318 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
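/* For example (illustrative): the codes above are laid out in
   complementary pairs, so flipping the low bit maps AARCH64_EQ (0) to
   AARCH64_NE (1), AARCH64_GE (10) to AARCH64_LT (11), and so on.  */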
320 /* The condition codes of the processor, and the inverse function. */
321 static const char * const aarch64_condition_codes[] =
323 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
324 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
327 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
328 unsigned
329 aarch64_dbx_register_number (unsigned regno)
331 if (GP_REGNUM_P (regno))
332 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
333 else if (regno == SP_REGNUM)
334 return AARCH64_DWARF_SP;
335 else if (FP_REGNUM_P (regno))
336 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
338 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
339 equivalent DWARF register. */
340 return DWARF_FRAME_REGISTERS;
343 /* Return TRUE if MODE is any of the large INT modes. */
344 static bool
345 aarch64_vect_struct_mode_p (enum machine_mode mode)
347 return mode == OImode || mode == CImode || mode == XImode;
350 /* Return TRUE if MODE is any of the vector modes. */
351 static bool
352 aarch64_vector_mode_p (enum machine_mode mode)
354 return aarch64_vector_mode_supported_p (mode)
355 || aarch64_vect_struct_mode_p (mode);
358 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
359 static bool
360 aarch64_array_mode_supported_p (enum machine_mode mode,
361 unsigned HOST_WIDE_INT nelems)
363 if (TARGET_SIMD
364 && AARCH64_VALID_SIMD_QREG_MODE (mode)
365 && (nelems >= 2 && nelems <= 4))
366 return true;
368 return false;
371 /* Implement HARD_REGNO_NREGS. */
374 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
376 switch (aarch64_regno_regclass (regno))
378 case FP_REGS:
379 case FP_LO_REGS:
380 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
381 default:
382 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
384 gcc_unreachable ();
387 /* Implement HARD_REGNO_MODE_OK. */
390 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
392 if (GET_MODE_CLASS (mode) == MODE_CC)
393 return regno == CC_REGNUM;
395 if (regno == SP_REGNUM)
396 /* The purpose of comparing with ptr_mode is to support the
397 global register variable associated with the stack pointer
398 register via the syntax of asm ("wsp") in ILP32. */
399 return mode == Pmode || mode == ptr_mode;
401 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
402 return mode == Pmode;
404 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
405 return 1;
407 if (FP_REGNUM_P (regno))
409 if (aarch64_vect_struct_mode_p (mode))
410 return
411 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
412 else
413 return 1;
416 return 0;
419 /* Return true if calls to DECL should be treated as
420 long-calls (i.e. called via a register). */
421 static bool
422 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
424 return false;
427 /* Return true if calls to symbol-ref SYM should be treated as
428 long-calls (i.e. called via a register). */
429 bool
430 aarch64_is_long_call_p (rtx sym)
432 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
435 /* Return true if the offsets to a zero/sign-extract operation
436 represent an expression that matches an extend operation. The
437 operands represent the parameters from
439 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
440 bool
441 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
442 rtx extract_imm)
444 HOST_WIDE_INT mult_val, extract_val;
446 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
447 return false;
449 mult_val = INTVAL (mult_imm);
450 extract_val = INTVAL (extract_imm);
452 if (extract_val > 8
453 && extract_val < GET_MODE_BITSIZE (mode)
454 && exact_log2 (extract_val & ~7) > 0
455 && (extract_val & 7) <= 4
456 && mult_val == (1 << (extract_val & 7)))
457 return true;
459 return false;
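/* Worked example (illustrative, not from the original source): for
   DImode, mult_imm == 4 and extract_imm == 34 satisfy the test above:
   34 & ~7 == 32 is a power of two (the width being extended from),
   34 & 7 == 2 is the shift amount, and 4 == 1 << 2.  The extract is
   therefore equivalent to a 32-bit value zero- or sign-extended and
   shifted left by 2, i.e. an extended-register operand such as
   "uxtw #2".  */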
462 /* Emit an insn that's a simple single-set. Both the operands must be
463 known to be valid. */
464 inline static rtx
465 emit_set_insn (rtx x, rtx y)
467 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
470 /* X and Y are two things to compare using CODE. Emit the compare insn and
471 return the rtx for register 0 in the proper mode. */
473 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
475 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
476 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
478 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
479 return cc_reg;
482 /* Build the SYMBOL_REF for __tls_get_addr. */
484 static GTY(()) rtx tls_get_addr_libfunc;
487 aarch64_tls_get_addr (void)
489 if (!tls_get_addr_libfunc)
490 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
491 return tls_get_addr_libfunc;
494 /* Return the TLS model to use for ADDR. */
496 static enum tls_model
497 tls_symbolic_operand_type (rtx addr)
499 enum tls_model tls_kind = TLS_MODEL_NONE;
500 rtx sym, addend;
502 if (GET_CODE (addr) == CONST)
504 split_const (addr, &sym, &addend);
505 if (GET_CODE (sym) == SYMBOL_REF)
506 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
508 else if (GET_CODE (addr) == SYMBOL_REF)
509 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
511 return tls_kind;
514 /* We'll allow lo_sum's in our legitimate addresses so that
515 combine can take care of combining addresses where
516 necessary, but for generation purposes, we'll generate the address
517 as:
518 RTL Absolute
519 tmp = hi (symbol_ref); adrp x1, foo
520 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
523 PIC TLS
524 adrp x1, :got:foo adrp tmp, :tlsgd:foo
525 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
526 bl __tls_get_addr
529 Load TLS symbol, depending on TLS mechanism and TLS access model.
531 Global Dynamic - Traditional TLS:
532 adrp tmp, :tlsgd:imm
533 add dest, tmp, #:tlsgd_lo12:imm
534 bl __tls_get_addr
536 Global Dynamic - TLS Descriptors:
537 adrp dest, :tlsdesc:imm
538 ldr tmp, [dest, #:tlsdesc_lo12:imm]
539 add dest, dest, #:tlsdesc_lo12:imm
540 blr tmp
541 mrs tp, tpidr_el0
542 add dest, dest, tp
544 Initial Exec:
545 mrs tp, tpidr_el0
546 adrp tmp, :gottprel:imm
547 ldr dest, [tmp, #:gottprel_lo12:imm]
548 add dest, dest, tp
550 Local Exec:
551 mrs tp, tpidr_el0
552 add t0, tp, #:tprel_hi12:imm
553 add t0, #:tprel_lo12_nc:imm
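   Illustrative C-level example (an assumption for exposition, not part
   of the original source): given "__thread int x;", a reference to x
   from a shared library typically uses one of the dynamic sequences
   above (traditional TLS or TLS descriptors, depending on configuration),
   -ftls-model=initial-exec selects the Initial Exec sequence, and a
   reference from a non-PIC executable can use the Local Exec sequence.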
556 static void
557 aarch64_load_symref_appropriately (rtx dest, rtx imm,
558 enum aarch64_symbol_type type)
560 switch (type)
562 case SYMBOL_SMALL_ABSOLUTE:
564 /* In ILP32, the mode of dest can be either SImode or DImode. */
565 rtx tmp_reg = dest;
566 enum machine_mode mode = GET_MODE (dest);
568 gcc_assert (mode == Pmode || mode == ptr_mode);
570 if (can_create_pseudo_p ())
571 tmp_reg = gen_reg_rtx (mode);
573 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
574 emit_insn (gen_add_losym (dest, tmp_reg, imm));
575 return;
578 case SYMBOL_TINY_ABSOLUTE:
579 emit_insn (gen_rtx_SET (Pmode, dest, imm));
580 return;
582 case SYMBOL_SMALL_GOT:
584 /* In ILP32, the mode of dest can be either SImode or DImode,
585 while the got entry is always of SImode size. The mode of
586 dest depends on how dest is used: if dest is assigned to a
587 pointer (e.g. in the memory), it has SImode; it may have
588 DImode if dest is dereferenced to access the memory.
589 This is why we have to handle three different ldr_got_small
590 patterns here (two patterns for ILP32). */
591 rtx tmp_reg = dest;
592 enum machine_mode mode = GET_MODE (dest);
594 if (can_create_pseudo_p ())
595 tmp_reg = gen_reg_rtx (mode);
597 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
598 if (mode == ptr_mode)
600 if (mode == DImode)
601 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
602 else
603 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
605 else
607 gcc_assert (mode == Pmode);
608 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
611 return;
614 case SYMBOL_SMALL_TLSGD:
616 rtx insns;
617 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
619 start_sequence ();
620 emit_call_insn (gen_tlsgd_small (result, imm));
621 insns = get_insns ();
622 end_sequence ();
624 RTL_CONST_CALL_P (insns) = 1;
625 emit_libcall_block (insns, dest, result, imm);
626 return;
629 case SYMBOL_SMALL_TLSDESC:
631 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
632 rtx tp;
634 emit_insn (gen_tlsdesc_small (imm));
635 tp = aarch64_load_tp (NULL);
636 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
637 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
638 return;
641 case SYMBOL_SMALL_GOTTPREL:
643 rtx tmp_reg = gen_reg_rtx (Pmode);
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsie_small (tmp_reg, imm));
646 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
647 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
648 return;
651 case SYMBOL_SMALL_TPREL:
653 rtx tp = aarch64_load_tp (NULL);
654 emit_insn (gen_tlsle_small (dest, tp, imm));
655 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
656 return;
659 case SYMBOL_TINY_GOT:
660 emit_insn (gen_ldr_got_tiny (dest, imm));
661 return;
663 default:
664 gcc_unreachable ();
668 /* Emit a move from SRC to DEST. Assume that the move expanders can
669 handle all moves if !can_create_pseudo_p (). The distinction is
670 important because, unlike emit_move_insn, the move expanders know
671 how to force Pmode objects into the constant pool even when the
672 constant pool address is not itself legitimate. */
673 static rtx
674 aarch64_emit_move (rtx dest, rtx src)
676 return (can_create_pseudo_p ()
677 ? emit_move_insn (dest, src)
678 : emit_move_insn_1 (dest, src));
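/* Split a 128-bit (TImode or TFmode) move from SRC to DST into two
   64-bit operations; used when the move cannot be performed by a single
   instruction (see aarch64_split_128bit_move_p below).  */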
681 void
682 aarch64_split_128bit_move (rtx dst, rtx src)
684 rtx low_dst;
686 enum machine_mode src_mode = GET_MODE (src);
687 enum machine_mode dst_mode = GET_MODE (dst);
688 int src_regno = REGNO (src);
689 int dst_regno = REGNO (dst);
691 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
693 if (REG_P (dst) && REG_P (src))
695 gcc_assert (src_mode == TImode || src_mode == TFmode);
697 /* Handle r -> w, w -> r. */
698 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
700 switch (src_mode) {
701 case TImode:
702 emit_insn
703 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
704 emit_insn
705 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
706 return;
707 case TFmode:
708 emit_insn
709 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
710 emit_insn
711 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
712 return;
713 default:
714 gcc_unreachable ();
717 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
719 switch (src_mode) {
720 case TImode:
721 emit_insn
722 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
723 emit_insn
724 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
725 return;
726 case TFmode:
727 emit_insn
728 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
729 emit_insn
730 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
731 return;
732 default:
733 gcc_unreachable ();
736 /* Fall through to r -> r cases. */
739 switch (dst_mode) {
740 case TImode:
741 low_dst = gen_lowpart (word_mode, dst);
742 if (REG_P (low_dst)
743 && reg_overlap_mentioned_p (low_dst, src))
745 aarch64_emit_move (gen_highpart (word_mode, dst),
746 gen_highpart_mode (word_mode, TImode, src));
747 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
749 else
751 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
752 aarch64_emit_move (gen_highpart (word_mode, dst),
753 gen_highpart_mode (word_mode, TImode, src));
755 return;
756 case TFmode:
757 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
758 gen_rtx_REG (DFmode, src_regno));
759 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
760 gen_rtx_REG (DFmode, src_regno + 1));
761 return;
762 default:
763 gcc_unreachable ();
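/* Return true if a 128-bit move from SRC to DST must be split into two
   64-bit operations, which is the case unless both DST and SRC are
   SIMD/FP registers.  */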
767 bool
768 aarch64_split_128bit_move_p (rtx dst, rtx src)
770 return (! REG_P (src)
771 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
774 /* Split a complex SIMD combine. */
776 void
777 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
779 enum machine_mode src_mode = GET_MODE (src1);
780 enum machine_mode dst_mode = GET_MODE (dst);
782 gcc_assert (VECTOR_MODE_P (dst_mode));
784 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
786 rtx (*gen) (rtx, rtx, rtx);
788 switch (src_mode)
790 case V8QImode:
791 gen = gen_aarch64_simd_combinev8qi;
792 break;
793 case V4HImode:
794 gen = gen_aarch64_simd_combinev4hi;
795 break;
796 case V2SImode:
797 gen = gen_aarch64_simd_combinev2si;
798 break;
799 case V2SFmode:
800 gen = gen_aarch64_simd_combinev2sf;
801 break;
802 case DImode:
803 gen = gen_aarch64_simd_combinedi;
804 break;
805 case DFmode:
806 gen = gen_aarch64_simd_combinedf;
807 break;
808 default:
809 gcc_unreachable ();
812 emit_insn (gen (dst, src1, src2));
813 return;
817 /* Split a complex SIMD move. */
819 void
820 aarch64_split_simd_move (rtx dst, rtx src)
822 enum machine_mode src_mode = GET_MODE (src);
823 enum machine_mode dst_mode = GET_MODE (dst);
825 gcc_assert (VECTOR_MODE_P (dst_mode));
827 if (REG_P (dst) && REG_P (src))
829 rtx (*gen) (rtx, rtx);
831 gcc_assert (VECTOR_MODE_P (src_mode));
833 switch (src_mode)
835 case V16QImode:
836 gen = gen_aarch64_split_simd_movv16qi;
837 break;
838 case V8HImode:
839 gen = gen_aarch64_split_simd_movv8hi;
840 break;
841 case V4SImode:
842 gen = gen_aarch64_split_simd_movv4si;
843 break;
844 case V2DImode:
845 gen = gen_aarch64_split_simd_movv2di;
846 break;
847 case V4SFmode:
848 gen = gen_aarch64_split_simd_movv4sf;
849 break;
850 case V2DFmode:
851 gen = gen_aarch64_split_simd_movv2df;
852 break;
853 default:
854 gcc_unreachable ();
857 emit_insn (gen (dst, src));
858 return;
862 static rtx
863 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
865 if (can_create_pseudo_p ())
866 return force_reg (mode, value);
867 else
869 x = aarch64_emit_move (x, value);
870 return x;
875 static rtx
876 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
878 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
880 rtx high;
881 /* Load the full offset into a register. This
882 might be improvable in the future. */
883 high = GEN_INT (offset);
884 offset = 0;
885 high = aarch64_force_temporary (mode, temp, high);
886 reg = aarch64_force_temporary (mode, temp,
887 gen_rtx_PLUS (mode, high, reg));
889 return plus_constant (mode, reg, offset);
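/* Example (illustrative, not from the original source): an offset such
   as 4096 fits the AArch64 add-immediate encoding, so it is simply
   folded into the address with plus_constant; an offset such as 0x12345
   does not, so it is first materialised in TEMP and added to REG with a
   register-register add.  */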
892 void
893 aarch64_expand_mov_immediate (rtx dest, rtx imm)
895 enum machine_mode mode = GET_MODE (dest);
896 unsigned HOST_WIDE_INT mask;
897 int i;
898 bool first;
899 unsigned HOST_WIDE_INT val;
900 bool subtargets;
901 rtx subtarget;
902 int one_match, zero_match;
904 gcc_assert (mode == SImode || mode == DImode);
906 /* Check on what type of symbol it is. */
907 if (GET_CODE (imm) == SYMBOL_REF
908 || GET_CODE (imm) == LABEL_REF
909 || GET_CODE (imm) == CONST)
911 rtx mem, base, offset;
912 enum aarch64_symbol_type sty;
914 /* If we have (const (plus symbol offset)), separate out the offset
915 before we start classifying the symbol. */
916 split_const (imm, &base, &offset);
918 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
919 switch (sty)
921 case SYMBOL_FORCE_TO_MEM:
922 if (offset != const0_rtx
923 && targetm.cannot_force_const_mem (mode, imm))
925 gcc_assert(can_create_pseudo_p ());
926 base = aarch64_force_temporary (mode, dest, base);
927 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
928 aarch64_emit_move (dest, base);
929 return;
931 mem = force_const_mem (ptr_mode, imm);
932 gcc_assert (mem);
933 if (mode != ptr_mode)
934 mem = gen_rtx_ZERO_EXTEND (mode, mem);
935 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
938 case SYMBOL_SMALL_TLSGD:
939 case SYMBOL_SMALL_TLSDESC:
940 case SYMBOL_SMALL_GOTTPREL:
941 case SYMBOL_SMALL_GOT:
942 case SYMBOL_TINY_GOT:
943 if (offset != const0_rtx)
945 gcc_assert(can_create_pseudo_p ());
946 base = aarch64_force_temporary (mode, dest, base);
947 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
948 aarch64_emit_move (dest, base);
949 return;
951 /* FALLTHRU */
953 case SYMBOL_SMALL_TPREL:
954 case SYMBOL_SMALL_ABSOLUTE:
955 case SYMBOL_TINY_ABSOLUTE:
956 aarch64_load_symref_appropriately (dest, imm, sty);
957 return;
959 default:
960 gcc_unreachable ();
964 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
966 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
967 return;
970 if (!CONST_INT_P (imm))
972 if (GET_CODE (imm) == HIGH)
973 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
974 else
976 rtx mem = force_const_mem (mode, imm);
977 gcc_assert (mem);
978 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
981 return;
984 if (mode == SImode)
986 /* We know we can't do this in 1 insn, and we must be able to do it
987 in two; so don't mess around looking for sequences that don't buy
988 us anything. */
989 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
990 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
991 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
992 return;
995 /* Remaining cases are all for DImode. */
997 val = INTVAL (imm);
998 subtargets = optimize && can_create_pseudo_p ();
1000 one_match = 0;
1001 zero_match = 0;
1002 mask = 0xffff;
1004 for (i = 0; i < 64; i += 16, mask <<= 16)
1006 if ((val & mask) == 0)
1007 zero_match++;
1008 else if ((val & mask) == mask)
1009 one_match++;
1012 if (one_match == 2)
1014 mask = 0xffff;
1015 for (i = 0; i < 64; i += 16, mask <<= 16)
1017 if ((val & mask) != mask)
1019 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1020 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1021 GEN_INT ((val >> i) & 0xffff)));
1022 return;
1025 gcc_unreachable ();
1028 if (zero_match == 2)
1029 goto simple_sequence;
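  /* Illustrative examples (not in the original source): for
     val == 0xffff1234ffff5678, one_match == 2, so the branch above forces
     the first non-0xffff chunk to all-ones (giving an MOVN-encodable
     value) and then patches that chunk with a single insv (MOVK); for
     val == 0x0000abcd00001234, zero_match == 2 and control jumps straight
     to the simple_sequence of MOVZ/MOVK at the end of the function.  */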
1031 mask = 0x0ffff0000UL;
1032 for (i = 16; i < 64; i += 16, mask <<= 16)
1034 HOST_WIDE_INT comp = mask & ~(mask - 1);
1036 if (aarch64_uimm12_shift (val - (val & mask)))
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - (val & mask))));
1043 return;
1045 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val + comp) & mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val + comp) & mask))));
1053 return;
1055 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT ((val - comp) | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - ((val - comp) | ~mask))));
1063 return;
1065 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1067 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1069 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1070 GEN_INT (val | ~mask)));
1071 emit_insn (gen_adddi3 (dest, subtarget,
1072 GEN_INT (val - (val | ~mask))));
1073 return;
1077 /* See if we can do it by arithmetically combining two
1078 immediates. */
1079 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1081 int j;
1082 mask = 0xffff;
1084 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1085 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1087 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1088 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1089 GEN_INT (aarch64_bitmasks[i])));
1090 emit_insn (gen_adddi3 (dest, subtarget,
1091 GEN_INT (val - aarch64_bitmasks[i])));
1092 return;
1095 for (j = 0; j < 64; j += 16, mask <<= 16)
1097 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1099 emit_insn (gen_rtx_SET (VOIDmode, dest,
1100 GEN_INT (aarch64_bitmasks[i])));
1101 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1102 GEN_INT ((val >> j) & 0xffff)));
1103 return;
1108 /* See if we can do it by logically combining two immediates. */
1109 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1111 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1113 int j;
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1118 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1119 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1120 GEN_INT (aarch64_bitmasks[i])));
1121 emit_insn (gen_iordi3 (dest, subtarget,
1122 GEN_INT (aarch64_bitmasks[j])));
1123 return;
1126 else if ((val & aarch64_bitmasks[i]) == val)
1128 int j;
1130 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1131 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1134 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1135 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1136 GEN_INT (aarch64_bitmasks[j])));
1137 emit_insn (gen_anddi3 (dest, subtarget,
1138 GEN_INT (aarch64_bitmasks[i])));
1139 return;
1144 simple_sequence:
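  /* Illustrative example (not in the original source): a constant such as
     0x1234567890abcdef has four distinct non-zero, non-0xffff chunks, so
     none of the shortcuts above apply and the loop below emits one
     initial move (MOVZ) followed by three insv_immdi (MOVK)
     instructions.  */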
1145 first = true;
1146 mask = 0xffff;
1147 for (i = 0; i < 64; i += 16, mask <<= 16)
1149 if ((val & mask) != 0)
1151 if (first)
1153 emit_insn (gen_rtx_SET (VOIDmode, dest,
1154 GEN_INT (val & mask)));
1155 first = false;
1157 else
1158 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1159 GEN_INT ((val >> i) & 0xffff)));
1164 static bool
1165 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1167 /* Indirect calls are not currently supported. */
1168 if (decl == NULL)
1169 return false;
1171 /* Cannot tail-call to long-calls, since these are outside of the
1172 range of a branch instruction (we could handle this if we added
1173 support for indirect tail-calls). */
1174 if (aarch64_decl_is_long_call_p (decl))
1175 return false;
1177 return true;
1180 /* Implement TARGET_PASS_BY_REFERENCE. */
1182 static bool
1183 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1184 enum machine_mode mode,
1185 const_tree type,
1186 bool named ATTRIBUTE_UNUSED)
1188 HOST_WIDE_INT size;
1189 enum machine_mode dummymode;
1190 int nregs;
1192 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1193 size = (mode == BLKmode && type)
1194 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1196 if (type)
1198 /* Arrays always passed by reference. */
1199 if (TREE_CODE (type) == ARRAY_TYPE)
1200 return true;
1201 /* Other aggregates based on their size. */
1202 if (AGGREGATE_TYPE_P (type))
1203 size = int_size_in_bytes (type);
1206 /* Variable sized arguments are always passed by reference. */
1207 if (size < 0)
1208 return true;
1210 /* Can this be a candidate to be passed in fp/simd register(s)? */
1211 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1212 &dummymode, &nregs,
1213 NULL))
1214 return false;
1216 /* Arguments which are variable sized or larger than 2 registers are
1217 passed by reference unless they are a homogeneous floating-point
1218 aggregate. */
1219 return size > 2 * UNITS_PER_WORD;
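/* Illustrative examples (assumptions for exposition): under these rules
   a "struct { double a, b, c; }" argument (24 bytes) is a homogeneous
   floating-point aggregate, so it is still passed by value in SIMD/FP
   registers, whereas a "struct { long a, b, c; }" argument (24 bytes)
   needs more than two GP registers and is passed by reference.  */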
1222 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1223 static bool
1224 aarch64_return_in_msb (const_tree valtype)
1226 enum machine_mode dummy_mode;
1227 int dummy_int;
1229 /* Never happens in little-endian mode. */
1230 if (!BYTES_BIG_ENDIAN)
1231 return false;
1233 /* Only composite types smaller than or equal to 16 bytes can
1234 be potentially returned in registers. */
1235 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1236 || int_size_in_bytes (valtype) <= 0
1237 || int_size_in_bytes (valtype) > 16)
1238 return false;
1240 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1241 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1242 is always passed/returned in the least significant bits of fp/simd
1243 register(s). */
1244 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1245 &dummy_mode, &dummy_int, NULL))
1246 return false;
1248 return true;
1251 /* Implement TARGET_FUNCTION_VALUE.
1252 Define how to find the value returned by a function. */
1254 static rtx
1255 aarch64_function_value (const_tree type, const_tree func,
1256 bool outgoing ATTRIBUTE_UNUSED)
1258 enum machine_mode mode;
1259 int unsignedp;
1260 int count;
1261 enum machine_mode ag_mode;
1263 mode = TYPE_MODE (type);
1264 if (INTEGRAL_TYPE_P (type))
1265 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1267 if (aarch64_return_in_msb (type))
1269 HOST_WIDE_INT size = int_size_in_bytes (type);
1271 if (size % UNITS_PER_WORD != 0)
1273 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1274 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1278 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1279 &ag_mode, &count, NULL))
1281 if (!aarch64_composite_type_p (type, mode))
1283 gcc_assert (count == 1 && mode == ag_mode);
1284 return gen_rtx_REG (mode, V0_REGNUM);
1286 else
1288 int i;
1289 rtx par;
1291 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1292 for (i = 0; i < count; i++)
1294 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1295 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1296 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1297 XVECEXP (par, 0, i) = tmp;
1299 return par;
1302 else
1303 return gen_rtx_REG (mode, R0_REGNUM);
1306 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1307 Return true if REGNO is the number of a hard register in which the values
1308 of called function may come back. */
1310 static bool
1311 aarch64_function_value_regno_p (const unsigned int regno)
1313 /* Maximum of 16 bytes can be returned in the general registers. Examples
1314 of 16-byte return values are: 128-bit integers and 16-byte small
1315 structures (excluding homogeneous floating-point aggregates). */
1316 if (regno == R0_REGNUM || regno == R1_REGNUM)
1317 return true;
1319 /* Up to four fp/simd registers can return a function value, e.g. a
1320 homogeneous floating-point aggregate having four members. */
1321 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1322 return !TARGET_GENERAL_REGS_ONLY;
1324 return false;
1327 /* Implement TARGET_RETURN_IN_MEMORY.
1329 If the type T of the result of a function is such that
1330 void func (T arg)
1331 would require that arg be passed as a value in a register (or set of
1332 registers) according to the parameter passing rules, then the result
1333 is returned in the same registers as would be used for such an
1334 argument. */
1336 static bool
1337 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1339 HOST_WIDE_INT size;
1340 enum machine_mode ag_mode;
1341 int count;
1343 if (!AGGREGATE_TYPE_P (type)
1344 && TREE_CODE (type) != COMPLEX_TYPE
1345 && TREE_CODE (type) != VECTOR_TYPE)
1346 /* Simple scalar types always returned in registers. */
1347 return false;
1349 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1350 type,
1351 &ag_mode,
1352 &count,
1353 NULL))
1354 return false;
1356 /* Types larger than 2 registers returned in memory. */
1357 size = int_size_in_bytes (type);
1358 return (size < 0 || size > 2 * UNITS_PER_WORD);
1361 static bool
1362 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1363 const_tree type, int *nregs)
1365 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1366 return aarch64_vfp_is_call_or_return_candidate (mode,
1367 type,
1368 &pcum->aapcs_vfp_rmode,
1369 nregs,
1370 NULL);
1373 /* Given MODE and TYPE of a function argument, return the alignment in
1374 bits. The idea is to suppress any stronger alignment requested by
1375 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1376 This is a helper function for local use only. */
1378 static unsigned int
1379 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1381 unsigned int alignment;
1383 if (type)
1385 if (!integer_zerop (TYPE_SIZE (type)))
1387 if (TYPE_MODE (type) == mode)
1388 alignment = TYPE_ALIGN (type);
1389 else
1390 alignment = GET_MODE_ALIGNMENT (mode);
1392 else
1393 alignment = 0;
1395 else
1396 alignment = GET_MODE_ALIGNMENT (mode);
1398 return alignment;
1401 /* Layout a function argument according to the AAPCS64 rules. The rule
1402 numbers refer to the rule numbers in the AAPCS64. */
1404 static void
1405 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1406 const_tree type,
1407 bool named ATTRIBUTE_UNUSED)
1409 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1410 int ncrn, nvrn, nregs;
1411 bool allocate_ncrn, allocate_nvrn;
1413 /* We need to do this once per argument. */
1414 if (pcum->aapcs_arg_processed)
1415 return;
1417 pcum->aapcs_arg_processed = true;
1419 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1420 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1421 mode,
1422 type,
1423 &nregs);
1425 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1426 The following code thus handles passing by SIMD/FP registers first. */
1428 nvrn = pcum->aapcs_nvrn;
1430 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1431 and homogeneous short-vector aggregates (HVA). */
1432 if (allocate_nvrn)
1434 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1436 pcum->aapcs_nextnvrn = nvrn + nregs;
1437 if (!aarch64_composite_type_p (type, mode))
1439 gcc_assert (nregs == 1);
1440 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1442 else
1444 rtx par;
1445 int i;
1446 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1447 for (i = 0; i < nregs; i++)
1449 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1450 V0_REGNUM + nvrn + i);
1451 tmp = gen_rtx_EXPR_LIST
1452 (VOIDmode, tmp,
1453 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1454 XVECEXP (par, 0, i) = tmp;
1456 pcum->aapcs_reg = par;
1458 return;
1460 else
1462 /* C.3 NSRN is set to 8. */
1463 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1464 goto on_stack;
1468 ncrn = pcum->aapcs_ncrn;
1469 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1470 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1473 /* C6 - C9, though the sign and zero extension semantics are
1474 handled elsewhere. This is the case where the argument fits
1475 entirely in general registers. */
1476 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1478 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1480 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1482 /* C.8 if the argument has an alignment of 16 then the NGRN is
1483 rounded up to the next even number. */
1484 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1486 ++ncrn;
1487 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1489 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1490 A reg is still generated for it, but the caller should be smart
1491 enough not to use it. */
1492 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1494 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1496 else
1498 rtx par;
1499 int i;
1501 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1502 for (i = 0; i < nregs; i++)
1504 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1505 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1506 GEN_INT (i * UNITS_PER_WORD));
1507 XVECEXP (par, 0, i) = tmp;
1509 pcum->aapcs_reg = par;
1512 pcum->aapcs_nextncrn = ncrn + nregs;
1513 return;
1516 /* C.11 */
1517 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1519 /* The argument is passed on the stack; record the needed number of words for
1520 this argument (we can re-use NREGS) and align the total size if
1521 necessary. */
1522 on_stack:
1523 pcum->aapcs_stack_words = nregs;
1524 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1525 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1526 16 / UNITS_PER_WORD) + 1;
1527 return;
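/* Worked example of rule C.8 above (illustrative, not from the original
   source): for "void f (int a, __int128 b)", A is allocated x0, leaving
   NCRN == 1; B needs two registers and has 16-byte alignment, so NCRN is
   rounded up to 2 and B is passed in the even/odd pair x2/x3, leaving x1
   unused.  */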
1530 /* Implement TARGET_FUNCTION_ARG. */
1532 static rtx
1533 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1534 const_tree type, bool named)
1536 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1537 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1539 if (mode == VOIDmode)
1540 return NULL_RTX;
1542 aarch64_layout_arg (pcum_v, mode, type, named);
1543 return pcum->aapcs_reg;
1546 void
1547 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1548 const_tree fntype ATTRIBUTE_UNUSED,
1549 rtx libname ATTRIBUTE_UNUSED,
1550 const_tree fndecl ATTRIBUTE_UNUSED,
1551 unsigned n_named ATTRIBUTE_UNUSED)
1553 pcum->aapcs_ncrn = 0;
1554 pcum->aapcs_nvrn = 0;
1555 pcum->aapcs_nextncrn = 0;
1556 pcum->aapcs_nextnvrn = 0;
1557 pcum->pcs_variant = ARM_PCS_AAPCS64;
1558 pcum->aapcs_reg = NULL_RTX;
1559 pcum->aapcs_arg_processed = false;
1560 pcum->aapcs_stack_words = 0;
1561 pcum->aapcs_stack_size = 0;
1563 return;
1566 static void
1567 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1568 enum machine_mode mode,
1569 const_tree type,
1570 bool named)
1572 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1573 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1575 aarch64_layout_arg (pcum_v, mode, type, named);
1576 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1577 != (pcum->aapcs_stack_words != 0));
1578 pcum->aapcs_arg_processed = false;
1579 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1580 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1581 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1582 pcum->aapcs_stack_words = 0;
1583 pcum->aapcs_reg = NULL_RTX;
1587 bool
1588 aarch64_function_arg_regno_p (unsigned regno)
1590 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1591 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1594 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1595 PARM_BOUNDARY bits of alignment, but will be given anything up
1596 to STACK_BOUNDARY bits if the type requires it. This makes sure
1597 that both before and after the layout of each argument, the Next
1598 Stacked Argument Address (NSAA) will have a minimum alignment of
1599 8 bytes. */
1601 static unsigned int
1602 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1604 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1606 if (alignment < PARM_BOUNDARY)
1607 alignment = PARM_BOUNDARY;
1608 if (alignment > STACK_BOUNDARY)
1609 alignment = STACK_BOUNDARY;
1610 return alignment;
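/* Illustrative note (this relies on the usual aarch64 values of these
   macros): with PARM_BOUNDARY of 64 bits and STACK_BOUNDARY of 128 bits,
   the boundary returned here is the argument's natural alignment clamped
   to the range of 8 to 16 bytes, matching the NSAA constraint described
   above.  */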
1613 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1615 Return true if an argument passed on the stack should be padded upwards,
1616 i.e. if the least-significant byte of the stack slot has useful data.
1618 Small aggregate types are placed in the lowest memory address.
1620 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1622 bool
1623 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1625 /* On little-endian targets, the least significant byte of every stack
1626 argument is passed at the lowest byte address of the stack slot. */
1627 if (!BYTES_BIG_ENDIAN)
1628 return true;
1630 /* Otherwise, integral, floating-point and pointer types are padded downward:
1631 the least significant byte of a stack argument is passed at the highest
1632 byte address of the stack slot. */
1633 if (type
1634 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1635 || POINTER_TYPE_P (type))
1636 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1637 return false;
1639 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1640 return true;
1643 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1645 It specifies padding for the last (may also be the only)
1646 element of a block move between registers and memory. If
1647 assuming the block is in the memory, padding upward means that
1648 the last element is padded after its highest significant byte,
1649 while in downward padding, the last element is padded at
1650 its least significant byte side.
1652 Small aggregates and small complex types are always padded
1653 upwards.
1655 We don't need to worry about homogeneous floating-point or
1656 short-vector aggregates; their move is not affected by the
1657 padding direction determined here. Regardless of endianness,
1658 each element of such an aggregate is put in the least
1659 significant bits of a fp/simd register.
1661 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1662 register has useful data, and return the opposite if the most
1663 significant byte does. */
1665 bool
1666 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1667 bool first ATTRIBUTE_UNUSED)
1670 /* Small composite types are always padded upward. */
1671 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1673 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1674 : GET_MODE_SIZE (mode));
1675 if (size < 2 * UNITS_PER_WORD)
1676 return true;
1679 /* Otherwise, use the default padding. */
1680 return !BYTES_BIG_ENDIAN;
1683 static enum machine_mode
1684 aarch64_libgcc_cmp_return_mode (void)
1686 return SImode;
1689 static bool
1690 aarch64_frame_pointer_required (void)
1692 /* If the function contains dynamic stack allocations, we need to
1693 use the frame pointer to access the static parts of the frame. */
1694 if (cfun->calls_alloca)
1695 return true;
1697 /* We may have turned flag_omit_frame_pointer on in order to have this
1698 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1699 and we'll check it here.
1700 If we really did set flag_omit_frame_pointer normally, then we return false
1701 (no frame pointer required) in all cases. */
1703 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1704 return false;
1705 else if (flag_omit_leaf_frame_pointer)
1706 return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
1707 return true;
1710 /* Mark the registers that need to be saved by the callee and calculate
1711 the size of the callee-saved registers area and frame record (both FP
1712 and LR may be omitted). */
1713 static void
1714 aarch64_layout_frame (void)
1716 HOST_WIDE_INT offset = 0;
1717 int regno;
1719 if (reload_completed && cfun->machine->frame.laid_out)
1720 return;
1722 cfun->machine->frame.fp_lr_offset = 0;
1724 /* First mark all the registers that really need to be saved... */
1725 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1726 cfun->machine->frame.reg_offset[regno] = -1;
1728 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1729 cfun->machine->frame.reg_offset[regno] = -1;
1731 /* ... that includes the eh data registers (if needed)... */
1732 if (crtl->calls_eh_return)
1733 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1734 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1736 /* ... and any callee saved register that dataflow says is live. */
1737 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno)
1739 && !call_used_regs[regno])
1740 cfun->machine->frame.reg_offset[regno] = 0;
1742 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1743 if (df_regs_ever_live_p (regno)
1744 && !call_used_regs[regno])
1745 cfun->machine->frame.reg_offset[regno] = 0;
1747 if (frame_pointer_needed)
1749 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1750 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1751 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1754 /* Now assign stack slots for them. */
1755 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1756 if (cfun->machine->frame.reg_offset[regno] != -1)
1758 cfun->machine->frame.reg_offset[regno] = offset;
1759 offset += UNITS_PER_WORD;
1762 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1763 if (cfun->machine->frame.reg_offset[regno] != -1)
1765 cfun->machine->frame.reg_offset[regno] = offset;
1766 offset += UNITS_PER_WORD;
1769 if (frame_pointer_needed)
1771 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1772 offset += UNITS_PER_WORD;
1773 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1776 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1778 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1779 offset += UNITS_PER_WORD;
1780 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1783 cfun->machine->frame.padding0 =
1784 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1785 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1787 cfun->machine->frame.saved_regs_size = offset;
1788 cfun->machine->frame.laid_out = true;
1791 /* Make the last instruction frame-related and note that it performs
1792 the operation described by FRAME_PATTERN. */
1794 static void
1795 aarch64_set_frame_expr (rtx frame_pattern)
1797 rtx insn;
1799 insn = get_last_insn ();
1800 RTX_FRAME_RELATED_P (insn) = 1;
1801 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1802 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1803 frame_pattern,
1804 REG_NOTES (insn));
1807 static bool
1808 aarch64_register_saved_on_entry (int regno)
1810 return cfun->machine->frame.reg_offset[regno] != -1;
1814 static void
1815 aarch64_save_or_restore_fprs (int start_offset, int increment,
1816 bool restore, rtx base_rtx)
1819 unsigned regno;
1820 unsigned regno2;
1821 rtx insn;
1822 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1823 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1826 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1828 if (aarch64_register_saved_on_entry (regno))
1830 rtx mem;
1831 mem = gen_mem_ref (DFmode,
1832 plus_constant (Pmode,
1833 base_rtx,
1834 start_offset));
1836 for (regno2 = regno + 1;
1837 regno2 <= V31_REGNUM
1838 && !aarch64_register_saved_on_entry (regno2);
1839 regno2++)
1841 /* Empty loop. */
1843 if (regno2 <= V31_REGNUM &&
1844 aarch64_register_saved_on_entry (regno2))
1846 rtx mem2;
1847 /* Next highest register to be saved. */
1848 mem2 = gen_mem_ref (DFmode,
1849 plus_constant
1850 (Pmode,
1851 base_rtx,
1852 start_offset + increment));
1853 if (restore == false)
1855 insn = emit_insn
1856 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1857 mem2, gen_rtx_REG (DFmode, regno2)));
1860 else
1862 insn = emit_insn
1863 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1864 gen_rtx_REG (DFmode, regno2), mem2));
1866 add_reg_note (insn, REG_CFA_RESTORE,
1867 gen_rtx_REG (DFmode, regno));
1868 add_reg_note (insn, REG_CFA_RESTORE,
1869 gen_rtx_REG (DFmode, regno2));
1872 /* The first part of a frame-related parallel insn
1873 is always assumed to be relevant to the frame
1874 calculations; subsequent parts are only
1875 frame-related if explicitly marked. */
1876 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1877 regno = regno2;
1878 start_offset += increment * 2;
1880 else
1882 if (restore == false)
1883 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1884 else
1886 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1887 add_reg_note (insn, REG_CFA_RESTORE,
1888 gen_rtx_REG (DImode, regno));
1890 start_offset += increment;
1892 RTX_FRAME_RELATED_P (insn) = 1;
1899 /* Offset from the stack pointer of where the saves and
1900 restores have to happen. */
1901 static void
1902 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1903 bool restore)
1905 rtx insn;
1906 rtx base_rtx = stack_pointer_rtx;
1907 HOST_WIDE_INT start_offset = offset;
1908 HOST_WIDE_INT increment = UNITS_PER_WORD;
1909 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1910 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1911 unsigned regno;
1912 unsigned regno2;
1914 for (regno = R0_REGNUM; regno <= limit; regno++)
1916 if (aarch64_register_saved_on_entry (regno))
1918 rtx mem;
1919 mem = gen_mem_ref (Pmode,
1920 plus_constant (Pmode,
1921 base_rtx,
1922 start_offset));
1924 for (regno2 = regno + 1;
1925 regno2 <= limit
1926 && !aarch64_register_saved_on_entry (regno2);
1927 regno2++)
1929 /* Empty loop. */
1931 if (regno2 <= limit &&
1932 aarch64_register_saved_on_entry (regno2))
1934 rtx mem2;
1935 /* Next highest register to be saved. */
1936 mem2 = gen_mem_ref (Pmode,
1937 plus_constant
1938 (Pmode,
1939 base_rtx,
1940 start_offset + increment));
1941 if (restore == false)
1943 insn = emit_insn
1944 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1945 mem2, gen_rtx_REG (DImode, regno2)));
1948 else
1950 insn = emit_insn
1951 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1952 gen_rtx_REG (DImode, regno2), mem2));
1954 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1955 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1958 /* The first part of a frame-related parallel insn
1959 is always assumed to be relevant to the frame
1960 calculations; subsequent parts, are only
1961 frame-related if explicitly marked. */
1962 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1963 1)) = 1;
1964 regno = regno2;
1965 start_offset += increment * 2;
1967 else
1969 if (restore == false)
1970 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1971 else
1973 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1974 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1976 start_offset += increment;
1978 RTX_FRAME_RELATED_P (insn) = 1;
1982 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1986 /* AArch64 stack frames generated by this compiler look like:
1988 +-------------------------------+
1990 | incoming stack arguments |
1992 +-------------------------------+ <-- arg_pointer_rtx
1994 | callee-allocated save area |
1995 | for register varargs |
1997 +-------------------------------+
1999 | local variables |
2001 +-------------------------------+ <-- frame_pointer_rtx
2003 | callee-saved registers |
2005 +-------------------------------+
2006 | LR' |
2007 +-------------------------------+
2008 | FP' |
2009 P +-------------------------------+ <-- hard_frame_pointer_rtx
2010 | dynamic allocation |
2011 +-------------------------------+
2013 | outgoing stack arguments |
2015 +-------------------------------+ <-- stack_pointer_rtx
2017 Dynamic stack allocations such as alloca insert data at point P.
2018 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2019 hard_frame_pointer_rtx unchanged. */
2021 /* Generate the prologue instructions for entry into a function.
2022 Establish the stack frame by decreasing the stack pointer with a
2023 properly calculated size and, if necessary, create a frame record
2024 filled with the values of LR and previous frame pointer. The
2025 current FP is also set up if it is in use. */
2027 void
2028 aarch64_expand_prologue (void)
2030 /* sub sp, sp, #<frame_size>
2031 stp {fp, lr}, [sp, #<frame_size> - 16]
2032 add fp, sp, #<frame_size> - hardfp_offset
2033 stp {cs_reg}, [fp, #-16] etc.
2035 sub sp, sp, <final_adjustment_if_any>
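     Illustrative instantiation (an assumption for exposition, not part
     of the original source): for a function that needs a frame pointer,
     saves no other callee-saved registers, has no locals and reserves
     16 bytes of outgoing argument space, frame_size is 32 and
     hardfp_offset is 16, so the scheme above becomes roughly:

        sub sp, sp, #32
        stp x29, x30, [sp, #16]
        add x29, sp, #16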
2037 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2038 HOST_WIDE_INT frame_size, offset;
2039 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2040 rtx insn;
2042 aarch64_layout_frame ();
2043 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2044 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2045 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2046 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2047 + crtl->outgoing_args_size);
2048 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2049 STACK_BOUNDARY / BITS_PER_UNIT);
2051 if (flag_stack_usage_info)
2052 current_function_static_stack_size = frame_size;
2054 fp_offset = (offset
2055 - original_frame_size
2056 - cfun->machine->frame.saved_regs_size);
2058 /* Store pairs and load pairs have a range of only -512 to 504.  */
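/* (The LDP/STP offset is a signed 7-bit immediate scaled by the access
   size, so for 64-bit registers it covers -64 * 8 = -512 up to
   63 * 8 = 504.)  */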
2059 if (offset >= 512)
2061 /* When the frame has a large size, an initial decrease is done on
2062 the stack pointer to jump over the callee-allocated save area for
2063 register varargs, the local variable area and/or the callee-saved
2064 register area. This will allow the pre-index write-back
2065 store pair instructions to be used for setting up the stack frame
2066 efficiently. */
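/* Illustrative shape of the prologue for such frames (not literal output):
       sub  sp, sp, #<area above the callee saves>
       stp  x29, x30, [sp, #-<callee save area>]!   // pre-index write-back
       ...                                          // other callee saves
       sub  sp, sp, #<outgoing args>  */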
2067 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2068 if (offset >= 512)
2069 offset = cfun->machine->frame.saved_regs_size;
2071 frame_size -= (offset + crtl->outgoing_args_size);
2072 fp_offset = 0;
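/* Adjustments below 0x1000000 (1 << 24) can be made with at most two
   add/sub instructions taking a 12-bit immediate, optionally shifted left
   by 12; anything larger is first materialized in the IP0 scratch
   register.  */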
2074 if (frame_size >= 0x1000000)
2076 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2077 emit_move_insn (op0, GEN_INT (-frame_size));
2078 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2079 aarch64_set_frame_expr (gen_rtx_SET
2080 (Pmode, stack_pointer_rtx,
2081 plus_constant (Pmode,
2082 stack_pointer_rtx,
2083 -frame_size)));
2085 else if (frame_size > 0)
2087 if ((frame_size & 0xfff) != frame_size)
2089 insn = emit_insn (gen_add2_insn
2090 (stack_pointer_rtx,
2091 GEN_INT (-(frame_size
2092 & ~(HOST_WIDE_INT)0xfff))));
2093 RTX_FRAME_RELATED_P (insn) = 1;
2095 if ((frame_size & 0xfff) != 0)
2097 insn = emit_insn (gen_add2_insn
2098 (stack_pointer_rtx,
2099 GEN_INT (-(frame_size
2100 & (HOST_WIDE_INT)0xfff))));
2101 RTX_FRAME_RELATED_P (insn) = 1;
2105 else
2106 frame_size = -1;
2108 if (offset > 0)
2110 /* Save the frame pointer and lr if the frame pointer is needed
2111 first. Make the frame pointer point to the location of the
2112 old frame pointer on the stack. */
2113 if (frame_pointer_needed)
2115 rtx mem_fp, mem_lr;
2117 if (fp_offset)
2119 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2120 GEN_INT (-offset)));
2121 RTX_FRAME_RELATED_P (insn) = 1;
2122 aarch64_set_frame_expr (gen_rtx_SET
2123 (Pmode, stack_pointer_rtx,
2124 gen_rtx_MINUS (Pmode,
2125 stack_pointer_rtx,
2126 GEN_INT (offset))));
2127 mem_fp = gen_frame_mem (DImode,
2128 plus_constant (Pmode,
2129 stack_pointer_rtx,
2130 fp_offset));
2131 mem_lr = gen_frame_mem (DImode,
2132 plus_constant (Pmode,
2133 stack_pointer_rtx,
2134 fp_offset
2135 + UNITS_PER_WORD));
2136 insn = emit_insn (gen_store_pairdi (mem_fp,
2137 hard_frame_pointer_rtx,
2138 mem_lr,
2139 gen_rtx_REG (DImode,
2140 LR_REGNUM)));
2142 else
2144 insn = emit_insn (gen_storewb_pairdi_di
2145 (stack_pointer_rtx, stack_pointer_rtx,
2146 hard_frame_pointer_rtx,
2147 gen_rtx_REG (DImode, LR_REGNUM),
2148 GEN_INT (-offset),
2149 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2150 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2153 /* The first part of a frame-related parallel insn is always
2154 assumed to be relevant to the frame calculations;
2155 subsequent parts are only frame-related if explicitly
2156 marked. */
2157 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2158 RTX_FRAME_RELATED_P (insn) = 1;
2160 /* Set up frame pointer to point to the location of the
2161 previous frame pointer on the stack. */
2162 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2163 stack_pointer_rtx,
2164 GEN_INT (fp_offset)));
2165 aarch64_set_frame_expr (gen_rtx_SET
2166 (Pmode, hard_frame_pointer_rtx,
2167 plus_constant (Pmode,
2168 stack_pointer_rtx,
2169 fp_offset)));
2170 RTX_FRAME_RELATED_P (insn) = 1;
2171 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2172 hard_frame_pointer_rtx));
2174 else
2176 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2177 GEN_INT (-offset)));
2178 RTX_FRAME_RELATED_P (insn) = 1;
2181 aarch64_save_or_restore_callee_save_registers
2182 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2185 /* When offset >= 512,
2186 sub sp, sp, #<outgoing_args_size> */
2187 if (frame_size > -1)
2189 if (crtl->outgoing_args_size > 0)
2191 insn = emit_insn (gen_add2_insn
2192 (stack_pointer_rtx,
2193 GEN_INT (- crtl->outgoing_args_size)));
2194 RTX_FRAME_RELATED_P (insn) = 1;
2199 /* Generate the epilogue instructions for returning from a function. */
2200 void
2201 aarch64_expand_epilogue (bool for_sibcall)
2203 HOST_WIDE_INT original_frame_size, frame_size, offset;
2204 HOST_WIDE_INT fp_offset;
2205 rtx insn;
2206 rtx cfa_reg;
2208 aarch64_layout_frame ();
2209 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2210 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2211 + crtl->outgoing_args_size);
2212 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2213 STACK_BOUNDARY / BITS_PER_UNIT);
2215 fp_offset = (offset
2216 - original_frame_size
2217 - cfun->machine->frame.saved_regs_size);
2219 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2221 /* Store pairs and load pairs have a range of only -512 to 504.  */
2222 if (offset >= 512)
2224 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2225 if (offset >= 512)
2226 offset = cfun->machine->frame.saved_regs_size;
2228 frame_size -= (offset + crtl->outgoing_args_size);
2229 fp_offset = 0;
2230 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2232 insn = emit_insn (gen_add2_insn
2233 (stack_pointer_rtx,
2234 GEN_INT (crtl->outgoing_args_size)));
2235 RTX_FRAME_RELATED_P (insn) = 1;
2238 else
2239 frame_size = -1;
2241 /* If there were outgoing arguments or we've done dynamic stack
2242 allocation, then restore the stack pointer from the frame
2243 pointer. This is at most one insn and more efficient than using
2244 GCC's internal mechanism. */
2245 if (frame_pointer_needed
2246 && (crtl->outgoing_args_size || cfun->calls_alloca))
2248 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2249 hard_frame_pointer_rtx,
2250 GEN_INT (- fp_offset)));
2251 RTX_FRAME_RELATED_P (insn) = 1;
2252 /* As SP is set to (FP - fp_offset), according to the rules in
2253 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2254 from the value of SP from now on. */
2255 cfa_reg = stack_pointer_rtx;
2258 aarch64_save_or_restore_callee_save_registers
2259 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2261 /* Restore the frame pointer and lr if the frame pointer is needed. */
2262 if (offset > 0)
2264 if (frame_pointer_needed)
2266 rtx mem_fp, mem_lr;
2268 if (fp_offset)
2270 mem_fp = gen_frame_mem (DImode,
2271 plus_constant (Pmode,
2272 stack_pointer_rtx,
2273 fp_offset));
2274 mem_lr = gen_frame_mem (DImode,
2275 plus_constant (Pmode,
2276 stack_pointer_rtx,
2277 fp_offset
2278 + UNITS_PER_WORD));
2279 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2280 mem_fp,
2281 gen_rtx_REG (DImode,
2282 LR_REGNUM),
2283 mem_lr));
2285 else
2287 insn = emit_insn (gen_loadwb_pairdi_di
2288 (stack_pointer_rtx,
2289 stack_pointer_rtx,
2290 hard_frame_pointer_rtx,
2291 gen_rtx_REG (DImode, LR_REGNUM),
2292 GEN_INT (offset),
2293 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2294 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2295 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2296 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2297 plus_constant (Pmode, cfa_reg,
2298 offset))));
2301 /* The first part of a frame-related parallel insn
2302 is always assumed to be relevant to the frame
2303 calculations; subsequent parts are only
2304 frame-related if explicitly marked. */
2305 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2306 RTX_FRAME_RELATED_P (insn) = 1;
2307 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2308 add_reg_note (insn, REG_CFA_RESTORE,
2309 gen_rtx_REG (DImode, LR_REGNUM));
2311 if (fp_offset)
2313 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2314 GEN_INT (offset)));
2315 RTX_FRAME_RELATED_P (insn) = 1;
2318 else
2320 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2321 GEN_INT (offset)));
2322 RTX_FRAME_RELATED_P (insn) = 1;
2326 /* Stack adjustment for exception handler. */
2327 if (crtl->calls_eh_return)
2329 /* We need to unwind the stack by the offset computed by
2330 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2331 based on SP. Ideally we would update the SP and define the
2332 CFA along the lines of:
2334 SP = SP + EH_RETURN_STACKADJ_RTX
2335 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2337 However the dwarf emitter only understands a constant
2338 register offset.
2340 The solution chosen here is to use the otherwise unused IP0
2341 as a temporary register to hold the current SP value. The
2342 CFA is described using IP0 then SP is modified. */
2344 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2346 insn = emit_move_insn (ip0, stack_pointer_rtx);
2347 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2348 RTX_FRAME_RELATED_P (insn) = 1;
2350 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2352 /* Ensure the assignment to IP0 does not get optimized away. */
2353 emit_use (ip0);
2356 if (frame_size > -1)
2358 if (frame_size >= 0x1000000)
2360 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2361 emit_move_insn (op0, GEN_INT (frame_size));
2362 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2363 aarch64_set_frame_expr (gen_rtx_SET
2364 (Pmode, stack_pointer_rtx,
2365 plus_constant (Pmode,
2366 stack_pointer_rtx,
2367 frame_size)));
2369 else if (frame_size > 0)
2371 if ((frame_size & 0xfff) != 0)
2373 insn = emit_insn (gen_add2_insn
2374 (stack_pointer_rtx,
2375 GEN_INT ((frame_size
2376 & (HOST_WIDE_INT) 0xfff))));
2377 RTX_FRAME_RELATED_P (insn) = 1;
2379 if ((frame_size & 0xfff) != frame_size)
2381 insn = emit_insn (gen_add2_insn
2382 (stack_pointer_rtx,
2383 GEN_INT ((frame_size
2384 & ~ (HOST_WIDE_INT) 0xfff))));
2385 RTX_FRAME_RELATED_P (insn) = 1;
2389 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2390 plus_constant (Pmode,
2391 stack_pointer_rtx,
2392 offset)));
2395 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2396 if (!for_sibcall)
2397 emit_jump_insn (ret_rtx);
2400 /* Return the place to copy the exception unwinding return address to.
2401 This will probably be a stack slot, but could (in theory) be the
2402 return register.  */
2404 aarch64_final_eh_return_addr (void)
2406 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2407 aarch64_layout_frame ();
2408 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2409 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2410 + crtl->outgoing_args_size);
2411 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2412 STACK_BOUNDARY / BITS_PER_UNIT);
2413 fp_offset = offset
2414 - original_frame_size
2415 - cfun->machine->frame.saved_regs_size;
2417 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2418 return gen_rtx_REG (DImode, LR_REGNUM);
2420 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2421 result in a store to save LR introduced by builtin_eh_return () being
2422 incorrectly deleted because the alias is not detected.
2423 So in the calculation of the address to copy the exception unwinding
2424 return address to, we note 2 cases.
2425 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2426 we return a SP-relative location since all the addresses are SP-relative
2427 in this case. This prevents the store from being optimized away.
2428 If the fp_offset is not 0, then the addresses will be FP-relative and
2429 therefore we return a FP-relative location. */
2431 if (frame_pointer_needed)
2433 if (fp_offset)
2434 return gen_frame_mem (DImode,
2435 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2436 else
2437 return gen_frame_mem (DImode,
2438 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2441 /* If FP is not needed, we calculate the location of LR, which would be
2442 at the top of the saved registers block. */
2444 return gen_frame_mem (DImode,
2445 plus_constant (Pmode,
2446 stack_pointer_rtx,
2447 fp_offset
2448 + cfun->machine->frame.saved_regs_size
2449 - 2 * UNITS_PER_WORD));
2452 /* Output code to build up a constant in a register. */
2453 static void
2454 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2456 if (aarch64_bitmask_imm (val, DImode))
2457 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2458 else
2460 int i;
2461 int ncount = 0;
2462 int zcount = 0;
2463 HOST_WIDE_INT valp = val >> 16;
2464 HOST_WIDE_INT valm;
2465 HOST_WIDE_INT tval;
2467 for (i = 16; i < 64; i += 16)
2469 valm = (valp & 0xffff);
2471 if (valm != 0)
2472 ++ zcount;
2474 if (valm != 0xffff)
2475 ++ ncount;
2477 valp >>= 16;
2480 /* zcount contains the number of additional MOVK instructions
2481 required if the constant is built up with an initial MOVZ instruction,
2482 while ncount is the number of MOVK instructions required if starting
2483 with a MOVN instruction.  Choose the sequence that yields the fewest
2484 instructions, preferring MOVZ instructions when both counts are
2485 the same.  */
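/* For example, 0xffffffffffff1234 has every upper 16-bit chunk equal to
   0xffff, so ncount is 0 and a single MOVN with immediate 0xedcb (the
   complement of 0x1234) suffices, whereas starting with MOVZ would need
   three additional MOVKs.  */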
2486 if (ncount < zcount)
2488 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2489 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2490 tval = 0xffff;
2492 else
2494 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2495 GEN_INT (val & 0xffff));
2496 tval = 0;
2499 val >>= 16;
2501 for (i = 16; i < 64; i += 16)
2503 if ((val & 0xffff) != tval)
2504 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2505 GEN_INT (i), GEN_INT (val & 0xffff)));
2506 val >>= 16;
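/* Add DELTA to the register numbered REGNUM, using the register numbered
   SCRATCHREG as a temporary for any part of DELTA that cannot be
   expressed as an add/sub immediate (12 bits, optionally shifted left
   by 12).  */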
2511 static void
2512 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2514 HOST_WIDE_INT mdelta = delta;
2515 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2516 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2518 if (mdelta < 0)
2519 mdelta = -mdelta;
2521 if (mdelta >= 4096 * 4096)
2523 aarch64_build_constant (scratchreg, delta);
2524 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2526 else if (mdelta > 0)
2528 if (mdelta >= 4096)
2530 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2531 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2532 if (delta < 0)
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2535 else
2536 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2537 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2539 if (mdelta % 4096 != 0)
2541 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2542 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2543 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2548 /* Output code to add DELTA to the first argument, and then jump
2549 to FUNCTION. Used for C++ multiple inheritance. */
2550 static void
2551 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2552 HOST_WIDE_INT delta,
2553 HOST_WIDE_INT vcall_offset,
2554 tree function)
2556 /* The this pointer is always in x0. Note that this differs from
2557 Arm where the this pointer may be bumped to r1 if r0 is required
2558 to return a pointer to an aggregate. On AArch64 a result value
2559 pointer will be in x8. */
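/* For a non-zero VCALL_OFFSET the generated code is roughly as follows
   (illustrative, LP64, DELTA handling omitted):
       ldr  x16, [x0]                  // load the vtable pointer
       ldr  x17, [x16, #vcall_offset]  // load the adjustment
       add  x0, x0, x17                // adjust the this pointer
       b    <function>  */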
2560 int this_regno = R0_REGNUM;
2561 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2563 reload_completed = 1;
2564 emit_note (NOTE_INSN_PROLOGUE_END);
2566 if (vcall_offset == 0)
2567 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2568 else
2570 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2572 this_rtx = gen_rtx_REG (Pmode, this_regno);
2573 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2574 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2576 addr = this_rtx;
2577 if (delta != 0)
2579 if (delta >= -256 && delta < 256)
2580 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2581 plus_constant (Pmode, this_rtx, delta));
2582 else
2583 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2586 if (Pmode == ptr_mode)
2587 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2588 else
2589 aarch64_emit_move (temp0,
2590 gen_rtx_ZERO_EXTEND (Pmode,
2591 gen_rtx_MEM (ptr_mode, addr)));
2593 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2594 addr = plus_constant (Pmode, temp0, vcall_offset);
2595 else
2597 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2598 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2601 if (Pmode == ptr_mode)
2602 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2603 else
2604 aarch64_emit_move (temp1,
2605 gen_rtx_SIGN_EXTEND (Pmode,
2606 gen_rtx_MEM (ptr_mode, addr)));
2608 emit_insn (gen_add2_insn (this_rtx, temp1));
2611 /* Generate a tail call to the target function. */
2612 if (!TREE_USED (function))
2614 assemble_external (function);
2615 TREE_USED (function) = 1;
2617 funexp = XEXP (DECL_RTL (function), 0);
2618 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2619 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2620 SIBLING_CALL_P (insn) = 1;
2622 insn = get_insns ();
2623 shorten_branches (insn);
2624 final_start_function (insn, file, 1);
2625 final (insn, file, 1);
2626 final_end_function ();
2628 /* Stop pretending to be a post-reload pass. */
2629 reload_completed = 0;
2632 static int
2633 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2635 if (GET_CODE (*x) == SYMBOL_REF)
2636 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2638 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2639 TLS offsets, not real symbol references. */
2640 if (GET_CODE (*x) == UNSPEC
2641 && XINT (*x, 1) == UNSPEC_TLS)
2642 return -1;
2644 return 0;
2647 static bool
2648 aarch64_tls_referenced_p (rtx x)
2650 if (!TARGET_HAVE_TLS)
2651 return false;
2653 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2657 static int
2658 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2660 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2661 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2663 if (*imm1 < *imm2)
2664 return -1;
2665 if (*imm1 > *imm2)
2666 return +1;
2667 return 0;
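/* Fill aarch64_bitmasks with every value encodable as a logical (bitmask)
   immediate: for each element size 2, 4, ..., 64 bits, every rotation of
   every run of set bits, replicated across the 64-bit word; then sort the
   table so that aarch64_bitmask_imm can use bsearch.  */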
2671 static void
2672 aarch64_build_bitmask_table (void)
2674 unsigned HOST_WIDE_INT mask, imm;
2675 unsigned int log_e, e, s, r;
2676 unsigned int nimms = 0;
2678 for (log_e = 1; log_e <= 6; log_e++)
2680 e = 1 << log_e;
2681 if (e == 64)
2682 mask = ~(HOST_WIDE_INT) 0;
2683 else
2684 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2685 for (s = 1; s < e; s++)
2687 for (r = 0; r < e; r++)
2689 /* Set S consecutive bits to 1 (S < 64).  */
2690 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2691 /* Rotate right by R.  */
2692 if (r != 0)
2693 imm = ((imm >> r) | (imm << (e - r))) & mask;
2694 /* Replicate the constant depending on the element size.  */
2695 switch (log_e) {
2696 case 1: imm |= (imm << 2);
2697 case 2: imm |= (imm << 4);
2698 case 3: imm |= (imm << 8);
2699 case 4: imm |= (imm << 16);
2700 case 5: imm |= (imm << 32);
2701 case 6:
2702 break;
2703 default:
2704 gcc_unreachable ();
2706 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2707 aarch64_bitmasks[nimms++] = imm;
2712 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2713 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2714 aarch64_bitmasks_cmp);
2718 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2719 a left shift of 0 or 12 bits. */
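/* For example, 0xabc and 0xabc000 both qualify, while 0x1001 does not.  */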
2720 bool
2721 aarch64_uimm12_shift (HOST_WIDE_INT val)
2723 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2724 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2729 /* Return true if val is an immediate that can be loaded into a
2730 register by a MOVZ instruction. */
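/* For example, 0x12340000 (0x1234 << 16) qualifies; 0x12345 does not.  */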
2731 static bool
2732 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2734 if (GET_MODE_SIZE (mode) > 4)
2736 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2737 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2738 return 1;
2740 else
2742 /* Ignore sign extension. */
2743 val &= (HOST_WIDE_INT) 0xffffffff;
2745 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2746 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2750 /* Return true if val is a valid bitmask immediate. */
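/* These are the logical immediates of the A64 ISA: a rotated run of set
   bits replicated across the register, e.g. 0x00ff00ff00ff00ff or
   0x0003000300030003, but not an arbitrary value such as 0x12345.  */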
2751 bool
2752 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2754 if (GET_MODE_SIZE (mode) < 8)
2756 /* Replicate bit pattern. */
2757 val &= (HOST_WIDE_INT) 0xffffffff;
2758 val |= val << 32;
2760 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2761 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2765 /* Return true if val is an immediate that can be loaded into a
2766 register in a single instruction. */
2767 bool
2768 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2770 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2771 return 1;
2772 return aarch64_bitmask_imm (val, mode);
2775 static bool
2776 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2778 rtx base, offset;
2780 if (GET_CODE (x) == HIGH)
2781 return true;
2783 split_const (x, &base, &offset);
2784 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2786 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2787 != SYMBOL_FORCE_TO_MEM)
2788 return true;
2789 else
2790 /* Avoid generating a 64-bit relocation in ILP32; leave
2791 to aarch64_expand_mov_immediate to handle it properly. */
2792 return mode != ptr_mode;
2795 return aarch64_tls_referenced_p (x);
2798 /* Return true if register REGNO is a valid index register.
2799 STRICT_P is true if REG_OK_STRICT is in effect. */
2801 bool
2802 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2804 if (!HARD_REGISTER_NUM_P (regno))
2806 if (!strict_p)
2807 return true;
2809 if (!reg_renumber)
2810 return false;
2812 regno = reg_renumber[regno];
2814 return GP_REGNUM_P (regno);
2817 /* Return true if register REGNO is a valid base register for mode MODE.
2818 STRICT_P is true if REG_OK_STRICT is in effect. */
2820 bool
2821 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2823 if (!HARD_REGISTER_NUM_P (regno))
2825 if (!strict_p)
2826 return true;
2828 if (!reg_renumber)
2829 return false;
2831 regno = reg_renumber[regno];
2834 /* The fake registers will be eliminated to either the stack or
2835 hard frame pointer, both of which are usually valid base registers.
2836 Reload deals with the cases where the eliminated form isn't valid. */
2837 return (GP_REGNUM_P (regno)
2838 || regno == SP_REGNUM
2839 || regno == FRAME_POINTER_REGNUM
2840 || regno == ARG_POINTER_REGNUM);
2843 /* Return true if X is a valid base register for mode MODE.
2844 STRICT_P is true if REG_OK_STRICT is in effect. */
2846 static bool
2847 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2849 if (!strict_p && GET_CODE (x) == SUBREG)
2850 x = SUBREG_REG (x);
2852 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2855 /* Return true if address offset is a valid index. If it is, fill in INFO
2856 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
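/* For example, an index of the form (ashift:DI (sign_extend:DI (reg:SI))
   (const_int 2)) for an SImode access is classified as ADDRESS_REG_SXTW
   with shift 2, corresponding to a [base, wN, sxtw #2] operand; the shift
   must be zero or log2 of the access size.  */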
2858 static bool
2859 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2860 enum machine_mode mode, bool strict_p)
2862 enum aarch64_address_type type;
2863 rtx index;
2864 int shift;
2866 /* (reg:P) */
2867 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2868 && GET_MODE (x) == Pmode)
2870 type = ADDRESS_REG_REG;
2871 index = x;
2872 shift = 0;
2874 /* (sign_extend:DI (reg:SI)) */
2875 else if ((GET_CODE (x) == SIGN_EXTEND
2876 || GET_CODE (x) == ZERO_EXTEND)
2877 && GET_MODE (x) == DImode
2878 && GET_MODE (XEXP (x, 0)) == SImode)
2880 type = (GET_CODE (x) == SIGN_EXTEND)
2881 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2882 index = XEXP (x, 0);
2883 shift = 0;
2885 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2886 else if (GET_CODE (x) == MULT
2887 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2889 && GET_MODE (XEXP (x, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x, 1)))
2893 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2894 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2895 index = XEXP (XEXP (x, 0), 0);
2896 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2898 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2899 else if (GET_CODE (x) == ASHIFT
2900 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2901 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2902 && GET_MODE (XEXP (x, 0)) == DImode
2903 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2904 && CONST_INT_P (XEXP (x, 1)))
2906 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2907 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2908 index = XEXP (XEXP (x, 0), 0);
2909 shift = INTVAL (XEXP (x, 1));
2911 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2912 else if ((GET_CODE (x) == SIGN_EXTRACT
2913 || GET_CODE (x) == ZERO_EXTRACT)
2914 && GET_MODE (x) == DImode
2915 && GET_CODE (XEXP (x, 0)) == MULT
2916 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2917 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2919 type = (GET_CODE (x) == SIGN_EXTRACT)
2920 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2921 index = XEXP (XEXP (x, 0), 0);
2922 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2923 if (INTVAL (XEXP (x, 1)) != 32 + shift
2924 || INTVAL (XEXP (x, 2)) != 0)
2925 shift = -1;
2927 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2928 (const_int 0xffffffff<<shift)) */
2929 else if (GET_CODE (x) == AND
2930 && GET_MODE (x) == DImode
2931 && GET_CODE (XEXP (x, 0)) == MULT
2932 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2933 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2934 && CONST_INT_P (XEXP (x, 1)))
2936 type = ADDRESS_REG_UXTW;
2937 index = XEXP (XEXP (x, 0), 0);
2938 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2939 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2940 shift = -1;
2942 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2943 else if ((GET_CODE (x) == SIGN_EXTRACT
2944 || GET_CODE (x) == ZERO_EXTRACT)
2945 && GET_MODE (x) == DImode
2946 && GET_CODE (XEXP (x, 0)) == ASHIFT
2947 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2948 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2950 type = (GET_CODE (x) == SIGN_EXTRACT)
2951 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2952 index = XEXP (XEXP (x, 0), 0);
2953 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2954 if (INTVAL (XEXP (x, 1)) != 32 + shift
2955 || INTVAL (XEXP (x, 2)) != 0)
2956 shift = -1;
2958 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2959 (const_int 0xffffffff<<shift)) */
2960 else if (GET_CODE (x) == AND
2961 && GET_MODE (x) == DImode
2962 && GET_CODE (XEXP (x, 0)) == ASHIFT
2963 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2964 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2965 && CONST_INT_P (XEXP (x, 1)))
2967 type = ADDRESS_REG_UXTW;
2968 index = XEXP (XEXP (x, 0), 0);
2969 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2970 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2971 shift = -1;
2973 /* (mult:P (reg:P) (const_int scale)) */
2974 else if (GET_CODE (x) == MULT
2975 && GET_MODE (x) == Pmode
2976 && GET_MODE (XEXP (x, 0)) == Pmode
2977 && CONST_INT_P (XEXP (x, 1)))
2979 type = ADDRESS_REG_REG;
2980 index = XEXP (x, 0);
2981 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2983 /* (ashift:P (reg:P) (const_int shift)) */
2984 else if (GET_CODE (x) == ASHIFT
2985 && GET_MODE (x) == Pmode
2986 && GET_MODE (XEXP (x, 0)) == Pmode
2987 && CONST_INT_P (XEXP (x, 1)))
2989 type = ADDRESS_REG_REG;
2990 index = XEXP (x, 0);
2991 shift = INTVAL (XEXP (x, 1));
2993 else
2994 return false;
2996 if (GET_CODE (index) == SUBREG)
2997 index = SUBREG_REG (index);
2999 if ((shift == 0 ||
3000 (shift > 0 && shift <= 3
3001 && (1 << shift) == GET_MODE_SIZE (mode)))
3002 && REG_P (index)
3003 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3005 info->type = type;
3006 info->offset = index;
3007 info->shift = shift;
3008 return true;
3011 return false;
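/* Return true if OFFSET is within the signed 7-bit, mode-size-scaled
   range used by the load/store pair (LDP/STP) instructions.  */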
3014 static inline bool
3015 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3017 return (offset >= -64 * GET_MODE_SIZE (mode)
3018 && offset < 64 * GET_MODE_SIZE (mode)
3019 && offset % GET_MODE_SIZE (mode) == 0);
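/* Return true if OFFSET is within the signed 9-bit unscaled range used
   by LDUR/STUR and the pre/post-indexed addressing forms.  */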
3022 static inline bool
3023 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3024 HOST_WIDE_INT offset)
3026 return offset >= -256 && offset < 256;
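/* Return true if OFFSET is within the unsigned 12-bit, mode-size-scaled
   range of the plain LDR/STR immediate form.  */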
3029 static inline bool
3030 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3032 return (offset >= 0
3033 && offset < 4096 * GET_MODE_SIZE (mode)
3034 && offset % GET_MODE_SIZE (mode) == 0);
3037 /* Return true if X is a valid address for machine mode MODE. If it is,
3038 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3039 effect. OUTER_CODE is PARALLEL for a load/store pair. */
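/* For example, [x0, #16] classifies as ADDRESS_REG_IMM, [x0, x1, lsl #3]
   as ADDRESS_REG_REG, and a post-increment [x0], #16 as ADDRESS_REG_WB.  */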
3041 static bool
3042 aarch64_classify_address (struct aarch64_address_info *info,
3043 rtx x, enum machine_mode mode,
3044 RTX_CODE outer_code, bool strict_p)
3046 enum rtx_code code = GET_CODE (x);
3047 rtx op0, op1;
3048 bool allow_reg_index_p =
3049 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3051 /* Don't support anything other than POST_INC or REG addressing for
3052 AdvSIMD. */
3053 if (aarch64_vector_mode_p (mode)
3054 && (code != POST_INC && code != REG))
3055 return false;
3057 switch (code)
3059 case REG:
3060 case SUBREG:
3061 info->type = ADDRESS_REG_IMM;
3062 info->base = x;
3063 info->offset = const0_rtx;
3064 return aarch64_base_register_rtx_p (x, strict_p);
3066 case PLUS:
3067 op0 = XEXP (x, 0);
3068 op1 = XEXP (x, 1);
3069 if (GET_MODE_SIZE (mode) != 0
3070 && CONST_INT_P (op1)
3071 && aarch64_base_register_rtx_p (op0, strict_p))
3073 HOST_WIDE_INT offset = INTVAL (op1);
3075 info->type = ADDRESS_REG_IMM;
3076 info->base = op0;
3077 info->offset = op1;
3079 /* TImode and TFmode values are allowed in both pairs of X
3080 registers and individual Q registers. The available
3081 address modes are:
3082 X,X: 7-bit signed scaled offset
3083 Q: 9-bit signed offset
3084 We conservatively require an offset representable in both modes.  */
3086 if (mode == TImode || mode == TFmode)
3087 return (offset_7bit_signed_scaled_p (mode, offset)
3088 && offset_9bit_signed_unscaled_p (mode, offset));
3090 if (outer_code == PARALLEL)
3091 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3092 && offset_7bit_signed_scaled_p (mode, offset));
3093 else
3094 return (offset_9bit_signed_unscaled_p (mode, offset)
3095 || offset_12bit_unsigned_scaled_p (mode, offset));
3098 if (allow_reg_index_p)
3100 /* Look for base + (scaled/extended) index register. */
3101 if (aarch64_base_register_rtx_p (op0, strict_p)
3102 && aarch64_classify_index (info, op1, mode, strict_p))
3104 info->base = op0;
3105 return true;
3107 if (aarch64_base_register_rtx_p (op1, strict_p)
3108 && aarch64_classify_index (info, op0, mode, strict_p))
3110 info->base = op1;
3111 return true;
3115 return false;
3117 case POST_INC:
3118 case POST_DEC:
3119 case PRE_INC:
3120 case PRE_DEC:
3121 info->type = ADDRESS_REG_WB;
3122 info->base = XEXP (x, 0);
3123 info->offset = NULL_RTX;
3124 return aarch64_base_register_rtx_p (info->base, strict_p);
3126 case POST_MODIFY:
3127 case PRE_MODIFY:
3128 info->type = ADDRESS_REG_WB;
3129 info->base = XEXP (x, 0);
3130 if (GET_CODE (XEXP (x, 1)) == PLUS
3131 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3132 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3133 && aarch64_base_register_rtx_p (info->base, strict_p))
3135 HOST_WIDE_INT offset;
3136 info->offset = XEXP (XEXP (x, 1), 1);
3137 offset = INTVAL (info->offset);
3139 /* TImode and TFmode values are allowed in both pairs of X
3140 registers and individual Q registers. The available
3141 address modes are:
3142 X,X: 7-bit signed scaled offset
3143 Q: 9-bit signed offset
3144 We conservatively require an offset representable in both modes.  */
3146 if (mode == TImode || mode == TFmode)
3147 return (offset_7bit_signed_scaled_p (mode, offset)
3148 && offset_9bit_signed_unscaled_p (mode, offset));
3150 if (outer_code == PARALLEL)
3151 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3152 && offset_7bit_signed_scaled_p (mode, offset));
3153 else
3154 return offset_9bit_signed_unscaled_p (mode, offset);
3156 return false;
3158 case CONST:
3159 case SYMBOL_REF:
3160 case LABEL_REF:
3161 /* load literal: pc-relative constant pool entry. Only supported
3162 for SI mode or larger. */
3163 info->type = ADDRESS_SYMBOLIC;
3164 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3166 rtx sym, addend;
3168 split_const (x, &sym, &addend);
3169 return (GET_CODE (sym) == LABEL_REF
3170 || (GET_CODE (sym) == SYMBOL_REF
3171 && CONSTANT_POOL_ADDRESS_P (sym)));
3173 return false;
3175 case LO_SUM:
3176 info->type = ADDRESS_LO_SUM;
3177 info->base = XEXP (x, 0);
3178 info->offset = XEXP (x, 1);
3179 if (allow_reg_index_p
3180 && aarch64_base_register_rtx_p (info->base, strict_p))
3182 rtx sym, offs;
3183 split_const (info->offset, &sym, &offs);
3184 if (GET_CODE (sym) == SYMBOL_REF
3185 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3186 == SYMBOL_SMALL_ABSOLUTE))
3188 /* The symbol and offset must be aligned to the access size. */
3189 unsigned int align;
3190 unsigned int ref_size;
3192 if (CONSTANT_POOL_ADDRESS_P (sym))
3193 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3194 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3196 tree exp = SYMBOL_REF_DECL (sym);
3197 align = TYPE_ALIGN (TREE_TYPE (exp));
3198 align = CONSTANT_ALIGNMENT (exp, align);
3200 else if (SYMBOL_REF_DECL (sym))
3201 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3202 else
3203 align = BITS_PER_UNIT;
3205 ref_size = GET_MODE_SIZE (mode);
3206 if (ref_size == 0)
3207 ref_size = GET_MODE_SIZE (DImode);
3209 return ((INTVAL (offs) & (ref_size - 1)) == 0
3210 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3213 return false;
3215 default:
3216 return false;
3220 bool
3221 aarch64_symbolic_address_p (rtx x)
3223 rtx offset;
3225 split_const (x, &x, &offset);
3226 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3229 /* Classify the base of symbolic expression X, given that X appears in
3230 context CONTEXT. */
3232 enum aarch64_symbol_type
3233 aarch64_classify_symbolic_expression (rtx x,
3234 enum aarch64_symbol_context context)
3236 rtx offset;
3238 split_const (x, &x, &offset);
3239 return aarch64_classify_symbol (x, context);
3243 /* Return TRUE if X is a legitimate address for accessing memory in
3244 mode MODE. */
3245 static bool
3246 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3248 struct aarch64_address_info addr;
3250 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3253 /* Return TRUE if X is a legitimate address for accessing memory in
3254 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3255 pair operation. */
3256 bool
3257 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3258 RTX_CODE outer_code, bool strict_p)
3260 struct aarch64_address_info addr;
3262 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3265 /* Return TRUE if rtx X is immediate constant 0.0 */
3266 bool
3267 aarch64_float_const_zero_rtx_p (rtx x)
3269 REAL_VALUE_TYPE r;
3271 if (GET_MODE (x) == VOIDmode)
3272 return false;
3274 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3275 if (REAL_VALUE_MINUS_ZERO (r))
3276 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3277 return REAL_VALUES_EQUAL (r, dconst0);
3280 /* Return the fixed registers used for condition codes. */
3282 static bool
3283 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3285 *p1 = CC_REGNUM;
3286 *p2 = INVALID_REGNUM;
3287 return true;
3290 enum machine_mode
3291 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3293 /* All floating point compares return CCFPmode, except the ordered relational
3294 compares (LT, LE, GT, GE), which need a signalling comparison and so return CCFPEmode.  */
3295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3297 switch (code)
3299 case EQ:
3300 case NE:
3301 case UNORDERED:
3302 case ORDERED:
3303 case UNLT:
3304 case UNLE:
3305 case UNGT:
3306 case UNGE:
3307 case UNEQ:
3308 case LTGT:
3309 return CCFPmode;
3311 case LT:
3312 case LE:
3313 case GT:
3314 case GE:
3315 return CCFPEmode;
3317 default:
3318 gcc_unreachable ();
3322 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3323 && y == const0_rtx
3324 && (code == EQ || code == NE || code == LT || code == GE)
3325 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3326 || GET_CODE (x) == NEG))
3327 return CC_NZmode;
3329 /* A compare with a shifted or negated operand. Because of canonicalization,
3330 the comparison will have to be swapped when we emit the assembly
3331 code. */
3332 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3333 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3334 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3335 || GET_CODE (x) == LSHIFTRT
3336 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3337 || GET_CODE (x) == NEG))
3338 return CC_SWPmode;
3340 /* A compare of a mode narrower than SI mode against zero can be done
3341 by extending the value in the comparison. */
3342 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3343 && y == const0_rtx)
3344 /* Only use sign-extension if we really need it. */
3345 return ((code == GT || code == GE || code == LE || code == LT)
3346 ? CC_SESWPmode : CC_ZESWPmode);
3348 /* For everything else, return CCmode. */
3349 return CCmode;
3352 static unsigned
3353 aarch64_get_condition_code (rtx x)
3355 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3356 enum rtx_code comp_code = GET_CODE (x);
3358 if (GET_MODE_CLASS (mode) != MODE_CC)
3359 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3361 switch (mode)
3363 case CCFPmode:
3364 case CCFPEmode:
3365 switch (comp_code)
3367 case GE: return AARCH64_GE;
3368 case GT: return AARCH64_GT;
3369 case LE: return AARCH64_LS;
3370 case LT: return AARCH64_MI;
3371 case NE: return AARCH64_NE;
3372 case EQ: return AARCH64_EQ;
3373 case ORDERED: return AARCH64_VC;
3374 case UNORDERED: return AARCH64_VS;
3375 case UNLT: return AARCH64_LT;
3376 case UNLE: return AARCH64_LE;
3377 case UNGT: return AARCH64_HI;
3378 case UNGE: return AARCH64_PL;
3379 default: gcc_unreachable ();
3381 break;
3383 case CCmode:
3384 switch (comp_code)
3386 case NE: return AARCH64_NE;
3387 case EQ: return AARCH64_EQ;
3388 case GE: return AARCH64_GE;
3389 case GT: return AARCH64_GT;
3390 case LE: return AARCH64_LE;
3391 case LT: return AARCH64_LT;
3392 case GEU: return AARCH64_CS;
3393 case GTU: return AARCH64_HI;
3394 case LEU: return AARCH64_LS;
3395 case LTU: return AARCH64_CC;
3396 default: gcc_unreachable ();
3398 break;
3400 case CC_SWPmode:
3401 case CC_ZESWPmode:
3402 case CC_SESWPmode:
3403 switch (comp_code)
3405 case NE: return AARCH64_NE;
3406 case EQ: return AARCH64_EQ;
3407 case GE: return AARCH64_LE;
3408 case GT: return AARCH64_LT;
3409 case LE: return AARCH64_GE;
3410 case LT: return AARCH64_GT;
3411 case GEU: return AARCH64_LS;
3412 case GTU: return AARCH64_CC;
3413 case LEU: return AARCH64_CS;
3414 case LTU: return AARCH64_HI;
3415 default: gcc_unreachable ();
3417 break;
3419 case CC_NZmode:
3420 switch (comp_code)
3422 case NE: return AARCH64_NE;
3423 case EQ: return AARCH64_EQ;
3424 case GE: return AARCH64_PL;
3425 case LT: return AARCH64_MI;
3426 default: gcc_unreachable ();
3428 break;
3430 default:
3431 gcc_unreachable ();
3432 break;
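/* Return the number of set bits in VALUE; each iteration of the loop
   below clears the lowest set bit (value &= value - 1).  */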
3436 static unsigned
3437 bit_count (unsigned HOST_WIDE_INT value)
3439 unsigned count = 0;
3441 while (value)
3443 count++;
3444 value &= value - 1;
3447 return count;
3450 void
3451 aarch64_print_operand (FILE *f, rtx x, char code)
3453 switch (code)
3455 /* An integer or symbol address without a preceding # sign. */
3456 case 'c':
3457 switch (GET_CODE (x))
3459 case CONST_INT:
3460 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3461 break;
3463 case SYMBOL_REF:
3464 output_addr_const (f, x);
3465 break;
3467 case CONST:
3468 if (GET_CODE (XEXP (x, 0)) == PLUS
3469 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3471 output_addr_const (f, x);
3472 break;
3474 /* Fall through. */
3476 default:
3477 output_operand_lossage ("Unsupported operand for code '%c'", code);
3479 break;
3481 case 'e':
3482 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3484 int n;
3486 if (GET_CODE (x) != CONST_INT
3487 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3489 output_operand_lossage ("invalid operand for '%%%c'", code);
3490 return;
3493 switch (n)
3495 case 3:
3496 fputc ('b', f);
3497 break;
3498 case 4:
3499 fputc ('h', f);
3500 break;
3501 case 5:
3502 fputc ('w', f);
3503 break;
3504 default:
3505 output_operand_lossage ("invalid operand for '%%%c'", code);
3506 return;
3509 break;
3511 case 'p':
3513 int n;
3515 /* Print N such that 2^N == X. */
3516 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3518 output_operand_lossage ("invalid operand for '%%%c'", code);
3519 return;
3522 asm_fprintf (f, "%d", n);
3524 break;
3526 case 'P':
3527 /* Print the number of non-zero bits in X (a const_int). */
3528 if (GET_CODE (x) != CONST_INT)
3530 output_operand_lossage ("invalid operand for '%%%c'", code);
3531 return;
3534 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3535 break;
3537 case 'H':
3538 /* Print the higher numbered register of a pair (TImode) of regs. */
3539 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3541 output_operand_lossage ("invalid operand for '%%%c'", code);
3542 return;
3545 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3546 break;
3548 case 'm':
3549 /* Print a condition (eq, ne, etc). */
3551 /* CONST_TRUE_RTX means always -- that's the default. */
3552 if (x == const_true_rtx)
3553 return;
3555 if (!COMPARISON_P (x))
3557 output_operand_lossage ("invalid operand for '%%%c'", code);
3558 return;
3561 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3562 break;
3564 case 'M':
3565 /* Print the inverse of a condition (eq <-> ne, etc). */
3567 /* CONST_TRUE_RTX means never -- that's the default. */
3568 if (x == const_true_rtx)
3570 fputs ("nv", f);
3571 return;
3574 if (!COMPARISON_P (x))
3576 output_operand_lossage ("invalid operand for '%%%c'", code);
3577 return;
3580 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3581 (aarch64_get_condition_code (x))], f);
3582 break;
3584 case 'b':
3585 case 'h':
3586 case 's':
3587 case 'd':
3588 case 'q':
3589 /* Print a scalar FP/SIMD register name. */
3590 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3592 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3593 return;
3595 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3596 break;
3598 case 'S':
3599 case 'T':
3600 case 'U':
3601 case 'V':
3602 /* Print the first FP/SIMD register name in a list. */
3603 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3605 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3606 return;
3608 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3609 break;
3611 case 'X':
3612 /* Print bottom 16 bits of integer constant in hex. */
3613 if (GET_CODE (x) != CONST_INT)
3615 output_operand_lossage ("invalid operand for '%%%c'", code);
3616 return;
3618 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3619 break;
3621 case 'w':
3622 case 'x':
3623 /* Print a general register name or the zero register (32-bit or
3624 64-bit). */
3625 if (x == const0_rtx
3626 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3628 asm_fprintf (f, "%czr", code);
3629 break;
3632 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3634 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3635 break;
3638 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3640 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3641 break;
3644 /* Fall through */
3646 case 0:
3647 /* Print a normal operand, if it's a general register, then we
3648 assume DImode. */
3649 if (x == NULL)
3651 output_operand_lossage ("missing operand");
3652 return;
3655 switch (GET_CODE (x))
3657 case REG:
3658 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3659 break;
3661 case MEM:
3662 aarch64_memory_reference_mode = GET_MODE (x);
3663 output_address (XEXP (x, 0));
3664 break;
3666 case LABEL_REF:
3667 case SYMBOL_REF:
3668 output_addr_const (asm_out_file, x);
3669 break;
3671 case CONST_INT:
3672 asm_fprintf (f, "%wd", INTVAL (x));
3673 break;
3675 case CONST_VECTOR:
3676 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3678 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3679 HOST_WIDE_INT_MIN,
3680 HOST_WIDE_INT_MAX));
3681 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3683 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3685 fputc ('0', f);
3687 else
3688 gcc_unreachable ();
3689 break;
3691 case CONST_DOUBLE:
3692 /* CONST_DOUBLE can represent a double-width integer.
3693 In this case, the mode of x is VOIDmode. */
3694 if (GET_MODE (x) == VOIDmode)
3695 ; /* Do Nothing. */
3696 else if (aarch64_float_const_zero_rtx_p (x))
3698 fputc ('0', f);
3699 break;
3701 else if (aarch64_float_const_representable_p (x))
3703 #define buf_size 20
3704 char float_buf[buf_size] = {'\0'};
3705 REAL_VALUE_TYPE r;
3706 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3707 real_to_decimal_for_mode (float_buf, &r,
3708 buf_size, buf_size,
3709 1, GET_MODE (x));
3710 asm_fprintf (asm_out_file, "%s", float_buf);
3711 break;
3712 #undef buf_size
3714 output_operand_lossage ("invalid constant");
3715 return;
3716 default:
3717 output_operand_lossage ("invalid operand");
3718 return;
3720 break;
3722 case 'A':
3723 if (GET_CODE (x) == HIGH)
3724 x = XEXP (x, 0);
3726 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3728 case SYMBOL_SMALL_GOT:
3729 asm_fprintf (asm_out_file, ":got:");
3730 break;
3732 case SYMBOL_SMALL_TLSGD:
3733 asm_fprintf (asm_out_file, ":tlsgd:");
3734 break;
3736 case SYMBOL_SMALL_TLSDESC:
3737 asm_fprintf (asm_out_file, ":tlsdesc:");
3738 break;
3740 case SYMBOL_SMALL_GOTTPREL:
3741 asm_fprintf (asm_out_file, ":gottprel:");
3742 break;
3744 case SYMBOL_SMALL_TPREL:
3745 asm_fprintf (asm_out_file, ":tprel:");
3746 break;
3748 case SYMBOL_TINY_GOT:
3749 gcc_unreachable ();
3750 break;
3752 default:
3753 break;
3755 output_addr_const (asm_out_file, x);
3756 break;
3758 case 'L':
3759 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3761 case SYMBOL_SMALL_GOT:
3762 asm_fprintf (asm_out_file, ":lo12:");
3763 break;
3765 case SYMBOL_SMALL_TLSGD:
3766 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3767 break;
3769 case SYMBOL_SMALL_TLSDESC:
3770 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3771 break;
3773 case SYMBOL_SMALL_GOTTPREL:
3774 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3775 break;
3777 case SYMBOL_SMALL_TPREL:
3778 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3779 break;
3781 case SYMBOL_TINY_GOT:
3782 asm_fprintf (asm_out_file, ":got:");
3783 break;
3785 default:
3786 break;
3788 output_addr_const (asm_out_file, x);
3789 break;
3791 case 'G':
3793 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3795 case SYMBOL_SMALL_TPREL:
3796 asm_fprintf (asm_out_file, ":tprel_hi12:");
3797 break;
3798 default:
3799 break;
3801 output_addr_const (asm_out_file, x);
3802 break;
3804 default:
3805 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3806 return;
3810 void
3811 aarch64_print_operand_address (FILE *f, rtx x)
3813 struct aarch64_address_info addr;
3815 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3816 MEM, true))
3817 switch (addr.type)
3819 case ADDRESS_REG_IMM:
3820 if (addr.offset == const0_rtx)
3821 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3822 else
3823 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3824 INTVAL (addr.offset));
3825 return;
3827 case ADDRESS_REG_REG:
3828 if (addr.shift == 0)
3829 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3830 reg_names [REGNO (addr.offset)]);
3831 else
3832 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3833 reg_names [REGNO (addr.offset)], addr.shift);
3834 return;
3836 case ADDRESS_REG_UXTW:
3837 if (addr.shift == 0)
3838 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3839 REGNO (addr.offset) - R0_REGNUM);
3840 else
3841 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3842 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3843 return;
3845 case ADDRESS_REG_SXTW:
3846 if (addr.shift == 0)
3847 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3848 REGNO (addr.offset) - R0_REGNUM);
3849 else
3850 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3851 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3852 return;
3854 case ADDRESS_REG_WB:
3855 switch (GET_CODE (x))
3857 case PRE_INC:
3858 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3859 GET_MODE_SIZE (aarch64_memory_reference_mode));
3860 return;
3861 case POST_INC:
3862 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3863 GET_MODE_SIZE (aarch64_memory_reference_mode));
3864 return;
3865 case PRE_DEC:
3866 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3867 GET_MODE_SIZE (aarch64_memory_reference_mode));
3868 return;
3869 case POST_DEC:
3870 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3871 GET_MODE_SIZE (aarch64_memory_reference_mode));
3872 return;
3873 case PRE_MODIFY:
3874 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3875 INTVAL (addr.offset));
3876 return;
3877 case POST_MODIFY:
3878 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3879 INTVAL (addr.offset));
3880 return;
3881 default:
3882 break;
3884 break;
3886 case ADDRESS_LO_SUM:
3887 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3888 output_addr_const (f, addr.offset);
3889 asm_fprintf (f, "]");
3890 return;
3892 case ADDRESS_SYMBOLIC:
3893 break;
3896 output_addr_const (f, x);
3899 bool
3900 aarch64_label_mentioned_p (rtx x)
3902 const char *fmt;
3903 int i;
3905 if (GET_CODE (x) == LABEL_REF)
3906 return true;
3908 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3909 referencing instruction, but they are constant offsets, not
3910 symbols. */
3911 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3912 return false;
3914 fmt = GET_RTX_FORMAT (GET_CODE (x));
3915 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3917 if (fmt[i] == 'E')
3919 int j;
3921 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3922 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3923 return 1;
3925 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3926 return 1;
3929 return 0;
3932 /* Implement REGNO_REG_CLASS. */
3934 enum reg_class
3935 aarch64_regno_regclass (unsigned regno)
3937 if (GP_REGNUM_P (regno))
3938 return CORE_REGS;
3940 if (regno == SP_REGNUM)
3941 return STACK_REG;
3943 if (regno == FRAME_POINTER_REGNUM
3944 || regno == ARG_POINTER_REGNUM)
3945 return POINTER_REGS;
3947 if (FP_REGNUM_P (regno))
3948 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3950 return NO_REGS;
3953 /* Try a machine-dependent way of reloading an illegitimate address
3954 operand. If we find one, push the reload and return the new rtx. */
3957 aarch64_legitimize_reload_address (rtx *x_p,
3958 enum machine_mode mode,
3959 int opnum, int type,
3960 int ind_levels ATTRIBUTE_UNUSED)
3962 rtx x = *x_p;
3964 /* Do not allow mem (plus (reg, const)) if vector mode. */
3965 if (aarch64_vector_mode_p (mode)
3966 && GET_CODE (x) == PLUS
3967 && REG_P (XEXP (x, 0))
3968 && CONST_INT_P (XEXP (x, 1)))
3970 rtx orig_rtx = x;
3971 x = copy_rtx (x);
3972 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3973 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3974 opnum, (enum reload_type) type);
3975 return x;
3978 /* We must recognize output that we have already generated ourselves. */
3979 if (GET_CODE (x) == PLUS
3980 && GET_CODE (XEXP (x, 0)) == PLUS
3981 && REG_P (XEXP (XEXP (x, 0), 0))
3982 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3983 && CONST_INT_P (XEXP (x, 1)))
3985 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3986 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3987 opnum, (enum reload_type) type);
3988 return x;
3991 /* We wish to handle large displacements off a base register by splitting
3992 the addend across an add and the mem insn. This can cut the number of
3993 extra insns needed from 3 to 1. It is only useful for load/store of a
3994 single register with 12 bit offset field. */
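/* For example (illustrative), an SImode access at base + 0x3004 is split
   into reloading base + 0x3000 into a base register while keeping #4 as
   the offset in the memory reference.  */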
3995 if (GET_CODE (x) == PLUS
3996 && REG_P (XEXP (x, 0))
3997 && CONST_INT_P (XEXP (x, 1))
3998 && HARD_REGISTER_P (XEXP (x, 0))
3999 && mode != TImode
4000 && mode != TFmode
4001 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4003 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4004 HOST_WIDE_INT low = val & 0xfff;
4005 HOST_WIDE_INT high = val - low;
4006 HOST_WIDE_INT offs;
4007 rtx cst;
4008 enum machine_mode xmode = GET_MODE (x);
4010 /* In ILP32, xmode can be either DImode or SImode. */
4011 gcc_assert (xmode == DImode || xmode == SImode);
4013 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4014 BLKmode alignment. */
4015 if (GET_MODE_SIZE (mode) == 0)
4016 return NULL_RTX;
4018 offs = low % GET_MODE_SIZE (mode);
4020 /* Align misaligned offset by adjusting high part to compensate. */
4021 if (offs != 0)
4023 if (aarch64_uimm12_shift (high + offs))
4025 /* Align down. */
4026 low = low - offs;
4027 high = high + offs;
4029 else
4031 /* Align up. */
4032 offs = GET_MODE_SIZE (mode) - offs;
4033 low = low + offs;
4034 high = high + (low & 0x1000) - offs;
4035 low &= 0xfff;
4039 /* Check for overflow. */
4040 if (high + low != val)
4041 return NULL_RTX;
4043 cst = GEN_INT (high);
4044 if (!aarch64_uimm12_shift (high))
4045 cst = force_const_mem (xmode, cst);
4047 /* Reload high part into base reg, leaving the low part
4048 in the mem instruction.
4049 Note that replacing this gen_rtx_PLUS with plus_constant is
4050 wrong in this case because we rely on the
4051 (plus (plus reg c1) c2) structure being preserved so that
4052 XEXP (*p, 0) in push_reload below uses the correct term. */
4053 x = gen_rtx_PLUS (xmode,
4054 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4055 GEN_INT (low));
4057 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4058 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4059 opnum, (enum reload_type) type);
4060 return x;
4063 return NULL_RTX;
4067 static reg_class_t
4068 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4069 reg_class_t rclass,
4070 enum machine_mode mode,
4071 secondary_reload_info *sri)
4073 /* Without the TARGET_SIMD instructions we cannot move a Q register
4074 to a Q register directly. We need a scratch. */
4075 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4076 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4077 && reg_class_subset_p (rclass, FP_REGS))
4079 if (mode == TFmode)
4080 sri->icode = CODE_FOR_aarch64_reload_movtf;
4081 else if (mode == TImode)
4082 sri->icode = CODE_FOR_aarch64_reload_movti;
4083 return NO_REGS;
4086 /* A TFmode or TImode memory access should be handled via an FP_REGS
4087 because AArch64 has richer addressing modes for LDR/STR instructions
4088 than LDP/STP instructions. */
4089 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4090 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4091 return FP_REGS;
4093 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4094 return CORE_REGS;
4096 return NO_REGS;
4099 static bool
4100 aarch64_can_eliminate (const int from, const int to)
4102 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4103 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4105 if (frame_pointer_needed)
4107 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4108 return true;
4109 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4110 return false;
4111 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4112 && !cfun->calls_alloca)
4113 return true;
4114 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4115 return true;
4116 return false;
4118 else
4120 /* If we decided that we didn't need a leaf frame pointer but then used
4121 LR in the function, then we'll want a frame pointer after all, so
4122 prevent this elimination to ensure a frame pointer is used.
4124 NOTE: the original value of flag_omit_frame_pointer gets trashed
4125 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4126 of faked_omit_frame_pointer here (which is true when we always
4127 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4128 pointers when LR is clobbered). */
4129 if (to == STACK_POINTER_REGNUM
4130 && df_regs_ever_live_p (LR_REGNUM)
4131 && faked_omit_frame_pointer)
4132 return false;
4135 return true;
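/* Return the offset to be added when replacing (eliminating) register
   FROM with register TO, e.g. the argument or soft frame pointer with
   the stack or hard frame pointer.  */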
4138 HOST_WIDE_INT
4139 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4141 HOST_WIDE_INT frame_size;
4142 HOST_WIDE_INT offset;
4144 aarch64_layout_frame ();
4145 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4146 + crtl->outgoing_args_size
4147 + cfun->machine->saved_varargs_size);
4149 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4150 offset = frame_size;
4152 if (to == HARD_FRAME_POINTER_REGNUM)
4154 if (from == ARG_POINTER_REGNUM)
4155 return offset - crtl->outgoing_args_size;
4157 if (from == FRAME_POINTER_REGNUM)
4158 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4161 if (to == STACK_POINTER_REGNUM)
4163 if (from == FRAME_POINTER_REGNUM)
4165 HOST_WIDE_INT elim = crtl->outgoing_args_size
4166 + cfun->machine->frame.saved_regs_size
4167 + get_frame_size ()
4168 - cfun->machine->frame.fp_lr_offset;
4169 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4170 return elim;
4174 return offset;
4178 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4179 previous frame. */
4182 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4184 if (count != 0)
4185 return const0_rtx;
4186 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4190 static void
4191 aarch64_asm_trampoline_template (FILE *f)
4193 if (TARGET_ILP32)
4195 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4196 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4198 else
4200 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4201 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4203 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4204 assemble_aligned_integer (4, const0_rtx);
4205 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4206 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
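/* The template above is 16 bytes of code (two loads, a branch and a
   4-byte pad) followed by two pointer-sized data slots.
   aarch64_trampoline_init below copies the code part and then stores
   the target function address in the first slot and the static chain
   value in the second.  */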
4209 static void
4210 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4212 rtx fnaddr, mem, a_tramp;
4213 const int tramp_code_sz = 16;
4215 /* Don't need to copy the trailing D-words; we fill those in below. */
4216 emit_block_move (m_tramp, assemble_trampoline_template (),
4217 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4218 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4219 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4220 if (GET_MODE (fnaddr) != ptr_mode)
4221 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4222 emit_move_insn (mem, fnaddr);
4224 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4225 emit_move_insn (mem, chain_value);
4227 /* XXX We should really define a "clear_cache" pattern and use
4228 gen_clear_cache(). */
4229 a_tramp = XEXP (m_tramp, 0);
4230 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4231 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4232 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4233 ptr_mode);
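/* Return how many consecutive registers of class REGCLASS are needed
   to hold a value of mode MODE.  Vector modes are counted in 128-bit
   units and everything else in 64-bit units, so e.g. TImode yields 2
   while V4SImode yields 1.  */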
4236 static unsigned char
4237 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4239 switch (regclass)
4241 case CORE_REGS:
4242 case POINTER_REGS:
4243 case GENERAL_REGS:
4244 case ALL_REGS:
4245 case FP_REGS:
4246 case FP_LO_REGS:
4247 return
4248 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4249 (GET_MODE_SIZE (mode) + 7) / 8;
4250 case STACK_REG:
4251 return 1;
4253 case NO_REGS:
4254 return 0;
4256 default:
4257 break;
4259 gcc_unreachable ();
4262 static reg_class_t
4263 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4265 if (regclass == POINTER_REGS)
4266 return GENERAL_REGS;
4268 if (regclass == STACK_REG)
4270 if (REG_P(x)
4271 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4272 return regclass;
4274 return NO_REGS;
4277 /* If it's an integer immediate that MOVI can't handle, then
4278 FP_REGS is not an option, so we return NO_REGS instead. */
4279 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4280 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4281 return NO_REGS;
4283 /* Register elimination can result in a request for
4284 SP+constant->FP_REGS. We cannot support such operations, which
4285 use SP as the source and an FP_REG as the destination, so reject
4286 them outright here. */
4287 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4289 rtx lhs = XEXP (x, 0);
4291 /* Look through a possible SUBREG introduced by ILP32. */
4292 if (GET_CODE (lhs) == SUBREG)
4293 lhs = SUBREG_REG (lhs);
4295 gcc_assert (REG_P (lhs));
4296 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4297 POINTER_REGS));
4298 return NO_REGS;
4301 return regclass;
4304 void
4305 aarch64_asm_output_labelref (FILE* f, const char *name)
4307 asm_fprintf (f, "%U%s", name);
4310 static void
4311 aarch64_elf_asm_constructor (rtx symbol, int priority)
4313 if (priority == DEFAULT_INIT_PRIORITY)
4314 default_ctor_section_asm_out_constructor (symbol, priority);
4315 else
4317 section *s;
4318 char buf[18];
4319 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4320 s = get_section (buf, SECTION_WRITE, NULL);
4321 switch_to_section (s);
4322 assemble_align (POINTER_SIZE);
4323 assemble_aligned_integer (POINTER_BYTES, symbol);
4327 static void
4328 aarch64_elf_asm_destructor (rtx symbol, int priority)
4330 if (priority == DEFAULT_INIT_PRIORITY)
4331 default_dtor_section_asm_out_destructor (symbol, priority);
4332 else
4334 section *s;
4335 char buf[18];
4336 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4337 s = get_section (buf, SECTION_WRITE, NULL);
4338 switch_to_section (s);
4339 assemble_align (POINTER_SIZE);
4340 assemble_aligned_integer (POINTER_BYTES, symbol);
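/* Output the dispatch sequence for a casesi pattern: load the table
   entry selected by the index in operand 1, use ADR to materialise
   the address of the table (the "Lrtx" label emitted below) into
   operand 4, add the sign-extended entry scaled by 4, and branch
   through operand 3.  */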
4344 const char*
4345 aarch64_output_casesi (rtx *operands)
4347 char buf[100];
4348 char label[100];
4349 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4350 int index;
4351 static const char *const patterns[4][2] =
4354 "ldrb\t%w3, [%0,%w1,uxtw]",
4355 "add\t%3, %4, %w3, sxtb #2"
4358 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4359 "add\t%3, %4, %w3, sxth #2"
4362 "ldr\t%w3, [%0,%w1,uxtw #2]",
4363 "add\t%3, %4, %w3, sxtw #2"
4365 /* We assume that DImode is only generated when not optimizing and
4366 that we don't really need 64-bit address offsets. That would
4367 imply an object file with 8GB of code in a single function! */
4369 "ldr\t%w3, [%0,%w1,uxtw #2]",
4370 "add\t%3, %4, %w3, sxtw #2"
4374 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4376 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4378 gcc_assert (index >= 0 && index <= 3);
4380 /* Need to implement table size reduction, by changing the code below. */
4381 output_asm_insn (patterns[index][0], operands);
4382 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4383 snprintf (buf, sizeof (buf),
4384 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4385 output_asm_insn (buf, operands);
4386 output_asm_insn (patterns[index][1], operands);
4387 output_asm_insn ("br\t%3", operands);
4388 assemble_label (asm_out_file, label);
4389 return "";
4393 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4394 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4395 operator. */
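/* For example, a mask of 0xff with a zero shift selects UXTB (8),
   a mask of 0xffff shifted left by 2 selects UXTH (16) and a mask of
   0xffffffff selects UXTW (32); anything else yields 0.  */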
4398 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4400 if (shift >= 0 && shift <= 3)
4402 int size;
4403 for (size = 8; size <= 32; size *= 2)
4405 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4406 if (mask == bits << shift)
4407 return size;
4410 return 0;
4413 static bool
4414 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4415 const_rtx x ATTRIBUTE_UNUSED)
4417 /* We can't use blocks for constants when we're using a per-function
4418 constant pool. */
4419 return false;
4422 static section *
4423 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4424 rtx x ATTRIBUTE_UNUSED,
4425 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4427 /* Force all constant pool entries into the current function section. */
4428 return function_section (current_function_decl);
4432 /* Costs. */
4434 /* Helper function for rtx cost calculation. Strip a shift expression
4435 from X. Returns the inner operand if successful, or the original
4436 expression on failure. */
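/* For example, (ashift (reg) (const_int 2)) and
   (mult (reg) (const_int 4)) both strip down to (reg).  */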
4437 static rtx
4438 aarch64_strip_shift (rtx x)
4440 rtx op = x;
4442 if ((GET_CODE (op) == ASHIFT
4443 || GET_CODE (op) == ASHIFTRT
4444 || GET_CODE (op) == LSHIFTRT)
4445 && CONST_INT_P (XEXP (op, 1)))
4446 return XEXP (op, 0);
4448 if (GET_CODE (op) == MULT
4449 && CONST_INT_P (XEXP (op, 1))
4450 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4451 return XEXP (op, 0);
4453 return x;
4456 /* Helper function for rtx cost calculation. Strip a shift or extend
4457 expression from X. Returns the inner operand if successful, or the
4458 original expression on failure. We deal with a number of possible
4459 canonicalization variations here. */
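/* For example, (zero_extend (reg)) strips to (reg), and so does
   (ashift (zero_extend (reg)) (const_int 2)), since the extended
   register form may carry a left shift of 1..4.  */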
4460 static rtx
4461 aarch64_strip_shift_or_extend (rtx x)
4463 rtx op = x;
4465 /* Zero and sign extraction of a widened value. */
4466 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4467 && XEXP (op, 2) == const0_rtx
4468 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4469 XEXP (op, 1)))
4470 return XEXP (XEXP (op, 0), 0);
4472 /* It can also be represented (for zero-extend) as an AND with an
4473 immediate. */
4474 if (GET_CODE (op) == AND
4475 && GET_CODE (XEXP (op, 0)) == MULT
4476 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4477 && CONST_INT_P (XEXP (op, 1))
4478 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4479 INTVAL (XEXP (op, 1))) != 0)
4480 return XEXP (XEXP (op, 0), 0);
4482 /* Now handle extended register, as this may also have an optional
4483 left shift by 1..4. */
4484 if (GET_CODE (op) == ASHIFT
4485 && CONST_INT_P (XEXP (op, 1))
4486 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4487 op = XEXP (op, 0);
4489 if (GET_CODE (op) == ZERO_EXTEND
4490 || GET_CODE (op) == SIGN_EXTEND)
4491 op = XEXP (op, 0);
4493 if (op != x)
4494 return op;
4496 return aarch64_strip_shift (x);
4499 /* Calculate the cost of calculating X, storing it in *COST. Result
4500 is true if the total cost of the operation has now been calculated. */
4501 static bool
4502 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4503 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4505 rtx op0, op1;
4506 const struct cpu_cost_table *extra_cost
4507 = aarch64_tune_params->insn_extra_cost;
4509 switch (code)
4511 case SET:
4512 op0 = SET_DEST (x);
4513 op1 = SET_SRC (x);
4515 switch (GET_CODE (op0))
4517 case MEM:
4518 if (speed)
4519 *cost += extra_cost->ldst.store;
4521 if (op1 != const0_rtx)
4522 *cost += rtx_cost (op1, SET, 1, speed);
4523 return true;
4525 case SUBREG:
4526 if (! REG_P (SUBREG_REG (op0)))
4527 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4528 /* Fall through. */
4529 case REG:
4530 /* Cost is just the cost of the RHS of the set. */
4531 *cost += rtx_cost (op1, SET, 1, true);
4532 return true;
4534 case ZERO_EXTRACT: /* Bit-field insertion. */
4535 case SIGN_EXTRACT:
4536 /* Strip any redundant widening of the RHS to meet the width of
4537 the target. */
4538 if (GET_CODE (op1) == SUBREG)
4539 op1 = SUBREG_REG (op1);
4540 if ((GET_CODE (op1) == ZERO_EXTEND
4541 || GET_CODE (op1) == SIGN_EXTEND)
4542 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4543 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4544 >= INTVAL (XEXP (op0, 1))))
4545 op1 = XEXP (op1, 0);
4546 *cost += rtx_cost (op1, SET, 1, speed);
4547 return true;
4549 default:
4550 break;
4552 return false;
4554 case MEM:
4555 if (speed)
4556 *cost += extra_cost->ldst.load;
4558 return true;
4560 case NEG:
4561 op0 = CONST0_RTX (GET_MODE (x));
4562 op1 = XEXP (x, 0);
4563 goto cost_minus;
4565 case COMPARE:
4566 op0 = XEXP (x, 0);
4567 op1 = XEXP (x, 1);
4569 if (op1 == const0_rtx
4570 && GET_CODE (op0) == AND)
4572 x = op0;
4573 goto cost_logic;
4576 /* Comparisons can work if the order is swapped.
4577 Canonicalization puts the more complex operation first, but
4578 we want it in op1. */
4579 if (! (REG_P (op0)
4580 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4582 op0 = XEXP (x, 1);
4583 op1 = XEXP (x, 0);
4585 goto cost_minus;
4587 case MINUS:
4588 op0 = XEXP (x, 0);
4589 op1 = XEXP (x, 1);
4591 cost_minus:
4592 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4593 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4594 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4596 if (op0 != const0_rtx)
4597 *cost += rtx_cost (op0, MINUS, 0, speed);
4599 if (CONST_INT_P (op1))
4601 if (!aarch64_uimm12_shift (INTVAL (op1)))
4602 *cost += rtx_cost (op1, MINUS, 1, speed);
4604 else
4606 op1 = aarch64_strip_shift_or_extend (op1);
4607 *cost += rtx_cost (op1, MINUS, 1, speed);
4609 return true;
4612 return false;
4614 case PLUS:
4615 op0 = XEXP (x, 0);
4616 op1 = XEXP (x, 1);
4618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4620 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4622 *cost += rtx_cost (op0, PLUS, 0, speed);
4624 else
4626 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4628 if (new_op0 == op0
4629 && GET_CODE (op0) == MULT)
4631 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4632 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4633 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4634 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4636 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4637 speed)
4638 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4639 speed)
4640 + rtx_cost (op1, PLUS, 1, speed));
4641 if (speed)
4642 *cost +=
4643 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4644 return true;
4646 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4647 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4648 + rtx_cost (op1, PLUS, 1, speed));
4650 if (speed)
4651 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4654 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4655 + rtx_cost (op1, PLUS, 1, speed));
4657 return true;
4660 return false;
4662 case IOR:
4663 case XOR:
4664 case AND:
4665 cost_logic:
4666 op0 = XEXP (x, 0);
4667 op1 = XEXP (x, 1);
4669 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4671 if (CONST_INT_P (op1)
4672 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4674 *cost += rtx_cost (op0, AND, 0, speed);
4676 else
4678 if (GET_CODE (op0) == NOT)
4679 op0 = XEXP (op0, 0);
4680 op0 = aarch64_strip_shift (op0);
4681 *cost += (rtx_cost (op0, AND, 0, speed)
4682 + rtx_cost (op1, AND, 1, speed));
4684 return true;
4686 return false;
4688 case ZERO_EXTEND:
4689 if ((GET_MODE (x) == DImode
4690 && GET_MODE (XEXP (x, 0)) == SImode)
4691 || GET_CODE (XEXP (x, 0)) == MEM)
4693 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4694 return true;
4696 return false;
4698 case SIGN_EXTEND:
4699 if (GET_CODE (XEXP (x, 0)) == MEM)
4701 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4702 return true;
4704 return false;
4706 case ROTATE:
4707 if (!CONST_INT_P (XEXP (x, 1)))
4708 *cost += COSTS_N_INSNS (2);
4709 /* Fall through. */
4710 case ROTATERT:
4711 case LSHIFTRT:
4712 case ASHIFT:
4713 case ASHIFTRT:
4715 /* Shifting by a register often takes an extra cycle. */
4716 if (speed && !CONST_INT_P (XEXP (x, 1)))
4717 *cost += extra_cost->alu.arith_shift_reg;
4719 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4720 return true;
4722 case HIGH:
4723 if (!CONSTANT_P (XEXP (x, 0)))
4724 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4725 return true;
4727 case LO_SUM:
4728 if (!CONSTANT_P (XEXP (x, 1)))
4729 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4730 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4731 return true;
4733 case ZERO_EXTRACT:
4734 case SIGN_EXTRACT:
4735 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4736 return true;
4738 case MULT:
4739 op0 = XEXP (x, 0);
4740 op1 = XEXP (x, 1);
4742 *cost = COSTS_N_INSNS (1);
4743 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4745 if (CONST_INT_P (op1)
4746 && exact_log2 (INTVAL (op1)) > 0)
4748 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4749 return true;
4752 if ((GET_CODE (op0) == ZERO_EXTEND
4753 && GET_CODE (op1) == ZERO_EXTEND)
4754 || (GET_CODE (op0) == SIGN_EXTEND
4755 && GET_CODE (op1) == SIGN_EXTEND))
4757 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4758 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4759 if (speed)
4760 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4761 return true;
4764 if (speed)
4765 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4767 else if (speed)
4769 if (GET_MODE (x) == DFmode)
4770 *cost += extra_cost->fp[1].mult;
4771 else if (GET_MODE (x) == SFmode)
4772 *cost += extra_cost->fp[0].mult;
4775 return false; /* All arguments need to be in registers. */
4777 case MOD:
4778 case UMOD:
4779 *cost = COSTS_N_INSNS (2);
4780 if (speed)
4782 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4783 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4784 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4785 else if (GET_MODE (x) == DFmode)
4786 *cost += (extra_cost->fp[1].mult
4787 + extra_cost->fp[1].div);
4788 else if (GET_MODE (x) == SFmode)
4789 *cost += (extra_cost->fp[0].mult
4790 + extra_cost->fp[0].div);
4792 return false; /* All arguments need to be in registers. */
4794 case DIV:
4795 case UDIV:
4796 *cost = COSTS_N_INSNS (1);
4797 if (speed)
4799 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4800 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4801 else if (GET_MODE (x) == DFmode)
4802 *cost += extra_cost->fp[1].div;
4803 else if (GET_MODE (x) == SFmode)
4804 *cost += extra_cost->fp[0].div;
4806 return false; /* All arguments need to be in registers. */
4808 default:
4809 break;
4811 return false;
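/* Compute the cost of addressing mode X: pre/post increment and
   modify forms use the pre_modify/post_modify table entries, a
   register plus immediate offset uses imm_offset, a scaled or
   extended index uses register_extend, a plain register index uses
   register_offset, and a MEM, LABEL_REF or SYMBOL_REF counts as
   imm_offset.  */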
4814 static int
4815 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4816 enum machine_mode mode ATTRIBUTE_UNUSED,
4817 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4819 enum rtx_code c = GET_CODE (x);
4820 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4822 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4823 return addr_cost->pre_modify;
4825 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4826 return addr_cost->post_modify;
4828 if (c == PLUS)
4830 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4831 return addr_cost->imm_offset;
4832 else if (GET_CODE (XEXP (x, 0)) == MULT
4833 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4834 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4835 return addr_cost->register_extend;
4837 return addr_cost->register_offset;
4839 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4840 return addr_cost->imm_offset;
4842 return 0;
4845 static int
4846 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4847 reg_class_t from, reg_class_t to)
4849 const struct cpu_regmove_cost *regmove_cost
4850 = aarch64_tune_params->regmove_cost;
4852 if (from == GENERAL_REGS && to == GENERAL_REGS)
4853 return regmove_cost->GP2GP;
4854 else if (from == GENERAL_REGS)
4855 return regmove_cost->GP2FP;
4856 else if (to == GENERAL_REGS)
4857 return regmove_cost->FP2GP;
4859 /* When AdvSIMD instructions are disabled it is not possible to move
4860 a 128-bit value directly between Q registers. This is handled in
4861 secondary reload. A general register is used as a scratch to move
4862 the upper DI value and the lower DI value is moved directly,
4863 hence the cost is the sum of three moves. */
4865 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4866 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4868 return regmove_cost->FP2FP;
4871 static int
4872 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4873 reg_class_t rclass ATTRIBUTE_UNUSED,
4874 bool in ATTRIBUTE_UNUSED)
4876 return aarch64_tune_params->memmov_cost;
4879 /* Vectorizer cost model target hooks. */
4881 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4882 static int
4883 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4884 tree vectype,
4885 int misalign ATTRIBUTE_UNUSED)
4887 unsigned elements;
4889 switch (type_of_cost)
4891 case scalar_stmt:
4892 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4894 case scalar_load:
4895 return aarch64_tune_params->vec_costs->scalar_load_cost;
4897 case scalar_store:
4898 return aarch64_tune_params->vec_costs->scalar_store_cost;
4900 case vector_stmt:
4901 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4903 case vector_load:
4904 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4906 case vector_store:
4907 return aarch64_tune_params->vec_costs->vec_store_cost;
4909 case vec_to_scalar:
4910 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4912 case scalar_to_vec:
4913 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4915 case unaligned_load:
4916 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4918 case unaligned_store:
4919 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4921 case cond_branch_taken:
4922 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4924 case cond_branch_not_taken:
4925 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4927 case vec_perm:
4928 case vec_promote_demote:
4929 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4931 case vec_construct:
4932 elements = TYPE_VECTOR_SUBPARTS (vectype);
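/* Rough heuristic: constructing an N-element vector costs about
   one statement per pair of elements, plus one.  */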
4933 return elements / 2 + 1;
4935 default:
4936 gcc_unreachable ();
4940 /* Implement targetm.vectorize.add_stmt_cost. */
4941 static unsigned
4942 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4943 struct _stmt_vec_info *stmt_info, int misalign,
4944 enum vect_cost_model_location where)
4946 unsigned *cost = (unsigned *) data;
4947 unsigned retval = 0;
4949 if (flag_vect_cost_model)
4951 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4952 int stmt_cost =
4953 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4955 /* Statements in an inner loop relative to the loop being
4956 vectorized are weighted more heavily. The value here is
4957 a function (linear for now) of the loop nest level. */
4958 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4960 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4961 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4962 unsigned nest_level = loop_depth (loop);
4964 count *= nest_level;
4967 retval = (unsigned) (count * stmt_cost);
4968 cost[where] += retval;
4971 return retval;
4974 static void initialize_aarch64_code_model (void);
4976 /* Parse the architecture extension string. */
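/* For example, given "+crypto+nosimd" the loop below first enables
   the "crypto" extension's flags and then clears the "simd" ones;
   an unknown name or a bare "+no" is reported with an error.  */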
4978 static void
4979 aarch64_parse_extension (char *str)
4981 /* The extension string is parsed left to right. */
4982 const struct aarch64_option_extension *opt = NULL;
4984 /* Flag to say whether we are adding or removing an extension. */
4985 int adding_ext = -1;
4987 while (str != NULL && *str != 0)
4989 char *ext;
4990 size_t len;
4992 str++;
4993 ext = strchr (str, '+');
4995 if (ext != NULL)
4996 len = ext - str;
4997 else
4998 len = strlen (str);
5000 if (len >= 2 && strncmp (str, "no", 2) == 0)
5002 adding_ext = 0;
5003 len -= 2;
5004 str += 2;
5006 else if (len > 0)
5007 adding_ext = 1;
5009 if (len == 0)
5011 error ("missing feature modifier after %qs", "+no");
5012 return;
5015 /* Scan over the extensions table trying to find an exact match. */
5016 for (opt = all_extensions; opt->name != NULL; opt++)
5018 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5020 /* Add or remove the extension. */
5021 if (adding_ext)
5022 aarch64_isa_flags |= opt->flags_on;
5023 else
5024 aarch64_isa_flags &= ~(opt->flags_off);
5025 break;
5029 if (opt->name == NULL)
5031 /* Extension not found in list. */
5032 error ("unknown feature modifier %qs", str);
5033 return;
5036 str = ext;
5039 return;
5042 /* Parse the ARCH string. */
5044 static void
5045 aarch64_parse_arch (void)
5047 char *ext;
5048 const struct processor *arch;
5049 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5050 size_t len;
5052 strcpy (str, aarch64_arch_string);
5054 ext = strchr (str, '+');
5056 if (ext != NULL)
5057 len = ext - str;
5058 else
5059 len = strlen (str);
5061 if (len == 0)
5063 error ("missing arch name in -march=%qs", str);
5064 return;
5067 /* Loop through the list of supported ARCHs to find a match. */
5068 for (arch = all_architectures; arch->name != NULL; arch++)
5070 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5072 selected_arch = arch;
5073 aarch64_isa_flags = selected_arch->flags;
5074 selected_cpu = &all_cores[selected_arch->core];
5076 if (ext != NULL)
5078 /* ARCH string contains at least one extension. */
5079 aarch64_parse_extension (ext);
5082 return;
5086 /* ARCH name not found in list. */
5087 error ("unknown value %qs for -march", str);
5088 return;
5091 /* Parse the CPU string. */
5093 static void
5094 aarch64_parse_cpu (void)
5096 char *ext;
5097 const struct processor *cpu;
5098 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5099 size_t len;
5101 strcpy (str, aarch64_cpu_string);
5103 ext = strchr (str, '+');
5105 if (ext != NULL)
5106 len = ext - str;
5107 else
5108 len = strlen (str);
5110 if (len == 0)
5112 error ("missing cpu name in -mcpu=%qs", str);
5113 return;
5116 /* Loop through the list of supported CPUs to find a match. */
5117 for (cpu = all_cores; cpu->name != NULL; cpu++)
5119 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5121 selected_cpu = cpu;
5122 aarch64_isa_flags = selected_cpu->flags;
5124 if (ext != NULL)
5126 /* CPU string contains at least one extension. */
5127 aarch64_parse_extension (ext);
5130 return;
5134 /* CPU name not found in list. */
5135 error ("unknown value %qs for -mcpu", str);
5136 return;
5139 /* Parse the TUNE string. */
5141 static void
5142 aarch64_parse_tune (void)
5144 const struct processor *cpu;
5145 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5146 strcpy (str, aarch64_tune_string);
5148 /* Loop through the list of supported CPUs to find a match. */
5149 for (cpu = all_cores; cpu->name != NULL; cpu++)
5151 if (strcmp (cpu->name, str) == 0)
5153 selected_tune = cpu;
5154 return;
5158 /* CPU name not found in list. */
5159 error ("unknown value %qs for -mtune", str);
5160 return;
5164 /* Implement TARGET_OPTION_OVERRIDE. */
5166 static void
5167 aarch64_override_options (void)
5169 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
5170 otherwise march remains undefined. mtune can be used with either march or
5171 mcpu. */
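/* For example, "-march=armv8-a+crypto -mcpu=cortex-a53" selects the
   armv8-a ISA plus the crypto extension and ignores the -mcpu core
   selection, though an explicit -mtune would still be honoured.  */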
5173 if (aarch64_arch_string)
5175 aarch64_parse_arch ();
5176 aarch64_cpu_string = NULL;
5179 if (aarch64_cpu_string)
5181 aarch64_parse_cpu ();
5182 selected_arch = NULL;
5185 if (aarch64_tune_string)
5187 aarch64_parse_tune ();
5190 #ifndef HAVE_AS_MABI_OPTION
5191 /* The compiler may have been configured with 2.23.* binutils, which does
5192 not have support for ILP32. */
5193 if (TARGET_ILP32)
5194 error ("Assembler does not support -mabi=ilp32");
5195 #endif
5197 initialize_aarch64_code_model ();
5199 aarch64_build_bitmask_table ();
5201 /* This target defaults to strict volatile bitfields. */
5202 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5203 flag_strict_volatile_bitfields = 1;
5205 /* If the user did not specify a processor, choose the default
5206 one for them. This will be the CPU set during configuration using
5207 --with-cpu, otherwise it is "cortex-a53". */
5208 if (!selected_cpu)
5210 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5211 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5214 gcc_assert (selected_cpu);
5216 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5217 if (!selected_tune)
5218 selected_tune = &all_cores[selected_cpu->core];
5220 aarch64_tune_flags = selected_tune->flags;
5221 aarch64_tune = selected_tune->core;
5222 aarch64_tune_params = selected_tune->tune;
5224 aarch64_override_options_after_change ();
5227 /* Implement targetm.override_options_after_change. */
5229 static void
5230 aarch64_override_options_after_change (void)
5232 faked_omit_frame_pointer = false;
5234 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5235 that aarch64_frame_pointer_required will be called. We need to remember
5236 whether flag_omit_frame_pointer was turned on normally or just faked. */
5238 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5240 flag_omit_frame_pointer = true;
5241 faked_omit_frame_pointer = true;
5245 static struct machine_function *
5246 aarch64_init_machine_status (void)
5248 struct machine_function *machine;
5249 machine = ggc_alloc_cleared_machine_function ();
5250 return machine;
5253 void
5254 aarch64_init_expanders (void)
5256 init_machine_status = aarch64_init_machine_status;
5259 /* A checking mechanism for the implementation of the various code models. */
5260 static void
5261 initialize_aarch64_code_model (void)
5263 if (flag_pic)
5265 switch (aarch64_cmodel_var)
5267 case AARCH64_CMODEL_TINY:
5268 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5269 break;
5270 case AARCH64_CMODEL_SMALL:
5271 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5272 break;
5273 case AARCH64_CMODEL_LARGE:
5274 sorry ("code model %qs with -f%s", "large",
5275 flag_pic > 1 ? "PIC" : "pic");
5276 default:
5277 gcc_unreachable ();
5280 else
5281 aarch64_cmodel = aarch64_cmodel_var;
5284 /* Return true if SYMBOL_REF X binds locally. */
5286 static bool
5287 aarch64_symbol_binds_local_p (const_rtx x)
5289 return (SYMBOL_REF_DECL (x)
5290 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5291 : SYMBOL_REF_LOCAL_P (x));
5294 /* Return true if SYMBOL_REF X is thread-local. */
5295 static bool
5296 aarch64_tls_symbol_p (rtx x)
5298 if (! TARGET_HAVE_TLS)
5299 return false;
5301 if (GET_CODE (x) != SYMBOL_REF)
5302 return false;
5304 return SYMBOL_REF_TLS_MODEL (x) != 0;
5307 /* Classify a TLS symbol into one of the TLS kinds. */
5308 enum aarch64_symbol_type
5309 aarch64_classify_tls_symbol (rtx x)
5311 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5313 switch (tls_kind)
5315 case TLS_MODEL_GLOBAL_DYNAMIC:
5316 case TLS_MODEL_LOCAL_DYNAMIC:
5317 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5319 case TLS_MODEL_INITIAL_EXEC:
5320 return SYMBOL_SMALL_GOTTPREL;
5322 case TLS_MODEL_LOCAL_EXEC:
5323 return SYMBOL_SMALL_TPREL;
5325 case TLS_MODEL_EMULATED:
5326 case TLS_MODEL_NONE:
5327 return SYMBOL_FORCE_TO_MEM;
5329 default:
5330 gcc_unreachable ();
5334 /* Return the method that should be used to access SYMBOL_REF or
5335 LABEL_REF X in context CONTEXT. */
5337 enum aarch64_symbol_type
5338 aarch64_classify_symbol (rtx x,
5339 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5341 if (GET_CODE (x) == LABEL_REF)
5343 switch (aarch64_cmodel)
5345 case AARCH64_CMODEL_LARGE:
5346 return SYMBOL_FORCE_TO_MEM;
5348 case AARCH64_CMODEL_TINY_PIC:
5349 case AARCH64_CMODEL_TINY:
5350 return SYMBOL_TINY_ABSOLUTE;
5352 case AARCH64_CMODEL_SMALL_PIC:
5353 case AARCH64_CMODEL_SMALL:
5354 return SYMBOL_SMALL_ABSOLUTE;
5356 default:
5357 gcc_unreachable ();
5361 if (GET_CODE (x) == SYMBOL_REF)
5363 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5364 || CONSTANT_POOL_ADDRESS_P (x))
5365 return SYMBOL_FORCE_TO_MEM;
5367 if (aarch64_tls_symbol_p (x))
5368 return aarch64_classify_tls_symbol (x);
5370 switch (aarch64_cmodel)
5372 case AARCH64_CMODEL_TINY:
5373 if (SYMBOL_REF_WEAK (x))
5374 return SYMBOL_FORCE_TO_MEM;
5375 return SYMBOL_TINY_ABSOLUTE;
5377 case AARCH64_CMODEL_SMALL:
5378 if (SYMBOL_REF_WEAK (x))
5379 return SYMBOL_FORCE_TO_MEM;
5380 return SYMBOL_SMALL_ABSOLUTE;
5382 case AARCH64_CMODEL_TINY_PIC:
5383 if (!aarch64_symbol_binds_local_p (x))
5384 return SYMBOL_TINY_GOT;
5385 return SYMBOL_TINY_ABSOLUTE;
5387 case AARCH64_CMODEL_SMALL_PIC:
5388 if (!aarch64_symbol_binds_local_p (x))
5389 return SYMBOL_SMALL_GOT;
5390 return SYMBOL_SMALL_ABSOLUTE;
5392 default:
5393 gcc_unreachable ();
5397 /* By default push everything into the constant pool. */
5398 return SYMBOL_FORCE_TO_MEM;
5401 bool
5402 aarch64_constant_address_p (rtx x)
5404 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5407 bool
5408 aarch64_legitimate_pic_operand_p (rtx x)
5410 if (GET_CODE (x) == SYMBOL_REF
5411 || (GET_CODE (x) == CONST
5412 && GET_CODE (XEXP (x, 0)) == PLUS
5413 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5414 return false;
5416 return true;
5419 /* Return true if X holds either a quarter-precision or
5420 floating-point +0.0 constant. */
5421 static bool
5422 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5424 if (!CONST_DOUBLE_P (x))
5425 return false;
5427 /* TODO: We could handle moving 0.0 to a TFmode register,
5428 but first we would like to refactor the movtf_aarch64
5429 pattern to split moves properly and to gate correctly
5430 on TARGET_SIMD. For now, reject all constants that are
5431 not destined for SFmode or DFmode registers. */
5432 if (!(mode == SFmode || mode == DFmode))
5433 return false;
5435 if (aarch64_float_const_zero_rtx_p (x))
5436 return true;
5437 return aarch64_float_const_representable_p (x);
5440 static bool
5441 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5443 /* Do not allow vector struct mode constants. We could support
5444 0 and -1 easily, but they need support in aarch64-simd.md. */
5445 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5446 return false;
5448 /* This could probably go away because
5449 we now decompose CONST_INTs according to expand_mov_immediate. */
5450 if ((GET_CODE (x) == CONST_VECTOR
5451 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5452 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5453 return !targetm.cannot_force_const_mem (mode, x);
5455 if (GET_CODE (x) == HIGH
5456 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5457 return true;
5459 return aarch64_constant_address_p (x);
5463 aarch64_load_tp (rtx target)
5465 if (!target
5466 || GET_MODE (target) != Pmode
5467 || !register_operand (target, Pmode))
5468 target = gen_reg_rtx (Pmode);
5470 /* Can return in any reg. */
5471 emit_insn (gen_aarch64_load_tp_hard (target));
5472 return target;
5475 /* On AAPCS systems, this is the "struct __va_list". */
5476 static GTY(()) tree va_list_type;
5478 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5479 Return the type to use as __builtin_va_list.
5481 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5483 struct __va_list
5485 void *__stack;
5486 void *__gr_top;
5487 void *__vr_top;
5488 int __gr_offs;
5489 int __vr_offs;
5490 }; */
5492 static tree
5493 aarch64_build_builtin_va_list (void)
5495 tree va_list_name;
5496 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5498 /* Create the type. */
5499 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5500 /* Give it the required name. */
5501 va_list_name = build_decl (BUILTINS_LOCATION,
5502 TYPE_DECL,
5503 get_identifier ("__va_list"),
5504 va_list_type);
5505 DECL_ARTIFICIAL (va_list_name) = 1;
5506 TYPE_NAME (va_list_type) = va_list_name;
5507 TYPE_STUB_DECL (va_list_type) = va_list_name;
5509 /* Create the fields. */
5510 f_stack = build_decl (BUILTINS_LOCATION,
5511 FIELD_DECL, get_identifier ("__stack"),
5512 ptr_type_node);
5513 f_grtop = build_decl (BUILTINS_LOCATION,
5514 FIELD_DECL, get_identifier ("__gr_top"),
5515 ptr_type_node);
5516 f_vrtop = build_decl (BUILTINS_LOCATION,
5517 FIELD_DECL, get_identifier ("__vr_top"),
5518 ptr_type_node);
5519 f_groff = build_decl (BUILTINS_LOCATION,
5520 FIELD_DECL, get_identifier ("__gr_offs"),
5521 integer_type_node);
5522 f_vroff = build_decl (BUILTINS_LOCATION,
5523 FIELD_DECL, get_identifier ("__vr_offs"),
5524 integer_type_node);
5526 DECL_ARTIFICIAL (f_stack) = 1;
5527 DECL_ARTIFICIAL (f_grtop) = 1;
5528 DECL_ARTIFICIAL (f_vrtop) = 1;
5529 DECL_ARTIFICIAL (f_groff) = 1;
5530 DECL_ARTIFICIAL (f_vroff) = 1;
5532 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5533 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5534 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5535 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5536 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5538 TYPE_FIELDS (va_list_type) = f_stack;
5539 DECL_CHAIN (f_stack) = f_grtop;
5540 DECL_CHAIN (f_grtop) = f_vrtop;
5541 DECL_CHAIN (f_vrtop) = f_groff;
5542 DECL_CHAIN (f_groff) = f_vroff;
5544 /* Compute its layout. */
5545 layout_type (va_list_type);
5547 return va_list_type;
5550 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
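/* The register save area is laid out by aarch64_setup_incoming_varargs
   below, immediately under virtual_incoming_args_rtx: the GR save area
   sits directly below it and the VR save area below that, at the next
   16-byte boundary.  Accordingly __gr_top is set to
   virtual_incoming_args_rtx, __vr_top to the 16-byte-aligned bottom of
   the GR area, and __gr_offs/__vr_offs start out negative, reaching
   zero once the respective save area is exhausted.  */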
5551 static void
5552 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5554 const CUMULATIVE_ARGS *cum;
5555 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5556 tree stack, grtop, vrtop, groff, vroff;
5557 tree t;
5558 int gr_save_area_size;
5559 int vr_save_area_size;
5560 int vr_offset;
5562 cum = &crtl->args.info;
5563 gr_save_area_size
5564 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5565 vr_save_area_size
5566 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5568 if (TARGET_GENERAL_REGS_ONLY)
5570 if (cum->aapcs_nvrn > 0)
5571 sorry ("%qs and floating point or vector arguments",
5572 "-mgeneral-regs-only");
5573 vr_save_area_size = 0;
5576 f_stack = TYPE_FIELDS (va_list_type_node);
5577 f_grtop = DECL_CHAIN (f_stack);
5578 f_vrtop = DECL_CHAIN (f_grtop);
5579 f_groff = DECL_CHAIN (f_vrtop);
5580 f_vroff = DECL_CHAIN (f_groff);
5582 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5583 NULL_TREE);
5584 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5585 NULL_TREE);
5586 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5587 NULL_TREE);
5588 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5589 NULL_TREE);
5590 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5591 NULL_TREE);
5593 /* Emit code to initialize STACK, which points to the next varargs stack
5594 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5595 by named arguments. STACK is 8-byte aligned. */
5596 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5597 if (cum->aapcs_stack_size > 0)
5598 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5599 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5600 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5602 /* Emit code to initialize GRTOP, the top of the GR save area.
5603 virtual_incoming_args_rtx should have been 16-byte aligned. */
5604 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5605 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5606 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5608 /* Emit code to initialize VRTOP, the top of the VR save area.
5609 This address is gr_save_area_bytes below GRTOP, rounded
5610 down to the next 16-byte boundary. */
5611 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5612 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5613 STACK_BOUNDARY / BITS_PER_UNIT);
5615 if (vr_offset)
5616 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5617 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5618 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5620 /* Emit code to initialize GROFF, the offset from GRTOP of the
5621 next GPR argument. */
5622 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5623 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5624 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5626 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5627 of the next VR argument. */
5628 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5629 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5630 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5633 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5635 static tree
5636 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5637 gimple_seq *post_p ATTRIBUTE_UNUSED)
5639 tree addr;
5640 bool indirect_p;
5641 bool is_ha; /* is HFA or HVA. */
5642 bool dw_align; /* double-word align. */
5643 enum machine_mode ag_mode = VOIDmode;
5644 int nregs;
5645 enum machine_mode mode;
5647 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5648 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5649 HOST_WIDE_INT size, rsize, adjust, align;
5650 tree t, u, cond1, cond2;
5652 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5653 if (indirect_p)
5654 type = build_pointer_type (type);
5656 mode = TYPE_MODE (type);
5658 f_stack = TYPE_FIELDS (va_list_type_node);
5659 f_grtop = DECL_CHAIN (f_stack);
5660 f_vrtop = DECL_CHAIN (f_grtop);
5661 f_groff = DECL_CHAIN (f_vrtop);
5662 f_vroff = DECL_CHAIN (f_groff);
5664 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5665 f_stack, NULL_TREE);
5666 size = int_size_in_bytes (type);
5667 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5669 dw_align = false;
5670 adjust = 0;
5671 if (aarch64_vfp_is_call_or_return_candidate (mode,
5672 type,
5673 &ag_mode,
5674 &nregs,
5675 &is_ha))
5677 /* TYPE passed in fp/simd registers. */
5678 if (TARGET_GENERAL_REGS_ONLY)
5679 sorry ("%qs and floating point or vector arguments",
5680 "-mgeneral-regs-only");
5682 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5683 unshare_expr (valist), f_vrtop, NULL_TREE);
5684 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5685 unshare_expr (valist), f_vroff, NULL_TREE);
5687 rsize = nregs * UNITS_PER_VREG;
5689 if (is_ha)
5691 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5692 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5694 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5695 && size < UNITS_PER_VREG)
5697 adjust = UNITS_PER_VREG - size;
5700 else
5702 /* TYPE passed in general registers. */
5703 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5704 unshare_expr (valist), f_grtop, NULL_TREE);
5705 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5706 unshare_expr (valist), f_groff, NULL_TREE);
5707 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5708 nregs = rsize / UNITS_PER_WORD;
5710 if (align > 8)
5711 dw_align = true;
5713 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5714 && size < UNITS_PER_WORD)
5716 adjust = UNITS_PER_WORD - size;
5720 /* Get a local temporary for the field value. */
5721 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5723 /* Emit code to branch if off >= 0. */
5724 t = build2 (GE_EXPR, boolean_type_node, off,
5725 build_int_cst (TREE_TYPE (off), 0));
5726 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5728 if (dw_align)
5730 /* Emit: offs = (offs + 15) & -16. */
5731 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5732 build_int_cst (TREE_TYPE (off), 15));
5733 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5734 build_int_cst (TREE_TYPE (off), -16));
5735 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5737 else
5738 roundup = NULL;
5740 /* Update ap.__[g|v]r_offs */
5741 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5742 build_int_cst (TREE_TYPE (off), rsize));
5743 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5745 /* String up. */
5746 if (roundup)
5747 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5749 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5750 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5751 build_int_cst (TREE_TYPE (f_off), 0));
5752 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5754 /* String up: make sure the assignment happens before the use. */
5755 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5756 COND_EXPR_ELSE (cond1) = t;
5758 /* Prepare the trees handling the argument that is passed on the stack;
5759 the top-level node is stored in ON_STACK. */
5760 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5761 if (align > 8)
5763 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5764 t = fold_convert (intDI_type_node, arg);
5765 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5766 build_int_cst (TREE_TYPE (t), 15));
5767 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5768 build_int_cst (TREE_TYPE (t), -16));
5769 t = fold_convert (TREE_TYPE (arg), t);
5770 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5772 else
5773 roundup = NULL;
5774 /* Advance ap.__stack */
5775 t = fold_convert (intDI_type_node, arg);
5776 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5777 build_int_cst (TREE_TYPE (t), size + 7));
5778 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5779 build_int_cst (TREE_TYPE (t), -8));
5780 t = fold_convert (TREE_TYPE (arg), t);
5781 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5782 /* String up roundup and advance. */
5783 if (roundup)
5784 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5785 /* String up with arg */
5786 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5787 /* Big-endianness related address adjustment. */
5788 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5789 && size < UNITS_PER_WORD)
5791 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5792 size_int (UNITS_PER_WORD - size));
5793 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5796 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5797 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5799 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5800 t = off;
5801 if (adjust)
5802 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5803 build_int_cst (TREE_TYPE (off), adjust));
5805 t = fold_convert (sizetype, t);
5806 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5808 if (is_ha)
5810 /* type ha; // treat as "struct {ftype field[n];}"
5811 ... [computing offs]
5812 for (i = 0; i <nregs; ++i, offs += 16)
5813 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5814 return ha; */
5815 int i;
5816 tree tmp_ha, field_t, field_ptr_t;
5818 /* Declare a local variable. */
5819 tmp_ha = create_tmp_var_raw (type, "ha");
5820 gimple_add_tmp_var (tmp_ha);
5822 /* Establish the base type. */
5823 switch (ag_mode)
5825 case SFmode:
5826 field_t = float_type_node;
5827 field_ptr_t = float_ptr_type_node;
5828 break;
5829 case DFmode:
5830 field_t = double_type_node;
5831 field_ptr_t = double_ptr_type_node;
5832 break;
5833 case TFmode:
5834 field_t = long_double_type_node;
5835 field_ptr_t = long_double_ptr_type_node;
5836 break;
5837 /* Half precision and quad precision are not fully supported yet. Enable
5838 the following code once that support is complete; the correct type
5839 node for __fp16 * still needs to be found. */
5840 #if 0
5841 case HFmode:
5842 field_t = float_type_node;
5843 field_ptr_t = float_ptr_type_node;
5844 break;
5845 #endif
5846 case V2SImode:
5847 case V4SImode:
5849 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5850 field_t = build_vector_type_for_mode (innertype, ag_mode);
5851 field_ptr_t = build_pointer_type (field_t);
5853 break;
5854 default:
5855 gcc_assert (0);
5858 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5859 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5860 addr = t;
5861 t = fold_convert (field_ptr_t, addr);
5862 t = build2 (MODIFY_EXPR, field_t,
5863 build1 (INDIRECT_REF, field_t, tmp_ha),
5864 build1 (INDIRECT_REF, field_t, t));
5866 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5867 for (i = 1; i < nregs; ++i)
5869 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5870 u = fold_convert (field_ptr_t, addr);
5871 u = build2 (MODIFY_EXPR, field_t,
5872 build2 (MEM_REF, field_t, tmp_ha,
5873 build_int_cst (field_ptr_t,
5874 (i *
5875 int_size_in_bytes (field_t)))),
5876 build1 (INDIRECT_REF, field_t, u));
5877 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5880 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5881 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5884 COND_EXPR_ELSE (cond2) = t;
5885 addr = fold_convert (build_pointer_type (type), cond1);
5886 addr = build_va_arg_indirect_ref (addr);
5888 if (indirect_p)
5889 addr = build_va_arg_indirect_ref (addr);
5891 return addr;
5894 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5896 static void
5897 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5898 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5899 int no_rtl)
5901 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5902 CUMULATIVE_ARGS local_cum;
5903 int gr_saved, vr_saved;
5905 /* The caller has advanced CUM up to, but not beyond, the last named
5906 argument. Advance a local copy of CUM past the last "real" named
5907 argument, to find out how many registers are left over. */
5908 local_cum = *cum;
5909 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5911 /* Find out how many registers we need to save. */
5912 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5913 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5915 if (TARGET_GENERAL_REGS_ONLY)
5917 if (local_cum.aapcs_nvrn > 0)
5918 sorry ("%qs and floating point or vector arguments",
5919 "-mgeneral-regs-only");
5920 vr_saved = 0;
5923 if (!no_rtl)
5925 if (gr_saved > 0)
5927 rtx ptr, mem;
5929 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5930 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5931 - gr_saved * UNITS_PER_WORD);
5932 mem = gen_frame_mem (BLKmode, ptr);
5933 set_mem_alias_set (mem, get_varargs_alias_set ());
5935 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5936 mem, gr_saved);
5938 if (vr_saved > 0)
5940 /* We can't use move_block_from_reg, because it will use
5941 the wrong mode, storing D regs only. */
5942 enum machine_mode mode = TImode;
5943 int off, i;
5945 /* Set OFF to the offset from virtual_incoming_args_rtx of
5946 the first vector register. The VR save area lies below
5947 the GR one, and is aligned to 16 bytes. */
5948 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5949 STACK_BOUNDARY / BITS_PER_UNIT);
5950 off -= vr_saved * UNITS_PER_VREG;
5952 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5954 rtx ptr, mem;
5956 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5957 mem = gen_frame_mem (mode, ptr);
5958 set_mem_alias_set (mem, get_varargs_alias_set ());
5959 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5960 off += UNITS_PER_VREG;
5965 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5966 any complication of having crtl->args.pretend_args_size changed. */
5967 cfun->machine->saved_varargs_size
5968 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5969 STACK_BOUNDARY / BITS_PER_UNIT)
5970 + vr_saved * UNITS_PER_VREG);
5973 static void
5974 aarch64_conditional_register_usage (void)
5976 int i;
5977 if (!TARGET_FLOAT)
5979 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5981 fixed_regs[i] = 1;
5982 call_used_regs[i] = 1;
5987 /* Walk down the type tree of TYPE counting consecutive base elements.
5988 If *MODEP is VOIDmode, then set it to the first valid floating point
5989 type. If a non-floating point type is found, or if a floating point
5990 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5991 otherwise return the count in the sub-tree. */
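/* For example, for struct { float x, y, z; } this returns 3 with
   *MODEP set to SFmode, while struct { float f; int i; } returns -1
   because the int field is not a floating-point or vector type.  */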
5992 static int
5993 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5995 enum machine_mode mode;
5996 HOST_WIDE_INT size;
5998 switch (TREE_CODE (type))
6000 case REAL_TYPE:
6001 mode = TYPE_MODE (type);
6002 if (mode != DFmode && mode != SFmode && mode != TFmode)
6003 return -1;
6005 if (*modep == VOIDmode)
6006 *modep = mode;
6008 if (*modep == mode)
6009 return 1;
6011 break;
6013 case COMPLEX_TYPE:
6014 mode = TYPE_MODE (TREE_TYPE (type));
6015 if (mode != DFmode && mode != SFmode && mode != TFmode)
6016 return -1;
6018 if (*modep == VOIDmode)
6019 *modep = mode;
6021 if (*modep == mode)
6022 return 2;
6024 break;
6026 case VECTOR_TYPE:
6027 /* Use V2SImode and V4SImode as representatives of all 64-bit
6028 and 128-bit vector types. */
6029 size = int_size_in_bytes (type);
6030 switch (size)
6032 case 8:
6033 mode = V2SImode;
6034 break;
6035 case 16:
6036 mode = V4SImode;
6037 break;
6038 default:
6039 return -1;
6042 if (*modep == VOIDmode)
6043 *modep = mode;
6045 /* Vector modes are considered to be opaque: two vectors are
6046 equivalent for the purposes of being homogeneous aggregates
6047 if they are the same size. */
6048 if (*modep == mode)
6049 return 1;
6051 break;
6053 case ARRAY_TYPE:
6055 int count;
6056 tree index = TYPE_DOMAIN (type);
6058 /* Can't handle incomplete types. */
6059 if (!COMPLETE_TYPE_P (type))
6060 return -1;
6062 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6063 if (count == -1
6064 || !index
6065 || !TYPE_MAX_VALUE (index)
6066 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6067 || !TYPE_MIN_VALUE (index)
6068 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6069 || count < 0)
6070 return -1;
6072 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6073 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6075 /* There must be no padding. */
6076 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6077 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6078 != count * GET_MODE_BITSIZE (*modep)))
6079 return -1;
6081 return count;
6084 case RECORD_TYPE:
6086 int count = 0;
6087 int sub_count;
6088 tree field;
6090 /* Can't handle incomplete types. */
6091 if (!COMPLETE_TYPE_P (type))
6092 return -1;
6094 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6096 if (TREE_CODE (field) != FIELD_DECL)
6097 continue;
6099 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6100 if (sub_count < 0)
6101 return -1;
6102 count += sub_count;
6105 /* There must be no padding. */
6106 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6107 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6108 != count * GET_MODE_BITSIZE (*modep)))
6109 return -1;
6111 return count;
6114 case UNION_TYPE:
6115 case QUAL_UNION_TYPE:
6117 /* These aren't very interesting except in a degenerate case. */
6118 int count = 0;
6119 int sub_count;
6120 tree field;
6122 /* Can't handle incomplete types. */
6123 if (!COMPLETE_TYPE_P (type))
6124 return -1;
6126 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6128 if (TREE_CODE (field) != FIELD_DECL)
6129 continue;
6131 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6132 if (sub_count < 0)
6133 return -1;
6134 count = count > sub_count ? count : sub_count;
6137 /* There must be no padding. */
6138 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6139 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6140 != count * GET_MODE_BITSIZE (*modep)))
6141 return -1;
6143 return count;
6146 default:
6147 break;
6150 return -1;
6153 /* Return true if we use LRA instead of reload pass. */
6154 static bool
6155 aarch64_lra_p (void)
6157 return aarch64_lra_flag;
6160 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6161 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6162 array types. The C99 floating-point complex types are also considered
6163 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6164 types, which are GCC extensions and out of the scope of AAPCS64, are
6165 treated as composite types here as well.
6167 Note that MODE itself is not sufficient in determining whether a type
6168 is such a composite type or not. This is because
6169 stor-layout.c:compute_record_mode may have already changed the MODE
6170 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6171 structure with only one field may have its MODE set to the mode of the
6172 field. Also an integer mode whose size matches the size of the
6173 RECORD_TYPE type may be used to substitute the original mode
6174 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6175 solely relied on. */
6177 static bool
6178 aarch64_composite_type_p (const_tree type,
6179 enum machine_mode mode)
6181 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6182 return true;
6184 if (mode == BLKmode
6185 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6186 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6187 return true;
6189 return false;
6192 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6193 type as described in AAPCS64 \S 4.1.2.
6195 See the comment above aarch64_composite_type_p for the notes on MODE. */
6197 static bool
6198 aarch64_short_vector_p (const_tree type,
6199 enum machine_mode mode)
6201 HOST_WIDE_INT size = -1;
6203 if (type && TREE_CODE (type) == VECTOR_TYPE)
6204 size = int_size_in_bytes (type);
6205 else if (!aarch64_composite_type_p (type, mode)
6206 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6207 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6208 size = GET_MODE_SIZE (mode);
6210 return (size == 8 || size == 16) ? true : false;
6213 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6214 shall be passed or returned in simd/fp register(s) (providing these
6215 parameter passing registers are available).
6217 Upon successful return, *COUNT returns the number of needed registers,
6218 *BASE_MODE returns the mode of the individual register and when IS_HA
6219 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6220 floating-point aggregate or a homogeneous short-vector aggregate. */
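/* For example, a struct of four floats is a homogeneous FP aggregate:
   *COUNT is 4, *BASE_MODE is SFmode and *IS_HA is true.  A double
   _Complex argument gives *COUNT == 2 and *BASE_MODE == DFmode.  */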
6222 static bool
6223 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6224 const_tree type,
6225 enum machine_mode *base_mode,
6226 int *count,
6227 bool *is_ha)
6229 enum machine_mode new_mode = VOIDmode;
6230 bool composite_p = aarch64_composite_type_p (type, mode);
6232 if (is_ha != NULL) *is_ha = false;
6234 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6235 || aarch64_short_vector_p (type, mode))
6237 *count = 1;
6238 new_mode = mode;
6240 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6242 if (is_ha != NULL) *is_ha = true;
6243 *count = 2;
6244 new_mode = GET_MODE_INNER (mode);
6246 else if (type && composite_p)
6248 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6250 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6252 if (is_ha != NULL) *is_ha = true;
6253 *count = ag_count;
6255 else
6256 return false;
6258 else
6259 return false;
6261 *base_mode = new_mode;
6262 return true;
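/* Illustrative note (not part of the original source): under the rules
   above, a C type such as
       struct hfa { float x, y, z; };
   is classified as a homogeneous floating-point aggregate: *BASE_MODE is
   SFmode, *COUNT is 3 and *IS_HA is set, so it can be passed in s0-s2
   when enough FP/SIMD argument registers remain.  The struct name is
   hypothetical; only the classification it demonstrates comes from the
   code above.  */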
6265 /* Implement TARGET_STRUCT_VALUE_RTX. */
6267 static rtx
6268 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6269 int incoming ATTRIBUTE_UNUSED)
6271 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6274 /* Implements target hook vector_mode_supported_p. */
6275 static bool
6276 aarch64_vector_mode_supported_p (enum machine_mode mode)
6278 if (TARGET_SIMD
6279 && (mode == V4SImode || mode == V8HImode
6280 || mode == V16QImode || mode == V2DImode
6281 || mode == V2SImode || mode == V4HImode
6282 || mode == V8QImode || mode == V2SFmode
6283 || mode == V4SFmode || mode == V2DFmode))
6284 return true;
6286 return false;
6289 /* Return appropriate SIMD container
6290 for MODE within a vector of WIDTH bits. */
6291 static enum machine_mode
6292 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6294 gcc_assert (width == 64 || width == 128);
6295 if (TARGET_SIMD)
6297 if (width == 128)
6298 switch (mode)
6300 case DFmode:
6301 return V2DFmode;
6302 case SFmode:
6303 return V4SFmode;
6304 case SImode:
6305 return V4SImode;
6306 case HImode:
6307 return V8HImode;
6308 case QImode:
6309 return V16QImode;
6310 case DImode:
6311 return V2DImode;
6312 default:
6313 break;
6315 else
6316 switch (mode)
6318 case SFmode:
6319 return V2SFmode;
6320 case SImode:
6321 return V2SImode;
6322 case HImode:
6323 return V4HImode;
6324 case QImode:
6325 return V8QImode;
6326 default:
6327 break;
6330 return word_mode;
6333 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6334 static enum machine_mode
6335 aarch64_preferred_simd_mode (enum machine_mode mode)
6337 return aarch64_simd_container_mode (mode, 128);
6340 /* Return the bitmask of possible vector sizes for the vectorizer
6341 to iterate over. */
6342 static unsigned int
6343 aarch64_autovectorize_vector_sizes (void)
6345 return (16 | 8);
6348 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6349 vector types in order to conform to the AAPCS64 (see "Procedure
6350 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6351 qualify for emission with the mangled names defined in that document,
6352 a vector type must not only be of the correct mode but also be
6353 composed of AdvSIMD vector element types (e.g.
6354 __builtin_aarch64_simd_qi); these types are registered by
6355 aarch64_init_simd_builtins (). In other words, vector types defined
6356 in other ways, e.g. via the vector_size attribute, will get default
6357 mangled names. */
6358 typedef struct
6360 enum machine_mode mode;
6361 const char *element_type_name;
6362 const char *mangled_name;
6363 } aarch64_simd_mangle_map_entry;
6365 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6366 /* 64-bit containerized types. */
6367 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6368 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6369 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6370 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6371 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6372 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6373 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6374 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6375 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6376 /* 128-bit containerized types. */
6377 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6378 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6379 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6380 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6381 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6382 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6383 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6384 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6385 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6386 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6387 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6388 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6389 { VOIDmode, NULL, NULL }
6392 /* Implement TARGET_MANGLE_TYPE. */
6394 static const char *
6395 aarch64_mangle_type (const_tree type)
6397 /* The AArch64 ABI documents say that "__va_list" has to be
6398 mangled as if it is in the "std" namespace. */
6399 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6400 return "St9__va_list";
6402 /* Check the mode of the vector type, and the name of the vector
6403 element type, against the table. */
6404 if (TREE_CODE (type) == VECTOR_TYPE)
6406 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6408 while (pos->mode != VOIDmode)
6410 tree elt_type = TREE_TYPE (type);
6412 if (pos->mode == TYPE_MODE (type)
6413 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6414 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6415 pos->element_type_name))
6416 return pos->mangled_name;
6418 pos++;
6422 /* Use the default mangling. */
6423 return NULL;
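/* Worked example (illustrative, not part of the original source): with
   the table above, the arm_neon.h type int32x4_t, a V4SImode vector whose
   element type is __builtin_aarch64_simd_si, is mangled as
   "11__Int32x4_t".  A hypothetical C++ declaration
       void f (int32x4_t);
   would therefore be emitted as _Z1f11__Int32x4_t.  A V4SImode vector
   created with __attribute__ ((vector_size (16))) instead falls through
   to the default mangling, as the comment above the table notes.  */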
6426 /* Return the equivalent letter for size. */
6427 static char
6428 sizetochar (int size)
6430 switch (size)
6432 case 64: return 'd';
6433 case 32: return 's';
6434 case 16: return 'h';
6435 case 8 : return 'b';
6436 default: gcc_unreachable ();
6440 /* Return true iff X is a uniform vector of floating-point
6441 constants, and the constant can be represented in
6442 quarter-precision form. Note, as aarch64_float_const_representable_p
6443 rejects both +0.0 and -0.0, we also reject +0.0 and -0.0 here. */
6444 static bool
6445 aarch64_vect_float_const_representable_p (rtx x)
6447 int i = 0;
6448 REAL_VALUE_TYPE r0, ri;
6449 rtx x0, xi;
6451 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6452 return false;
6454 x0 = CONST_VECTOR_ELT (x, 0);
6455 if (!CONST_DOUBLE_P (x0))
6456 return false;
6458 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6460 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6462 xi = CONST_VECTOR_ELT (x, i);
6463 if (!CONST_DOUBLE_P (xi))
6464 return false;
6466 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6467 if (!REAL_VALUES_EQUAL (r0, ri))
6468 return false;
6471 return aarch64_float_const_representable_p (x0);
6474 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise.
If INFO is nonnull, fill it in with details of how to generate the immediate. */
6475 bool
6476 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6477 struct simd_immediate_info *info)
6479 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6480 matches = 1; \
6481 for (i = 0; i < idx; i += (STRIDE)) \
6482 if (!(TEST)) \
6483 matches = 0; \
6484 if (matches) \
6486 immtype = (CLASS); \
6487 elsize = (ELSIZE); \
6488 eshift = (SHIFT); \
6489 emvn = (NEG); \
6490 break; \
6493 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6494 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6495 unsigned char bytes[16];
6496 int immtype = -1, matches;
6497 unsigned int invmask = inverse ? 0xff : 0;
6498 int eshift, emvn;
6500 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6502 if (! (aarch64_simd_imm_zero_p (op, mode)
6503 || aarch64_vect_float_const_representable_p (op)))
6504 return false;
6506 if (info)
6508 info->value = CONST_VECTOR_ELT (op, 0);
6509 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6510 info->mvn = false;
6511 info->shift = 0;
6514 return true;
6517 /* Splat vector constant out into a byte vector. */
6518 for (i = 0; i < n_elts; i++)
6520 rtx el = CONST_VECTOR_ELT (op, i);
6521 unsigned HOST_WIDE_INT elpart;
6522 unsigned int part, parts;
6524 if (GET_CODE (el) == CONST_INT)
6526 elpart = INTVAL (el);
6527 parts = 1;
6529 else if (GET_CODE (el) == CONST_DOUBLE)
6531 elpart = CONST_DOUBLE_LOW (el);
6532 parts = 2;
6534 else
6535 gcc_unreachable ();
6537 for (part = 0; part < parts; part++)
6539 unsigned int byte;
6540 for (byte = 0; byte < innersize; byte++)
6542 bytes[idx++] = (elpart & 0xff) ^ invmask;
6543 elpart >>= BITS_PER_UNIT;
6545 if (GET_CODE (el) == CONST_DOUBLE)
6546 elpart = CONST_DOUBLE_HIGH (el);
6550 /* Sanity check. */
6551 gcc_assert (idx == GET_MODE_SIZE (mode));
6553 do
6555 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6556 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6558 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6559 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6561 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6562 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6564 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6565 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6567 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6569 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6571 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6572 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6574 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6575 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6577 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6578 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6580 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6581 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6583 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6585 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6587 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6588 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6590 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6591 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6593 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6594 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6596 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6597 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6599 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6601 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6602 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6604 while (0);
6606 if (immtype == -1)
6607 return false;
6609 if (info)
6611 info->element_width = elsize;
6612 info->mvn = emvn != 0;
6613 info->shift = eshift;
6615 unsigned HOST_WIDE_INT imm = 0;
6617 if (immtype >= 12 && immtype <= 15)
6618 info->msl = true;
6620 /* Un-invert bytes of recognized vector, if necessary. */
6621 if (invmask != 0)
6622 for (i = 0; i < idx; i++)
6623 bytes[i] ^= invmask;
6625 if (immtype == 17)
6627 /* FIXME: Broken on 32-bit H_W_I hosts. */
6628 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6630 for (i = 0; i < 8; i++)
6631 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6632 << (i * BITS_PER_UNIT);
6635 info->value = GEN_INT (imm);
6637 else
6639 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6640 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6642 /* Construct 'abcdefgh' because the assembler cannot handle
6643 generic constants. */
6644 if (info->mvn)
6645 imm = ~imm;
6646 imm = (imm >> info->shift) & 0xff;
6647 info->value = GEN_INT (imm);
6651 return true;
6652 #undef CHECK
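/* Illustrative example (not part of the original source): the V4SImode
   constant { 0x45, 0x45, 0x45, 0x45 } splats to the byte vector
   45 00 00 00 45 00 00 00 45 00 00 00 45 00 00 00, which satisfies the
   first CHECK above (class 0, element size 32, shift 0, no inversion).
   aarch64_simd_valid_immediate therefore accepts it, and INFO describes
   an instruction of the form "movi v0.4s, 0x45".  */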
6655 static bool
6656 aarch64_const_vec_all_same_int_p (rtx x,
6657 HOST_WIDE_INT minval,
6658 HOST_WIDE_INT maxval)
6660 HOST_WIDE_INT firstval;
6661 int count, i;
6663 if (GET_CODE (x) != CONST_VECTOR
6664 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6665 return false;
6667 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6668 if (firstval < minval || firstval > maxval)
6669 return false;
6671 count = CONST_VECTOR_NUNITS (x);
6672 for (i = 1; i < count; i++)
6673 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6674 return false;
6676 return true;
6679 /* Check that immediate shift constants are within range. */
6680 bool
6681 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6683 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6684 if (left)
6685 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6686 else
6687 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
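/* For example (illustrative note, not from the original source): in
   V4SImode the element width is 32 bits, so a CONST_VECTOR of all 31s is
   a valid left- or right-shift count, a vector of all 32s is valid only
   as a right-shift count, and a vector of all 0s is valid only as a
   left-shift count.  */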
6690 /* Return true if X is a uniform vector where all elements
6691 are either the floating-point constant 0.0 or the
6692 integer constant 0. */
6693 bool
6694 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6696 return x == CONST0_RTX (mode);
6699 bool
6700 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6702 HOST_WIDE_INT imm = INTVAL (x);
6703 int i;
6705 for (i = 0; i < 8; i++)
6707 unsigned int byte = imm & 0xff;
6708 if (byte != 0xff && byte != 0)
6709 return false;
6710 imm >>= 8;
6713 return true;
6716 bool
6717 aarch64_mov_operand_p (rtx x,
6718 enum aarch64_symbol_context context,
6719 enum machine_mode mode)
6721 if (GET_CODE (x) == HIGH
6722 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6723 return true;
6725 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6726 return true;
6728 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6729 return true;
6731 return aarch64_classify_symbolic_expression (x, context)
6732 == SYMBOL_TINY_ABSOLUTE;
6735 /* Return a const_int vector of VAL. */
6736 rtx
6737 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6739 int nunits = GET_MODE_NUNITS (mode);
6740 rtvec v = rtvec_alloc (nunits);
6741 int i;
6743 for (i=0; i < nunits; i++)
6744 RTVEC_ELT (v, i) = GEN_INT (val);
6746 return gen_rtx_CONST_VECTOR (mode, v);
6749 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6751 bool
6752 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6754 enum machine_mode vmode;
6756 gcc_assert (!VECTOR_MODE_P (mode));
6757 vmode = aarch64_preferred_simd_mode (mode);
6758 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6759 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6762 /* Construct and return a PARALLEL RTX vector. */
6763 rtx
6764 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6766 int nunits = GET_MODE_NUNITS (mode);
6767 rtvec v = rtvec_alloc (nunits / 2);
6768 int base = high ? nunits / 2 : 0;
6769 rtx t1;
6770 int i;
6772 for (i=0; i < nunits / 2; i++)
6773 RTVEC_ELT (v, i) = GEN_INT (base + i);
6775 t1 = gen_rtx_PARALLEL (mode, v);
6776 return t1;
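/* For instance (illustrative note, not from the original source): for
   V4SImode, aarch64_simd_vect_par_cnst_half returns the PARALLEL
   [ 0 1 ] when HIGH is false and [ 2 3 ] when HIGH is true, i.e. the
   element indices selecting the low or high half of the vector.  */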
6779 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6780 HIGH (exclusive). */
6781 void
6782 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6784 HOST_WIDE_INT lane;
6785 gcc_assert (GET_CODE (operand) == CONST_INT);
6786 lane = INTVAL (operand);
6788 if (lane < low || lane >= high)
6789 error ("lane out of range");
6792 void
6793 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6795 gcc_assert (GET_CODE (operand) == CONST_INT);
6796 HOST_WIDE_INT lane = INTVAL (operand);
6798 if (lane < low || lane >= high)
6799 error ("constant out of range");
6802 /* Emit code to reinterpret one AdvSIMD type as another,
6803 without altering bits. */
6804 void
6805 aarch64_simd_reinterpret (rtx dest, rtx src)
6807 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6810 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6811 registers). */
6812 void
6813 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6814 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6815 rtx op1)
6817 rtx mem = gen_rtx_MEM (mode, destaddr);
6818 rtx tmp1 = gen_reg_rtx (mode);
6819 rtx tmp2 = gen_reg_rtx (mode);
6821 emit_insn (intfn (tmp1, op1, tmp2));
6823 emit_move_insn (mem, tmp1);
6824 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6825 emit_move_insn (mem, tmp2);
6828 /* Return TRUE if OP is a valid vector addressing mode. */
6829 bool
6830 aarch64_simd_mem_operand_p (rtx op)
6832 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6833 || GET_CODE (XEXP (op, 0)) == REG);
6836 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6837 not to early-clobber SRC registers in the process.
6839 We assume that the operands described by SRC and DEST represent a
6840 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6841 number of components into which the copy has been decomposed. */
6842 void
6843 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6844 rtx *src, unsigned int count)
6846 unsigned int i;
6848 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6849 || REGNO (operands[0]) < REGNO (operands[1]))
6851 for (i = 0; i < count; i++)
6853 operands[2 * i] = dest[i];
6854 operands[2 * i + 1] = src[i];
6857 else
6859 for (i = 0; i < count; i++)
6861 operands[2 * i] = dest[count - i - 1];
6862 operands[2 * i + 1] = src[count - i - 1];
6867 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6868 one of VSTRUCT modes: OI, CI or XI. */
6869 int
6870 aarch64_simd_attr_length_move (rtx insn)
6872 enum machine_mode mode;
6874 extract_insn_cached (insn);
6876 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6878 mode = GET_MODE (recog_data.operand[0]);
6879 switch (mode)
6881 case OImode:
6882 return 8;
6883 case CImode:
6884 return 12;
6885 case XImode:
6886 return 16;
6887 default:
6888 gcc_unreachable ();
6891 return 4;
6894 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6895 alignment of a vector to 128 bits. */
6896 static HOST_WIDE_INT
6897 aarch64_simd_vector_alignment (const_tree type)
6899 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6900 return MIN (align, 128);
6903 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6904 static bool
6905 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6907 if (is_packed)
6908 return false;
6910 /* We guarantee alignment for vectors up to 128 bits. */
6911 if (tree_int_cst_compare (TYPE_SIZE (type),
6912 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6913 return false;
6915 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6916 return true;
6919 /* If VALS is a vector constant that can be loaded into a register
6920 using DUP, generate instructions to do so and return an RTX to
6921 assign to the register. Otherwise return NULL_RTX. */
6922 static rtx
6923 aarch64_simd_dup_constant (rtx vals)
6925 enum machine_mode mode = GET_MODE (vals);
6926 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6927 int n_elts = GET_MODE_NUNITS (mode);
6928 bool all_same = true;
6929 rtx x;
6930 int i;
6932 if (GET_CODE (vals) != CONST_VECTOR)
6933 return NULL_RTX;
6935 for (i = 1; i < n_elts; ++i)
6937 x = CONST_VECTOR_ELT (vals, i);
6938 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6939 all_same = false;
6942 if (!all_same)
6943 return NULL_RTX;
6945 /* We can load this constant by using DUP and a constant in a
6946 single ARM register. This will be cheaper than a vector
6947 load. */
6948 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6949 return gen_rtx_VEC_DUPLICATE (mode, x);
6953 /* Generate code to load VALS, which is a PARALLEL containing only
6954 constants (for vec_init) or CONST_VECTOR, efficiently into a
6955 register. Returns an RTX to copy into the register, or NULL_RTX
6956 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6957 static rtx
6958 aarch64_simd_make_constant (rtx vals)
6960 enum machine_mode mode = GET_MODE (vals);
6961 rtx const_dup;
6962 rtx const_vec = NULL_RTX;
6963 int n_elts = GET_MODE_NUNITS (mode);
6964 int n_const = 0;
6965 int i;
6967 if (GET_CODE (vals) == CONST_VECTOR)
6968 const_vec = vals;
6969 else if (GET_CODE (vals) == PARALLEL)
6971 /* A CONST_VECTOR must contain only CONST_INTs and
6972 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6973 Only store valid constants in a CONST_VECTOR. */
6974 for (i = 0; i < n_elts; ++i)
6976 rtx x = XVECEXP (vals, 0, i);
6977 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6978 n_const++;
6980 if (n_const == n_elts)
6981 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6983 else
6984 gcc_unreachable ();
6986 if (const_vec != NULL_RTX
6987 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6988 /* Load using MOVI/MVNI. */
6989 return const_vec;
6990 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6991 /* Loaded using DUP. */
6992 return const_dup;
6993 else if (const_vec != NULL_RTX)
6994 /* Load from constant pool. We cannot take advantage of single-cycle
6995 LD1 because we need a PC-relative addressing mode. */
6996 return const_vec;
6997 else
6998 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6999 We cannot construct an initializer. */
7000 return NULL_RTX;
7003 void
7004 aarch64_expand_vector_init (rtx target, rtx vals)
7006 enum machine_mode mode = GET_MODE (target);
7007 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7008 int n_elts = GET_MODE_NUNITS (mode);
7009 int n_var = 0, one_var = -1;
7010 bool all_same = true;
7011 rtx x, mem;
7012 int i;
7014 x = XVECEXP (vals, 0, 0);
7015 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7016 n_var = 1, one_var = 0;
7018 for (i = 1; i < n_elts; ++i)
7020 x = XVECEXP (vals, 0, i);
7021 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7022 ++n_var, one_var = i;
7024 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7025 all_same = false;
7028 if (n_var == 0)
7030 rtx constant = aarch64_simd_make_constant (vals);
7031 if (constant != NULL_RTX)
7033 emit_move_insn (target, constant);
7034 return;
7038 /* Splat a single non-constant element if we can. */
7039 if (all_same)
7041 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7042 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7043 return;
7046 /* One field is non-constant. Load constant then overwrite varying
7047 field. This is more efficient than using the stack. */
7048 if (n_var == 1)
7050 rtx copy = copy_rtx (vals);
7051 rtx index = GEN_INT (one_var);
7052 enum insn_code icode;
7054 /* Load constant part of vector, substitute neighboring value for
7055 varying element. */
7056 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7057 aarch64_expand_vector_init (target, copy);
7059 /* Insert variable. */
7060 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7061 icode = optab_handler (vec_set_optab, mode);
7062 gcc_assert (icode != CODE_FOR_nothing);
7063 emit_insn (GEN_FCN (icode) (target, x, index));
7064 return;
7067 /* Construct the vector in memory one field at a time
7068 and load the whole vector. */
7069 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7070 for (i = 0; i < n_elts; i++)
7071 emit_move_insn (adjust_address_nv (mem, inner_mode,
7072 i * GET_MODE_SIZE (inner_mode)),
7073 XVECEXP (vals, 0, i));
7074 emit_move_insn (target, mem);
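/* Illustrative example (not part of the original source): initializing a
   V4SImode vector with { x, 1, 2, 3 }, where x is the only non-constant
   element, first expands the constant vector { 1, 1, 2, 3 } (element 0
   borrows its neighbour's value) and then overwrites lane 0 with x via
   the vec_set pattern, avoiding a trip through the stack.  */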
7078 static unsigned HOST_WIDE_INT
7079 aarch64_shift_truncation_mask (enum machine_mode mode)
7081 return
7082 (aarch64_vector_mode_supported_p (mode)
7083 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7086 #ifndef TLS_SECTION_ASM_FLAG
7087 #define TLS_SECTION_ASM_FLAG 'T'
7088 #endif
7090 void
7091 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7092 tree decl ATTRIBUTE_UNUSED)
7094 char flagchars[10], *f = flagchars;
7096 /* If we have already declared this section, we can use an
7097 abbreviated form to switch back to it -- unless this section is
7098 part of a COMDAT group, in which case GAS requires the full
7099 declaration every time. */
7100 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7101 && (flags & SECTION_DECLARED))
7103 fprintf (asm_out_file, "\t.section\t%s\n", name);
7104 return;
7107 if (!(flags & SECTION_DEBUG))
7108 *f++ = 'a';
7109 if (flags & SECTION_WRITE)
7110 *f++ = 'w';
7111 if (flags & SECTION_CODE)
7112 *f++ = 'x';
7113 if (flags & SECTION_SMALL)
7114 *f++ = 's';
7115 if (flags & SECTION_MERGE)
7116 *f++ = 'M';
7117 if (flags & SECTION_STRINGS)
7118 *f++ = 'S';
7119 if (flags & SECTION_TLS)
7120 *f++ = TLS_SECTION_ASM_FLAG;
7121 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7122 *f++ = 'G';
7123 *f = '\0';
7125 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7127 if (!(flags & SECTION_NOTYPE))
7129 const char *type;
7130 const char *format;
7132 if (flags & SECTION_BSS)
7133 type = "nobits";
7134 else
7135 type = "progbits";
7137 #ifdef TYPE_OPERAND_FMT
7138 format = "," TYPE_OPERAND_FMT;
7139 #else
7140 format = ",@%s";
7141 #endif
7143 fprintf (asm_out_file, format, type);
7145 if (flags & SECTION_ENTSIZE)
7146 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7147 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7149 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7150 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7151 else
7152 fprintf (asm_out_file, ",%s,comdat",
7153 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7157 putc ('\n', asm_out_file);
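/* Illustrative output (not part of the original source): for a TLS BSS
   section this hook emits something like
       .section .tbss,"awT",@nobits
   where the flag string is built from the unconditional 'a',
   SECTION_WRITE ('w') and SECTION_TLS ('T'), and the @nobits type comes
   from SECTION_BSS.  The section name here is only an example.  */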
7160 /* Select a format to encode pointers in exception handling data. */
7161 int
7162 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7164 int type;
7165 switch (aarch64_cmodel)
7167 case AARCH64_CMODEL_TINY:
7168 case AARCH64_CMODEL_TINY_PIC:
7169 case AARCH64_CMODEL_SMALL:
7170 case AARCH64_CMODEL_SMALL_PIC:
7171 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7172 for everything. */
7173 type = DW_EH_PE_sdata4;
7174 break;
7175 default:
7176 /* No assumptions here. 8-byte relocs required. */
7177 type = DW_EH_PE_sdata8;
7178 break;
7180 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
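/* Illustrative note (not part of the original source): with the small
   code model and a non-global symbol this returns
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b); a global symbol additionally
   gets DW_EH_PE_indirect, giving 0x9b.  Code models that may exceed the
   4GB range use sdata8 instead.  */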
7183 /* Emit load exclusive. */
7185 static void
7186 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7187 rtx mem, rtx model_rtx)
7189 rtx (*gen) (rtx, rtx, rtx);
7191 switch (mode)
7193 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7194 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7195 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7196 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7197 default:
7198 gcc_unreachable ();
7201 emit_insn (gen (rval, mem, model_rtx));
7204 /* Emit store exclusive. */
7206 static void
7207 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7208 rtx rval, rtx mem, rtx model_rtx)
7210 rtx (*gen) (rtx, rtx, rtx, rtx);
7212 switch (mode)
7214 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7215 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7216 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7217 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7218 default:
7219 gcc_unreachable ();
7222 emit_insn (gen (bval, rval, mem, model_rtx));
7225 /* Mark the previous jump instruction as unlikely. */
7227 static void
7228 aarch64_emit_unlikely_jump (rtx insn)
7230 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7232 insn = emit_jump_insn (insn);
7233 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7236 /* Expand a compare and swap pattern. */
7238 void
7239 aarch64_expand_compare_and_swap (rtx operands[])
7241 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7242 enum machine_mode mode, cmp_mode;
7243 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7245 bval = operands[0];
7246 rval = operands[1];
7247 mem = operands[2];
7248 oldval = operands[3];
7249 newval = operands[4];
7250 is_weak = operands[5];
7251 mod_s = operands[6];
7252 mod_f = operands[7];
7253 mode = GET_MODE (mem);
7254 cmp_mode = mode;
7256 /* Normally the succ memory model must be stronger than fail, but in the
7257 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7258 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7260 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7261 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7262 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7264 switch (mode)
7266 case QImode:
7267 case HImode:
7268 /* For short modes, we're going to perform the comparison in SImode,
7269 so do the zero-extension now. */
7270 cmp_mode = SImode;
7271 rval = gen_reg_rtx (SImode);
7272 oldval = convert_modes (SImode, mode, oldval, true);
7273 /* Fall through. */
7275 case SImode:
7276 case DImode:
7277 /* Force the value into a register if needed. */
7278 if (!aarch64_plus_operand (oldval, mode))
7279 oldval = force_reg (cmp_mode, oldval);
7280 break;
7282 default:
7283 gcc_unreachable ();
7286 switch (mode)
7288 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7289 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7290 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7291 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7292 default:
7293 gcc_unreachable ();
7296 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7298 if (mode == QImode || mode == HImode)
7299 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7301 x = gen_rtx_REG (CCmode, CC_REGNUM);
7302 x = gen_rtx_EQ (SImode, x, const0_rtx);
7303 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7306 /* Split a compare and swap pattern. */
7308 void
7309 aarch64_split_compare_and_swap (rtx operands[])
7311 rtx rval, mem, oldval, newval, scratch;
7312 enum machine_mode mode;
7313 bool is_weak;
7314 rtx label1, label2, x, cond;
7316 rval = operands[0];
7317 mem = operands[1];
7318 oldval = operands[2];
7319 newval = operands[3];
7320 is_weak = (operands[4] != const0_rtx);
7321 scratch = operands[7];
7322 mode = GET_MODE (mem);
7324 label1 = NULL_RTX;
7325 if (!is_weak)
7327 label1 = gen_label_rtx ();
7328 emit_label (label1);
7330 label2 = gen_label_rtx ();
7332 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7334 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7335 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7336 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7337 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7338 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7340 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7342 if (!is_weak)
7344 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7345 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7346 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7347 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7349 else
7351 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7352 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7353 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7356 emit_label (label2);
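/* For illustration only (not part of the original source), the split
   above yields a loop of roughly this shape for a strong DImode CAS with
   sequentially consistent semantics; the register numbers are
   hypothetical:

       .L1:  ldaxr   x0, [x1]        // load-acquire exclusive
             cmp     x0, x2          // compare with the expected value
             b.ne    .L2             // fail: exit, CC holds the result
             stlxr   w3, x4, [x1]    // store-release exclusive of desired
             cbnz    w3, .L1         // retry if the exclusive store failed
       .L2:                                                             */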
7359 /* Split an atomic operation. */
7361 void
7362 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7363 rtx value, rtx model_rtx, rtx cond)
7365 enum machine_mode mode = GET_MODE (mem);
7366 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7367 rtx label, x;
7369 label = gen_label_rtx ();
7370 emit_label (label);
7372 if (new_out)
7373 new_out = gen_lowpart (wmode, new_out);
7374 if (old_out)
7375 old_out = gen_lowpart (wmode, old_out);
7376 else
7377 old_out = new_out;
7378 value = simplify_gen_subreg (wmode, value, mode, 0);
7380 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7382 switch (code)
7384 case SET:
7385 new_out = value;
7386 break;
7388 case NOT:
7389 x = gen_rtx_AND (wmode, old_out, value);
7390 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7391 x = gen_rtx_NOT (wmode, new_out);
7392 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7393 break;
7395 case MINUS:
7396 if (CONST_INT_P (value))
7398 value = GEN_INT (-INTVAL (value));
7399 code = PLUS;
7401 /* Fall through. */
7403 default:
7404 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7405 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7406 break;
7409 aarch64_emit_store_exclusive (mode, cond, mem,
7410 gen_lowpart (mode, new_out), model_rtx);
7412 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7413 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7414 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7415 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
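/* For illustration only (not part of the original source), a relaxed
   SImode atomic fetch-and-add split by the function above becomes a
   load/store-exclusive retry loop along these lines (register choices
   are hypothetical):

       .L1:  ldxr    w0, [x2]        // old value
             add     w1, w0, w3      // apply the operation
             stxr    w4, w1, [x2]    // try to commit the new value
             cbnz    w4, .L1         // retry on contention              */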
7418 static void
7419 aarch64_print_extension (void)
7421 const struct aarch64_option_extension *opt = NULL;
7423 for (opt = all_extensions; opt->name != NULL; opt++)
7424 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7425 asm_fprintf (asm_out_file, "+%s", opt->name);
7427 asm_fprintf (asm_out_file, "\n");
7430 static void
7431 aarch64_start_file (void)
7433 if (selected_arch)
7435 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7436 aarch64_print_extension ();
7438 else if (selected_cpu)
7440 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7441 aarch64_print_extension ();
7443 default_file_start ();
7446 /* Target hook for c_mode_for_suffix. */
7447 static enum machine_mode
7448 aarch64_c_mode_for_suffix (char suffix)
7450 if (suffix == 'q')
7451 return TFmode;
7453 return VOIDmode;
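/* Illustrative note (not part of the original source): this makes a
   literal such as 1.0q a TFmode constant, i.e. 128-bit IEEE quad on
   AArch64; any other suffix falls back to the front end's default
   handling because VOIDmode is returned.  */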
7456 /* We can only represent floating point constants which will fit in
7457 "quarter-precision" values. These values are characterised by
7458 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7461 (-1)^s * (n/16) * 2^r
7463 Where:
7464 's' is the sign bit.
7465 'n' is an integer in the range 16 <= n <= 31.
7466 'r' is an integer in the range -3 <= r <= 4. */
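/* Worked examples (illustrative, not part of the original source):
   0.25  = (-1)^0 * (16/16) * 2^-2   and is representable;
   31.0  = (-1)^0 * (31/16) * 2^4    is the largest representable value;
   0.125 = (-1)^0 * (16/16) * 2^-3   is the smallest positive value;
   0.1 has no exact (n, r) within these ranges, so it is rejected.  */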
7468 /* Return true iff X can be represented by a quarter-precision
7469 floating point immediate operand. Note, we cannot represent 0.0. */
7470 bool
7471 aarch64_float_const_representable_p (rtx x)
7473 /* This represents our current view of how many bits
7474 make up the mantissa. */
7475 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7476 int exponent;
7477 unsigned HOST_WIDE_INT mantissa, mask;
7478 HOST_WIDE_INT m1, m2;
7479 REAL_VALUE_TYPE r, m;
7481 if (!CONST_DOUBLE_P (x))
7482 return false;
7484 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7486 /* We cannot represent infinities, NaNs or +/-zero. We won't
7487 know if we have +zero until we analyse the mantissa, but we
7488 can reject the other invalid values. */
7489 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7490 || REAL_VALUE_MINUS_ZERO (r))
7491 return false;
7493 /* Extract exponent. */
7494 r = real_value_abs (&r);
7495 exponent = REAL_EXP (&r);
7497 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7498 highest (sign) bit, with a fixed binary point at bit point_pos.
7499 m1 holds the low part of the mantissa, m2 the high part.
7500 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7501 bits for the mantissa, this can fail (low bits will be lost). */
7502 real_ldexp (&m, &r, point_pos - exponent);
7503 REAL_VALUE_TO_INT (&m1, &m2, m);
7505 /* If the low part of the mantissa has bits set we cannot represent
7506 the value. */
7507 if (m1 != 0)
7508 return false;
7509 /* We have rejected the lower HOST_WIDE_INT, so update our
7510 understanding of how many bits lie in the mantissa and
7511 look only at the high HOST_WIDE_INT. */
7512 mantissa = m2;
7513 point_pos -= HOST_BITS_PER_WIDE_INT;
7515 /* We can only represent values with a mantissa of the form 1.xxxx. */
7516 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7517 if ((mantissa & mask) != 0)
7518 return false;
7520 /* Having filtered unrepresentable values, we may now remove all
7521 but the highest 5 bits. */
7522 mantissa >>= point_pos - 5;
7524 /* We cannot represent the value 0.0, so reject it. This is handled
7525 elsewhere. */
7526 if (mantissa == 0)
7527 return false;
7529 /* Then, as bit 4 is always set, we can mask it off, leaving
7530 the mantissa in the range [0, 15]. */
7531 mantissa &= ~(1 << 4);
7532 gcc_assert (mantissa <= 15);
7534 /* GCC internally does not use IEEE754-like encoding (where normalized
7535 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7536 Our mantissa values are shifted 4 places to the left relative to
7537 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7538 by 5 places to correct for GCC's representation. */
7539 exponent = 5 - exponent;
7541 return (exponent >= 0 && exponent <= 7);
7544 char*
7545 aarch64_output_simd_mov_immediate (rtx const_vector,
7546 enum machine_mode mode,
7547 unsigned width)
7549 bool is_valid;
7550 static char templ[40];
7551 const char *mnemonic;
7552 const char *shift_op;
7553 unsigned int lane_count = 0;
7554 char element_char;
7556 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7558 /* This will return true to show const_vector is legal for use as either
7559 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7560 also update INFO to show how the immediate should be generated. */
7561 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7562 gcc_assert (is_valid);
7564 element_char = sizetochar (info.element_width);
7565 lane_count = width / info.element_width;
7567 mode = GET_MODE_INNER (mode);
7568 if (mode == SFmode || mode == DFmode)
7570 gcc_assert (info.shift == 0 && ! info.mvn);
7571 if (aarch64_float_const_zero_rtx_p (info.value))
7572 info.value = GEN_INT (0);
7573 else
7575 #define buf_size 20
7576 REAL_VALUE_TYPE r;
7577 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7578 char float_buf[buf_size] = {'\0'};
7579 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7580 #undef buf_size
7582 if (lane_count == 1)
7583 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7584 else
7585 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7586 lane_count, element_char, float_buf);
7587 return templ;
7591 mnemonic = info.mvn ? "mvni" : "movi";
7592 shift_op = info.msl ? "msl" : "lsl";
7594 if (lane_count == 1)
7595 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7596 mnemonic, UINTVAL (info.value));
7597 else if (info.shift)
7598 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7599 ", %s %d", mnemonic, lane_count, element_char,
7600 UINTVAL (info.value), shift_op, info.shift);
7601 else
7602 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7603 mnemonic, lane_count, element_char, UINTVAL (info.value));
7604 return templ;
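/* Example templates produced above (illustrative, not part of the
   original source): a V4SImode splat of 0x45 gives "movi\t%0.4s, 0x45";
   the same value shifted into the second byte gives
   "movi\t%0.4s, 0x45, lsl 8"; and a V2DFmode splat of 1.0 gives
   something like "fmov\t%0.2d, 1.0e+0", where the exact decimal string
   comes from real_to_decimal_for_mode.  */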
7607 char*
7608 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7609 enum machine_mode mode)
7611 enum machine_mode vmode;
7613 gcc_assert (!VECTOR_MODE_P (mode));
7614 vmode = aarch64_simd_container_mode (mode, 64);
7615 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7616 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7619 /* Split operands into moves from op[1] + op[2] into op[0]. */
7621 void
7622 aarch64_split_combinev16qi (rtx operands[3])
7624 unsigned int dest = REGNO (operands[0]);
7625 unsigned int src1 = REGNO (operands[1]);
7626 unsigned int src2 = REGNO (operands[2]);
7627 enum machine_mode halfmode = GET_MODE (operands[1]);
7628 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7629 rtx destlo, desthi;
7631 gcc_assert (halfmode == V16QImode);
7633 if (src1 == dest && src2 == dest + halfregs)
7635 /* No-op move. Can't split to nothing; emit something. */
7636 emit_note (NOTE_INSN_DELETED);
7637 return;
7640 /* Preserve register attributes for variable tracking. */
7641 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7642 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7643 GET_MODE_SIZE (halfmode));
7645 /* Special case of reversed high/low parts. */
7646 if (reg_overlap_mentioned_p (operands[2], destlo)
7647 && reg_overlap_mentioned_p (operands[1], desthi))
7649 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7650 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7651 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7653 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7655 /* Try to avoid unnecessary moves if part of the result
7656 is in the right place already. */
7657 if (src1 != dest)
7658 emit_move_insn (destlo, operands[1]);
7659 if (src2 != dest + halfregs)
7660 emit_move_insn (desthi, operands[2]);
7662 else
7664 if (src2 != dest + halfregs)
7665 emit_move_insn (desthi, operands[2]);
7666 if (src1 != dest)
7667 emit_move_insn (destlo, operands[1]);
7671 /* vec_perm support. */
7673 #define MAX_VECT_LEN 16
7675 struct expand_vec_perm_d
7677 rtx target, op0, op1;
7678 unsigned char perm[MAX_VECT_LEN];
7679 enum machine_mode vmode;
7680 unsigned char nelt;
7681 bool one_vector_p;
7682 bool testing_p;
7685 /* Generate a variable permutation. */
7687 static void
7688 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7690 enum machine_mode vmode = GET_MODE (target);
7691 bool one_vector_p = rtx_equal_p (op0, op1);
7693 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7694 gcc_checking_assert (GET_MODE (op0) == vmode);
7695 gcc_checking_assert (GET_MODE (op1) == vmode);
7696 gcc_checking_assert (GET_MODE (sel) == vmode);
7697 gcc_checking_assert (TARGET_SIMD);
7699 if (one_vector_p)
7701 if (vmode == V8QImode)
7703 /* Expand the argument to a V16QI mode by duplicating it. */
7704 rtx pair = gen_reg_rtx (V16QImode);
7705 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7706 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7708 else
7710 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7713 else
7715 rtx pair;
7717 if (vmode == V8QImode)
7719 pair = gen_reg_rtx (V16QImode);
7720 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7721 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7723 else
7725 pair = gen_reg_rtx (OImode);
7726 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7727 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7732 void
7733 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7735 enum machine_mode vmode = GET_MODE (target);
7736 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7737 bool one_vector_p = rtx_equal_p (op0, op1);
7738 rtx rmask[MAX_VECT_LEN], mask;
7740 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7742 /* The TBL instruction does not use a modulo index, so we must take care
7743 of that ourselves. */
7744 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7745 for (i = 0; i < nelt; ++i)
7746 rmask[i] = mask;
7747 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7748 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7750 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7753 /* Recognize patterns suitable for the TRN instructions. */
7754 static bool
7755 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7757 unsigned int i, odd, mask, nelt = d->nelt;
7758 rtx out, in0, in1, x;
7759 rtx (*gen) (rtx, rtx, rtx);
7760 enum machine_mode vmode = d->vmode;
7762 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7763 return false;
7765 /* Note that these are little-endian tests.
7766 We correct for big-endian later. */
7767 if (d->perm[0] == 0)
7768 odd = 0;
7769 else if (d->perm[0] == 1)
7770 odd = 1;
7771 else
7772 return false;
7773 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7775 for (i = 0; i < nelt; i += 2)
7777 if (d->perm[i] != i + odd)
7778 return false;
7779 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7780 return false;
7783 /* Success! */
7784 if (d->testing_p)
7785 return true;
7787 in0 = d->op0;
7788 in1 = d->op1;
7789 if (BYTES_BIG_ENDIAN)
7791 x = in0, in0 = in1, in1 = x;
7792 odd = !odd;
7794 out = d->target;
7796 if (odd)
7798 switch (vmode)
7800 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7801 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7802 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7803 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7804 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7805 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7806 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7807 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7808 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7809 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7810 default:
7811 return false;
7814 else
7816 switch (vmode)
7818 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7819 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7820 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7821 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7822 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7823 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7824 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7825 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7826 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7827 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7828 default:
7829 return false;
7833 emit_insn (gen (out, in0, in1));
7834 return true;
7837 /* Recognize patterns suitable for the UZP instructions. */
7838 static bool
7839 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7841 unsigned int i, odd, mask, nelt = d->nelt;
7842 rtx out, in0, in1, x;
7843 rtx (*gen) (rtx, rtx, rtx);
7844 enum machine_mode vmode = d->vmode;
7846 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7847 return false;
7849 /* Note that these are little-endian tests.
7850 We correct for big-endian later. */
7851 if (d->perm[0] == 0)
7852 odd = 0;
7853 else if (d->perm[0] == 1)
7854 odd = 1;
7855 else
7856 return false;
7857 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7859 for (i = 0; i < nelt; i++)
7861 unsigned elt = (i * 2 + odd) & mask;
7862 if (d->perm[i] != elt)
7863 return false;
7866 /* Success! */
7867 if (d->testing_p)
7868 return true;
7870 in0 = d->op0;
7871 in1 = d->op1;
7872 if (BYTES_BIG_ENDIAN)
7874 x = in0, in0 = in1, in1 = x;
7875 odd = !odd;
7877 out = d->target;
7879 if (odd)
7881 switch (vmode)
7883 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7884 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7885 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7886 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7887 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7888 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7889 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7890 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7891 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7892 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7893 default:
7894 return false;
7897 else
7899 switch (vmode)
7901 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7902 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7903 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7904 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7905 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7906 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7907 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7908 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7909 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7910 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7911 default:
7912 return false;
7916 emit_insn (gen (out, in0, in1));
7917 return true;
7920 /* Recognize patterns suitable for the ZIP instructions. */
7921 static bool
7922 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7924 unsigned int i, high, mask, nelt = d->nelt;
7925 rtx out, in0, in1, x;
7926 rtx (*gen) (rtx, rtx, rtx);
7927 enum machine_mode vmode = d->vmode;
7929 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7930 return false;
7932 /* Note that these are little-endian tests.
7933 We correct for big-endian later. */
7934 high = nelt / 2;
7935 if (d->perm[0] == high)
7936 /* Do Nothing. */
7938 else if (d->perm[0] == 0)
7939 high = 0;
7940 else
7941 return false;
7942 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7944 for (i = 0; i < nelt / 2; i++)
7946 unsigned elt = (i + high) & mask;
7947 if (d->perm[i * 2] != elt)
7948 return false;
7949 elt = (elt + nelt) & mask;
7950 if (d->perm[i * 2 + 1] != elt)
7951 return false;
7954 /* Success! */
7955 if (d->testing_p)
7956 return true;
7958 in0 = d->op0;
7959 in1 = d->op1;
7960 if (BYTES_BIG_ENDIAN)
7962 x = in0, in0 = in1, in1 = x;
7963 high = !high;
7965 out = d->target;
7967 if (high)
7969 switch (vmode)
7971 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7972 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7973 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7974 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7975 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7976 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7977 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7978 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7979 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7980 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7981 default:
7982 return false;
7985 else
7987 switch (vmode)
7989 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7990 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7991 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7992 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7993 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7994 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7995 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7996 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7997 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7998 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7999 default:
8000 return false;
8004 emit_insn (gen (out, in0, in1));
8005 return true;
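/* Summary of the index patterns matched above for a V4SImode permute of
   two vectors (illustrative, not part of the original source), using the
   little-endian numbering the recognizers work in:

       TRN1 {0,4,2,6}   TRN2 {1,5,3,7}
       UZP1 {0,2,4,6}   UZP2 {1,3,5,7}
       ZIP1 {0,4,1,5}   ZIP2 {2,6,3,7}                                  */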
8008 static bool
8009 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8011 rtx (*gen) (rtx, rtx, rtx);
8012 rtx out = d->target;
8013 rtx in0;
8014 enum machine_mode vmode = d->vmode;
8015 unsigned int i, elt, nelt = d->nelt;
8016 rtx lane;
8018 /* TODO: This may not be big-endian safe. */
8019 if (BYTES_BIG_ENDIAN)
8020 return false;
8022 elt = d->perm[0];
8023 for (i = 1; i < nelt; i++)
8025 if (elt != d->perm[i])
8026 return false;
8029 /* The generic preparation in aarch64_expand_vec_perm_const_1
8030 swaps the operand order and the permute indices if it finds
8031 d->perm[0] to be in the second operand. Thus, we can always
8032 use d->op0 and need not do any extra arithmetic to get the
8033 correct lane number. */
8034 in0 = d->op0;
8035 lane = GEN_INT (elt);
8037 switch (vmode)
8039 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8040 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8041 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8042 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8043 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8044 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8045 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8046 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8047 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8048 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8049 default:
8050 return false;
8053 emit_insn (gen (out, in0, lane));
8054 return true;
8057 static bool
8058 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8060 rtx rperm[MAX_VECT_LEN], sel;
8061 enum machine_mode vmode = d->vmode;
8062 unsigned int i, nelt = d->nelt;
8064 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8065 numbering of elements for big-endian, we must reverse the order. */
8066 if (BYTES_BIG_ENDIAN)
8067 return false;
8069 if (d->testing_p)
8070 return true;
8072 /* Generic code will try constant permutation twice: once with the
8073 original mode and again with the elements lowered to QImode.
8074 So wait and don't do the selector expansion ourselves. */
8075 if (vmode != V8QImode && vmode != V16QImode)
8076 return false;
8078 for (i = 0; i < nelt; ++i)
8079 rperm[i] = GEN_INT (d->perm[i]);
8080 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8081 sel = force_reg (vmode, sel);
8083 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8084 return true;
8087 static bool
8088 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8090 /* The pattern matching functions above are written to look for a small
8091 number to begin the sequence (0, 1, N/2). If we begin with an index
8092 from the second operand, we can swap the operands. */
8093 if (d->perm[0] >= d->nelt)
8095 unsigned i, nelt = d->nelt;
8096 rtx x;
8098 for (i = 0; i < nelt; ++i)
8099 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8101 x = d->op0;
8102 d->op0 = d->op1;
8103 d->op1 = x;
8106 if (TARGET_SIMD)
8108 if (aarch64_evpc_zip (d))
8109 return true;
8110 else if (aarch64_evpc_uzp (d))
8111 return true;
8112 else if (aarch64_evpc_trn (d))
8113 return true;
8114 else if (aarch64_evpc_dup (d))
8115 return true;
8116 return aarch64_evpc_tbl (d);
8118 return false;
8121 /* Expand a vec_perm_const pattern. */
8123 bool
8124 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8126 struct expand_vec_perm_d d;
8127 int i, nelt, which;
8129 d.target = target;
8130 d.op0 = op0;
8131 d.op1 = op1;
8133 d.vmode = GET_MODE (target);
8134 gcc_assert (VECTOR_MODE_P (d.vmode));
8135 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8136 d.testing_p = false;
8138 for (i = which = 0; i < nelt; ++i)
8140 rtx e = XVECEXP (sel, 0, i);
8141 int ei = INTVAL (e) & (2 * nelt - 1);
8142 which |= (ei < nelt ? 1 : 2);
8143 d.perm[i] = ei;
8146 switch (which)
8148 default:
8149 gcc_unreachable ();
8151 case 3:
8152 d.one_vector_p = false;
8153 if (!rtx_equal_p (op0, op1))
8154 break;
8156 /* The elements of PERM do not suggest that only the first operand
8157 is used, but both operands are identical. Allow easier matching
8158 of the permutation by folding the permutation into the single
8159 input vector. */
8160 /* Fall Through. */
8161 case 2:
8162 for (i = 0; i < nelt; ++i)
8163 d.perm[i] &= nelt - 1;
8164 d.op0 = op1;
8165 d.one_vector_p = true;
8166 break;
8168 case 1:
8169 d.op1 = op0;
8170 d.one_vector_p = true;
8171 break;
8174 return aarch64_expand_vec_perm_const_1 (&d);
8177 static bool
8178 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8179 const unsigned char *sel)
8181 struct expand_vec_perm_d d;
8182 unsigned int i, nelt, which;
8183 bool ret;
8185 d.vmode = vmode;
8186 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8187 d.testing_p = true;
8188 memcpy (d.perm, sel, nelt);
8190 /* Calculate whether all elements are in one vector. */
8191 for (i = which = 0; i < nelt; ++i)
8193 unsigned char e = d.perm[i];
8194 gcc_assert (e < 2 * nelt);
8195 which |= (e < nelt ? 1 : 2);
8198 /* If all elements are from the second vector, reindex as if from the
8199 first vector. */
8200 if (which == 2)
8201 for (i = 0; i < nelt; ++i)
8202 d.perm[i] -= nelt;
8204 /* Check whether the mask can be applied to a single vector. */
8205 d.one_vector_p = (which != 3);
8207 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8208 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8209 if (!d.one_vector_p)
8210 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8212 start_sequence ();
8213 ret = aarch64_expand_vec_perm_const_1 (&d);
8214 end_sequence ();
8216 return ret;
8219 #undef TARGET_ADDRESS_COST
8220 #define TARGET_ADDRESS_COST aarch64_address_cost
8222 /* This hook determines whether unnamed bitfields affect the alignment
8223 of the containing structure. The hook returns true if the structure
8224 should inherit the alignment requirements of an unnamed bitfield's
8225 type. */
8226 #undef TARGET_ALIGN_ANON_BITFIELD
8227 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
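/* Illustrative note (not from the original source): in practice this means
   the inline check emitted for a function-local "static T obj;" tests only
   bit 0 of the guard variable before falling back to __cxa_guard_acquire,
   as in the Itanium C++ ABI convention used on AArch64.  */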
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
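/* Illustrative example (not from the original source): with the hook
   returning false, a field such as "a" in
   "volatile struct { unsigned a : 3; } *p" is read and written using its
   int container, i.e. a 32-bit access, rather than the narrowest mode
   that covers the bitfield.  */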
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
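/* Illustrative note (not from the original source): together these give
   section anchors a usable range of [-256, 4095] bytes.  The negative
   bound corresponds to the signed 9-bit unscaled-offset forms
   (-256 .. 255), while 4095 = 4k - 1 is the largest 12-bit unsigned
   scaled offset for a byte access.  */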
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"