gcc/config/aarch64/aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "stor-layout.h"
41 #include "calls.h"
42 #include "varasm.h"
43 #include "regs.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfganal.h"
48 #include "lcm.h"
49 #include "cfgbuild.h"
50 #include "cfgcleanup.h"
51 #include "predict.h"
52 #include "basic-block.h"
53 #include "df.h"
54 #include "hard-reg-set.h"
55 #include "output.h"
56 #include "hashtab.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "statistics.h"
60 #include "real.h"
61 #include "fixed-value.h"
62 #include "insn-config.h"
63 #include "expmed.h"
64 #include "dojump.h"
65 #include "explow.h"
66 #include "emit-rtl.h"
67 #include "stmt.h"
68 #include "expr.h"
69 #include "reload.h"
70 #include "toplev.h"
71 #include "target.h"
72 #include "target-def.h"
73 #include "targhooks.h"
74 #include "ggc.h"
75 #include "tm_p.h"
76 #include "recog.h"
77 #include "langhooks.h"
78 #include "diagnostic-core.h"
79 #include "hash-table.h"
80 #include "tree-ssa-alias.h"
81 #include "internal-fn.h"
82 #include "gimple-fold.h"
83 #include "tree-eh.h"
84 #include "gimple-expr.h"
85 #include "is-a.h"
86 #include "gimple.h"
87 #include "gimplify.h"
88 #include "optabs.h"
89 #include "dwarf2.h"
90 #include "cfgloop.h"
91 #include "tree-vectorizer.h"
92 #include "aarch64-cost-tables.h"
93 #include "dumpfile.h"
94 #include "builtins.h"
95 #include "rtl-iter.h"
96 #include "tm-constrs.h"
97 #include "sched-int.h"
99 /* Defined for convenience. */
100 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
102 /* Classifies an address.
104 ADDRESS_REG_IMM
105 A simple base register plus immediate offset.
107 ADDRESS_REG_WB
108 A base register indexed by immediate offset with writeback.
110 ADDRESS_REG_REG
111 A base register indexed by (optionally scaled) register.
113 ADDRESS_REG_UXTW
114 A base register indexed by (optionally scaled) zero-extended register.
116 ADDRESS_REG_SXTW
117 A base register indexed by (optionally scaled) sign-extended register.
119 ADDRESS_LO_SUM
120 A LO_SUM rtx with a base register and "LO12" symbol relocation.
122 ADDRESS_SYMBOLIC:
123 A constant symbolic address, in pc-relative literal pool. */
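 /* For illustration (examples only, not taken from the original sources),
    typical assembly forms for each class are:
      ADDRESS_REG_IMM    ldr x0, [x1, #16]
      ADDRESS_REG_WB     ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
      ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
      ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #2]
      ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #2]
      ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:sym]
      ADDRESS_SYMBOLIC   ldr x0, .Lconst   (PC-relative literal load)  */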
125 enum aarch64_address_type {
126 ADDRESS_REG_IMM,
127 ADDRESS_REG_WB,
128 ADDRESS_REG_REG,
129 ADDRESS_REG_UXTW,
130 ADDRESS_REG_SXTW,
131 ADDRESS_LO_SUM,
132 ADDRESS_SYMBOLIC
135 struct aarch64_address_info {
136 enum aarch64_address_type type;
137 rtx base;
138 rtx offset;
139 int shift;
140 enum aarch64_symbol_type symbol_type;
143 struct simd_immediate_info
145 rtx value;
146 int shift;
147 int element_width;
148 bool mvn;
149 bool msl;
152 /* The current code model. */
153 enum aarch64_code_model aarch64_cmodel;
155 #ifdef HAVE_AS_TLS
156 #undef TARGET_HAVE_TLS
157 #define TARGET_HAVE_TLS 1
158 #endif
160 static bool aarch64_composite_type_p (const_tree, machine_mode);
161 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
162 const_tree,
163 machine_mode *, int *,
164 bool *);
165 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
166 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
167 static void aarch64_override_options_after_change (void);
168 static bool aarch64_vector_mode_supported_p (machine_mode);
169 static unsigned bit_count (unsigned HOST_WIDE_INT);
170 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
171 const unsigned char *sel);
172 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
174 /* Major revision number of the ARM Architecture implemented by the target. */
175 unsigned aarch64_architecture_version;
177 /* The processor for which instructions should be scheduled. */
178 enum aarch64_processor aarch64_tune = cortexa53;
180 /* The current tuning set. */
181 const struct tune_params *aarch64_tune_params;
183 /* Mask to specify which instructions we are allowed to generate. */
184 unsigned long aarch64_isa_flags = 0;
186 /* Mask to specify which instruction scheduling options should be used. */
187 unsigned long aarch64_tune_flags = 0;
189 /* Tuning parameters. */
191 #if HAVE_DESIGNATED_INITIALIZERS
192 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
193 #else
194 #define NAMED_PARAM(NAME, VAL) (VAL)
195 #endif
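 /* A minimal expansion example (illustrative):
      NAMED_PARAM (issue_rate, 2)
    becomes ".issue_rate = (2)" when HAVE_DESIGNATED_INITIALIZERS is set and
    just "(2)" otherwise, which is why the entries in the tables below must
    stay in field declaration order for the fallback case.  */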
197 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
198 __extension__
199 #endif
201 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
202 __extension__
203 #endif
204 static const struct cpu_addrcost_table generic_addrcost_table =
206 #if HAVE_DESIGNATED_INITIALIZERS
207 .addr_scale_costs =
208 #endif
210 NAMED_PARAM (hi, 0),
211 NAMED_PARAM (si, 0),
212 NAMED_PARAM (di, 0),
213 NAMED_PARAM (ti, 0),
215 NAMED_PARAM (pre_modify, 0),
216 NAMED_PARAM (post_modify, 0),
217 NAMED_PARAM (register_offset, 0),
218 NAMED_PARAM (register_extend, 0),
219 NAMED_PARAM (imm_offset, 0)
222 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
223 __extension__
224 #endif
225 static const struct cpu_addrcost_table cortexa57_addrcost_table =
227 #if HAVE_DESIGNATED_INITIALIZERS
228 .addr_scale_costs =
229 #endif
231 NAMED_PARAM (hi, 1),
232 NAMED_PARAM (si, 0),
233 NAMED_PARAM (di, 0),
234 NAMED_PARAM (ti, 1),
236 NAMED_PARAM (pre_modify, 0),
237 NAMED_PARAM (post_modify, 0),
238 NAMED_PARAM (register_offset, 0),
239 NAMED_PARAM (register_extend, 0),
240 NAMED_PARAM (imm_offset, 0),
243 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
244 __extension__
245 #endif
246 static const struct cpu_addrcost_table xgene1_addrcost_table =
248 #if HAVE_DESIGNATED_INITIALIZERS
249 .addr_scale_costs =
250 #endif
252 NAMED_PARAM (hi, 1),
253 NAMED_PARAM (si, 0),
254 NAMED_PARAM (di, 0),
255 NAMED_PARAM (ti, 1),
257 NAMED_PARAM (pre_modify, 1),
258 NAMED_PARAM (post_modify, 0),
259 NAMED_PARAM (register_offset, 0),
260 NAMED_PARAM (register_extend, 1),
261 NAMED_PARAM (imm_offset, 0),
264 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
265 __extension__
266 #endif
267 static const struct cpu_regmove_cost generic_regmove_cost =
269 NAMED_PARAM (GP2GP, 1),
270 /* Avoid the use of slow int<->fp moves for spilling by setting
271 their cost higher than memmov_cost. */
272 NAMED_PARAM (GP2FP, 5),
273 NAMED_PARAM (FP2GP, 5),
274 NAMED_PARAM (FP2FP, 2)
277 static const struct cpu_regmove_cost cortexa57_regmove_cost =
279 NAMED_PARAM (GP2GP, 1),
280 /* Avoid the use of slow int<->fp moves for spilling by setting
281 their cost higher than memmov_cost. */
282 NAMED_PARAM (GP2FP, 5),
283 NAMED_PARAM (FP2GP, 5),
284 NAMED_PARAM (FP2FP, 2)
287 static const struct cpu_regmove_cost cortexa53_regmove_cost =
289 NAMED_PARAM (GP2GP, 1),
290 /* Avoid the use of slow int<->fp moves for spilling by setting
291 their cost higher than memmov_cost. */
292 NAMED_PARAM (GP2FP, 5),
293 NAMED_PARAM (FP2GP, 5),
294 NAMED_PARAM (FP2FP, 2)
297 static const struct cpu_regmove_cost thunderx_regmove_cost =
299 NAMED_PARAM (GP2GP, 2),
300 NAMED_PARAM (GP2FP, 2),
301 NAMED_PARAM (FP2GP, 6),
302 NAMED_PARAM (FP2FP, 4)
305 static const struct cpu_regmove_cost xgene1_regmove_cost =
307 NAMED_PARAM (GP2GP, 1),
308 /* Avoid the use of slow int<->fp moves for spilling by setting
309 their cost higher than memmov_cost. */
310 NAMED_PARAM (GP2FP, 8),
311 NAMED_PARAM (FP2GP, 8),
312 NAMED_PARAM (FP2FP, 2)
315 /* Generic costs for vector insn classes. */
316 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
317 __extension__
318 #endif
319 static const struct cpu_vector_cost generic_vector_cost =
321 NAMED_PARAM (scalar_stmt_cost, 1),
322 NAMED_PARAM (scalar_load_cost, 1),
323 NAMED_PARAM (scalar_store_cost, 1),
324 NAMED_PARAM (vec_stmt_cost, 1),
325 NAMED_PARAM (vec_to_scalar_cost, 1),
326 NAMED_PARAM (scalar_to_vec_cost, 1),
327 NAMED_PARAM (vec_align_load_cost, 1),
328 NAMED_PARAM (vec_unalign_load_cost, 1),
329 NAMED_PARAM (vec_unalign_store_cost, 1),
330 NAMED_PARAM (vec_store_cost, 1),
331 NAMED_PARAM (cond_taken_branch_cost, 3),
332 NAMED_PARAM (cond_not_taken_branch_cost, 1)
335 /* Generic costs for vector insn classes. */
336 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
337 __extension__
338 #endif
339 static const struct cpu_vector_cost cortexa57_vector_cost =
341 NAMED_PARAM (scalar_stmt_cost, 1),
342 NAMED_PARAM (scalar_load_cost, 4),
343 NAMED_PARAM (scalar_store_cost, 1),
344 NAMED_PARAM (vec_stmt_cost, 3),
345 NAMED_PARAM (vec_to_scalar_cost, 8),
346 NAMED_PARAM (scalar_to_vec_cost, 8),
347 NAMED_PARAM (vec_align_load_cost, 5),
348 NAMED_PARAM (vec_unalign_load_cost, 5),
349 NAMED_PARAM (vec_unalign_store_cost, 1),
350 NAMED_PARAM (vec_store_cost, 1),
351 NAMED_PARAM (cond_taken_branch_cost, 1),
352 NAMED_PARAM (cond_not_taken_branch_cost, 1)
355 /* Generic costs for vector insn classes. */
356 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
357 __extension__
358 #endif
359 static const struct cpu_vector_cost xgene1_vector_cost =
361 NAMED_PARAM (scalar_stmt_cost, 1),
362 NAMED_PARAM (scalar_load_cost, 5),
363 NAMED_PARAM (scalar_store_cost, 1),
364 NAMED_PARAM (vec_stmt_cost, 2),
365 NAMED_PARAM (vec_to_scalar_cost, 4),
366 NAMED_PARAM (scalar_to_vec_cost, 4),
367 NAMED_PARAM (vec_align_load_cost, 10),
368 NAMED_PARAM (vec_unalign_load_cost, 10),
369 NAMED_PARAM (vec_unalign_store_cost, 2),
370 NAMED_PARAM (vec_store_cost, 2),
371 NAMED_PARAM (cond_taken_branch_cost, 2),
372 NAMED_PARAM (cond_not_taken_branch_cost, 1)
375 #define AARCH64_FUSE_NOTHING (0)
376 #define AARCH64_FUSE_MOV_MOVK (1 << 0)
377 #define AARCH64_FUSE_ADRP_ADD (1 << 1)
378 #define AARCH64_FUSE_MOVK_MOVK (1 << 2)
379 #define AARCH64_FUSE_ADRP_LDR (1 << 3)
380 #define AARCH64_FUSE_CMP_BRANCH (1 << 4)
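 /* Illustrative instruction pairs (examples only) that these fusion flags
    are meant to keep adjacent for the scheduler:
      AARCH64_FUSE_MOV_MOVK    mov  x0, #0x1234      + movk x0, #0x5678, lsl 16
      AARCH64_FUSE_ADRP_ADD    adrp x0, sym          + add  x0, x0, :lo12:sym
      AARCH64_FUSE_MOVK_MOVK   movk x0, #1, lsl 32   + movk x0, #2, lsl 48
      AARCH64_FUSE_ADRP_LDR    adrp x0, sym          + ldr  x1, [x0, :lo12:sym]
      AARCH64_FUSE_CMP_BRANCH  cmp  x0, #0           + b.ne .Llabel  */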
382 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
383 __extension__
384 #endif
385 static const struct tune_params generic_tunings =
387 &cortexa57_extra_costs,
388 &generic_addrcost_table,
389 &generic_regmove_cost,
390 &generic_vector_cost,
391 NAMED_PARAM (memmov_cost, 4),
392 NAMED_PARAM (issue_rate, 2),
393 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
394 8, /* function_align. */
395 8, /* jump_align. */
396 4, /* loop_align. */
397 2, /* int_reassoc_width. */
398 4, /* fp_reassoc_width. */
399 1 /* vec_reassoc_width. */
402 static const struct tune_params cortexa53_tunings =
404 &cortexa53_extra_costs,
405 &generic_addrcost_table,
406 &cortexa53_regmove_cost,
407 &generic_vector_cost,
408 NAMED_PARAM (memmov_cost, 4),
409 NAMED_PARAM (issue_rate, 2),
410 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
411 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
412 8, /* function_align. */
413 8, /* jump_align. */
414 4, /* loop_align. */
415 2, /* int_reassoc_width. */
416 4, /* fp_reassoc_width. */
417 1 /* vec_reassoc_width. */
420 static const struct tune_params cortexa57_tunings =
422 &cortexa57_extra_costs,
423 &cortexa57_addrcost_table,
424 &cortexa57_regmove_cost,
425 &cortexa57_vector_cost,
426 NAMED_PARAM (memmov_cost, 4),
427 NAMED_PARAM (issue_rate, 3),
428 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK)),
429 16, /* function_align. */
430 8, /* jump_align. */
431 4, /* loop_align. */
432 2, /* int_reassoc_width. */
433 4, /* fp_reassoc_width. */
434 1 /* vec_reassoc_width. */
437 static const struct tune_params thunderx_tunings =
439 &thunderx_extra_costs,
440 &generic_addrcost_table,
441 &thunderx_regmove_cost,
442 &generic_vector_cost,
443 NAMED_PARAM (memmov_cost, 6),
444 NAMED_PARAM (issue_rate, 2),
445 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
446 8, /* function_align. */
447 8, /* jump_align. */
448 8, /* loop_align. */
449 2, /* int_reassoc_width. */
450 4, /* fp_reassoc_width. */
451 1 /* vec_reassoc_width. */
454 static const struct tune_params xgene1_tunings =
456 &xgene1_extra_costs,
457 &xgene1_addrcost_table,
458 &xgene1_regmove_cost,
459 &xgene1_vector_cost,
460 NAMED_PARAM (memmov_cost, 6),
461 NAMED_PARAM (issue_rate, 4),
462 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
463 16, /* function_align. */
464 8, /* jump_align. */
465 16, /* loop_align. */
466 2, /* int_reassoc_width. */
467 4, /* fp_reassoc_width. */
468 1 /* vec_reassoc_width. */
471 /* A processor implementing AArch64. */
472 struct processor
474 const char *const name;
475 enum aarch64_processor core;
476 const char *arch;
477 unsigned architecture_version;
478 const unsigned long flags;
479 const struct tune_params *const tune;
482 /* Processor cores implementing AArch64. */
483 static const struct processor all_cores[] =
485 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
486 {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
487 #include "aarch64-cores.def"
488 #undef AARCH64_CORE
489 {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
490 {NULL, aarch64_none, NULL, 0, 0, NULL}
493 /* Architectures implementing AArch64. */
494 static const struct processor all_architectures[] =
496 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
497 {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
498 #include "aarch64-arches.def"
499 #undef AARCH64_ARCH
500 {NULL, aarch64_none, NULL, 0, 0, NULL}
503 /* Target specification.  These are populated as command-line arguments
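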
504 are processed, or NULL if not specified. */
505 static const struct processor *selected_arch;
506 static const struct processor *selected_cpu;
507 static const struct processor *selected_tune;
509 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
511 /* An ISA extension in the co-processor and main instruction set space. */
512 struct aarch64_option_extension
514 const char *const name;
515 const unsigned long flags_on;
516 const unsigned long flags_off;
519 /* ISA extensions in AArch64. */
520 static const struct aarch64_option_extension all_extensions[] =
522 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
523 {NAME, FLAGS_ON, FLAGS_OFF},
524 #include "aarch64-option-extensions.def"
525 #undef AARCH64_OPT_EXTENSION
526 {NULL, 0, 0}
529 /* Used to track the size of an address when generating a pre/post
530 increment address. */
531 static machine_mode aarch64_memory_reference_mode;
533 /* Used to force GTY into this file. */
534 static GTY(()) int gty_dummy;
536 /* A table of valid AArch64 "bitmask immediate" values for
537 logical instructions. */
539 #define AARCH64_NUM_BITMASKS 5334
540 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
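 /* For reference (examples only): a bitmask immediate is a rotated run of
    consecutive ones replicated across the register width, e.g.
      0x00ff00ff00ff00ff   (8 ones in each 16-bit element)
      0x0000ffff0000ffff   (16 ones in each 32-bit element)
    whereas a value such as 0x0000000000012345 has no such encoding and
    must be built with MOV/MOVK instead.  */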
542 typedef enum aarch64_cond_code
544 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
545 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
546 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
548 aarch64_cc;
550 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
552 /* The condition codes of the processor, and the inverse function. */
553 static const char * const aarch64_condition_codes[] =
555 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
556 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
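 /* Purely illustrative sketch, not used elsewhere in this file: the
    assembly name of the inverse of condition CODE, relying on the pairwise
    layout of the table above, e.g. "ne" for AARCH64_EQ and "lt" for
    AARCH64_GE.  */
 static const char *aarch64_inverse_condition_name (aarch64_cc)
   ATTRIBUTE_UNUSED;

 static const char *
 aarch64_inverse_condition_name (aarch64_cc code)
 {
   return aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE (code)];
 }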
559 static unsigned int
560 aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
562 return 2;
565 static int
566 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
567 enum machine_mode mode)
569 if (VECTOR_MODE_P (mode))
570 return aarch64_tune_params->vec_reassoc_width;
571 if (INTEGRAL_MODE_P (mode))
572 return aarch64_tune_params->int_reassoc_width;
573 if (FLOAT_MODE_P (mode))
574 return aarch64_tune_params->fp_reassoc_width;
575 return 1;
578 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
579 unsigned
580 aarch64_dbx_register_number (unsigned regno)
582 if (GP_REGNUM_P (regno))
583 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
584 else if (regno == SP_REGNUM)
585 return AARCH64_DWARF_SP;
586 else if (FP_REGNUM_P (regno))
587 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
589 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
590 equivalent DWARF register. */
591 return DWARF_FRAME_REGISTERS;
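 /* For example (following the AArch64 DWARF register numbering): x0 maps
    to 0, x29 to 29, sp to 31 and v0 to 64, while a register with no DWARF
    equivalent (such as the condition flags) yields DWARF_FRAME_REGISTERS.  */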
594 /* Return TRUE if MODE is any of the large INT modes. */
595 static bool
596 aarch64_vect_struct_mode_p (machine_mode mode)
598 return mode == OImode || mode == CImode || mode == XImode;
601 /* Return TRUE if MODE is any of the vector modes. */
602 static bool
603 aarch64_vector_mode_p (machine_mode mode)
605 return aarch64_vector_mode_supported_p (mode)
606 || aarch64_vect_struct_mode_p (mode);
609 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
610 static bool
611 aarch64_array_mode_supported_p (machine_mode mode,
612 unsigned HOST_WIDE_INT nelems)
614 if (TARGET_SIMD
615 && AARCH64_VALID_SIMD_QREG_MODE (mode)
616 && (nelems >= 2 && nelems <= 4))
617 return true;
619 return false;
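 /* For instance (illustrative): with SIMD enabled, an array of two, three
    or four V4SImode vectors (the layout behind arm_neon.h types such as
    int32x4x3_t) is accepted here, so such arrays get dedicated opaque
    modes and can use the structure load/store patterns.  */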
622 /* Implement HARD_REGNO_NREGS. */
625 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
627 switch (aarch64_regno_regclass (regno))
629 case FP_REGS:
630 case FP_LO_REGS:
631 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
632 default:
633 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
635 gcc_unreachable ();
638 /* Implement HARD_REGNO_MODE_OK. */
641 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
643 if (GET_MODE_CLASS (mode) == MODE_CC)
644 return regno == CC_REGNUM;
646 if (regno == SP_REGNUM)
647 /* The purpose of comparing with ptr_mode is to support the
648 global register variable associated with the stack pointer
649 register via the syntax of asm ("wsp") in ILP32. */
650 return mode == Pmode || mode == ptr_mode;
652 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
653 return mode == Pmode;
655 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
656 return 1;
658 if (FP_REGNUM_P (regno))
660 if (aarch64_vect_struct_mode_p (mode))
661 return
662 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
663 else
664 return 1;
667 return 0;
670 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
671 machine_mode
672 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
673 machine_mode mode)
675 /* Handle modes that fit within single registers. */
676 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
678 if (GET_MODE_SIZE (mode) >= 4)
679 return mode;
680 else
681 return SImode;
683 /* Fall back to generic for multi-reg and very large modes. */
684 else
685 return choose_hard_reg_mode (regno, nregs, false);
688 /* Return true if calls to DECL should be treated as
689 long-calls (i.e. called via a register).  */
690 static bool
691 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
693 return false;
696 /* Return true if calls to symbol-ref SYM should be treated as
697 long-calls (i.e. called via a register).  */
698 bool
699 aarch64_is_long_call_p (rtx sym)
701 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
704 /* Return true if the offsets to a zero/sign-extract operation
705 represent an expression that matches an extend operation. The
706 operands represent the parameters from
708 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
709 bool
710 aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
711 rtx extract_imm)
713 HOST_WIDE_INT mult_val, extract_val;
715 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
716 return false;
718 mult_val = INTVAL (mult_imm);
719 extract_val = INTVAL (extract_imm);
721 if (extract_val > 8
722 && extract_val < GET_MODE_BITSIZE (mode)
723 && exact_log2 (extract_val & ~7) > 0
724 && (extract_val & 7) <= 4
725 && mult_val == (1 << (extract_val & 7)))
726 return true;
728 return false;
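 /* A concrete case (illustrative): in DImode, MULT_IMM == 4 together with
    EXTRACT_IMM == 34 passes the checks above (34 & ~7 is a power of two,
    34 & 7 == 2, and 4 == 1 << 2), i.e. a 32-bit value that is extended and
    then shifted left by 2, as in "add x0, x1, w2, sxtw #2".  */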
731 /* Emit an insn that's a simple single-set. Both the operands must be
732 known to be valid. */
733 inline static rtx
734 emit_set_insn (rtx x, rtx y)
736 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
739 /* X and Y are two things to compare using CODE. Emit the compare insn and
740 return the rtx for register 0 in the proper mode. */
742 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
744 machine_mode mode = SELECT_CC_MODE (code, x, y);
745 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
747 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
748 return cc_reg;
751 /* Build the SYMBOL_REF for __tls_get_addr. */
753 static GTY(()) rtx tls_get_addr_libfunc;
756 aarch64_tls_get_addr (void)
758 if (!tls_get_addr_libfunc)
759 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
760 return tls_get_addr_libfunc;
763 /* Return the TLS model to use for ADDR. */
765 static enum tls_model
766 tls_symbolic_operand_type (rtx addr)
768 enum tls_model tls_kind = TLS_MODEL_NONE;
769 rtx sym, addend;
771 if (GET_CODE (addr) == CONST)
773 split_const (addr, &sym, &addend);
774 if (GET_CODE (sym) == SYMBOL_REF)
775 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
777 else if (GET_CODE (addr) == SYMBOL_REF)
778 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
780 return tls_kind;
783 /* We allow LO_SUM rtxes in our legitimate addresses so that
784 combine can take care of combining addresses where necessary,
785 but for generation purposes we generate the address as
786 follows:
787 RTL Absolute
788 tmp = hi (symbol_ref); adrp x1, foo
789 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
792 PIC TLS
793 adrp x1, :got:foo adrp tmp, :tlsgd:foo
794 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
795 bl __tls_get_addr
798 Load TLS symbol, depending on TLS mechanism and TLS access model.
800 Global Dynamic - Traditional TLS:
801 adrp tmp, :tlsgd:imm
802 add dest, tmp, #:tlsgd_lo12:imm
803 bl __tls_get_addr
805 Global Dynamic - TLS Descriptors:
806 adrp dest, :tlsdesc:imm
807 ldr tmp, [dest, #:tlsdesc_lo12:imm]
808 add dest, dest, #:tlsdesc_lo12:imm
809 blr tmp
810 mrs tp, tpidr_el0
811 add dest, dest, tp
813 Initial Exec:
814 mrs tp, tpidr_el0
815 adrp tmp, :gottprel:imm
816 ldr dest, [tmp, #:gottprel_lo12:imm]
817 add dest, dest, tp
819 Local Exec:
820 mrs tp, tpidr_el0
821 add t0, tp, #:tprel_hi12:imm
822 add t0, #:tprel_lo12_nc:imm
825 static void
826 aarch64_load_symref_appropriately (rtx dest, rtx imm,
827 enum aarch64_symbol_type type)
829 switch (type)
831 case SYMBOL_SMALL_ABSOLUTE:
833 /* In ILP32, the mode of dest can be either SImode or DImode. */
834 rtx tmp_reg = dest;
835 machine_mode mode = GET_MODE (dest);
837 gcc_assert (mode == Pmode || mode == ptr_mode);
839 if (can_create_pseudo_p ())
840 tmp_reg = gen_reg_rtx (mode);
842 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
843 emit_insn (gen_add_losym (dest, tmp_reg, imm));
844 return;
847 case SYMBOL_TINY_ABSOLUTE:
848 emit_insn (gen_rtx_SET (Pmode, dest, imm));
849 return;
851 case SYMBOL_SMALL_GOT:
853 /* In ILP32, the mode of dest can be either SImode or DImode,
854 while the got entry is always of SImode size. The mode of
855 dest depends on how dest is used: if dest is assigned to a
856 pointer (e.g. in memory), it has SImode; it may have
857 DImode if dest is dereferenced to access memory.
858 This is why we have to handle three different ldr_got_small
859 patterns here (two patterns for ILP32). */
860 rtx tmp_reg = dest;
861 machine_mode mode = GET_MODE (dest);
863 if (can_create_pseudo_p ())
864 tmp_reg = gen_reg_rtx (mode);
866 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
867 if (mode == ptr_mode)
869 if (mode == DImode)
870 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
871 else
872 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
874 else
876 gcc_assert (mode == Pmode);
877 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
880 return;
883 case SYMBOL_SMALL_TLSGD:
885 rtx_insn *insns;
886 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
888 start_sequence ();
889 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
890 insns = get_insns ();
891 end_sequence ();
893 RTL_CONST_CALL_P (insns) = 1;
894 emit_libcall_block (insns, dest, result, imm);
895 return;
898 case SYMBOL_SMALL_TLSDESC:
900 machine_mode mode = GET_MODE (dest);
901 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
902 rtx tp;
904 gcc_assert (mode == Pmode || mode == ptr_mode);
906 /* In ILP32, the got entry is always of SImode size. Unlike
907 small GOT, the dest is fixed at reg 0. */
908 if (TARGET_ILP32)
909 emit_insn (gen_tlsdesc_small_si (imm));
910 else
911 emit_insn (gen_tlsdesc_small_di (imm));
912 tp = aarch64_load_tp (NULL);
914 if (mode != Pmode)
915 tp = gen_lowpart (mode, tp);
917 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
918 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
919 return;
922 case SYMBOL_SMALL_GOTTPREL:
924 /* In ILP32, the mode of dest can be either SImode or DImode,
925 while the got entry is always of SImode size. The mode of
926 dest depends on how dest is used: if dest is assigned to a
927 pointer (e.g. in memory), it has SImode; it may have
928 DImode if dest is dereferenced to access memory.
929 This is why we have to handle three different tlsie_small
930 patterns here (two patterns for ILP32). */
931 machine_mode mode = GET_MODE (dest);
932 rtx tmp_reg = gen_reg_rtx (mode);
933 rtx tp = aarch64_load_tp (NULL);
935 if (mode == ptr_mode)
937 if (mode == DImode)
938 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
939 else
941 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
942 tp = gen_lowpart (mode, tp);
945 else
947 gcc_assert (mode == Pmode);
948 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
951 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
952 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
953 return;
956 case SYMBOL_SMALL_TPREL:
958 rtx tp = aarch64_load_tp (NULL);
959 emit_insn (gen_tlsle_small (dest, tp, imm));
960 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
961 return;
964 case SYMBOL_TINY_GOT:
965 emit_insn (gen_ldr_got_tiny (dest, imm));
966 return;
968 default:
969 gcc_unreachable ();
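 /* For instance (illustrative): a local-exec access such as

      static __thread int counter;
      int bump (void) { return ++counter; }

    is expected to use the Local Exec sequence described in the comment
    above this function: an mrs read of tpidr_el0 followed by the
    tprel_hi12/tprel_lo12_nc additions that form the address of "counter".  */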
973 /* Emit a move from SRC to DEST. Assume that the move expanders can
974 handle all moves if !can_create_pseudo_p (). The distinction is
975 important because, unlike emit_move_insn, the move expanders know
976 how to force Pmode objects into the constant pool even when the
977 constant pool address is not itself legitimate. */
978 static rtx
979 aarch64_emit_move (rtx dest, rtx src)
981 return (can_create_pseudo_p ()
982 ? emit_move_insn (dest, src)
983 : emit_move_insn_1 (dest, src));
986 /* Split a 128-bit move operation into two 64-bit move operations,
987 taking care to handle partial overlap of register to register
988 copies. Special cases are needed when moving between GP regs and
989 FP regs. SRC can be a register, constant or memory; DST a register
990 or memory. If either operand is memory it must not have any side
991 effects. */
992 void
993 aarch64_split_128bit_move (rtx dst, rtx src)
995 rtx dst_lo, dst_hi;
996 rtx src_lo, src_hi;
998 machine_mode mode = GET_MODE (dst);
1000 gcc_assert (mode == TImode || mode == TFmode);
1001 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1002 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
1004 if (REG_P (dst) && REG_P (src))
1006 int src_regno = REGNO (src);
1007 int dst_regno = REGNO (dst);
1009 /* Handle FP <-> GP regs. */
1010 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1012 src_lo = gen_lowpart (word_mode, src);
1013 src_hi = gen_highpart (word_mode, src);
1015 if (mode == TImode)
1017 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1018 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1020 else
1022 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1023 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1025 return;
1027 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1029 dst_lo = gen_lowpart (word_mode, dst);
1030 dst_hi = gen_highpart (word_mode, dst);
1032 if (mode == TImode)
1034 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1035 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1037 else
1039 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1040 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1042 return;
1046 dst_lo = gen_lowpart (word_mode, dst);
1047 dst_hi = gen_highpart (word_mode, dst);
1048 src_lo = gen_lowpart (word_mode, src);
1049 src_hi = gen_highpart_mode (word_mode, mode, src);
1051 /* At most one pairing may overlap. */
1052 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1054 aarch64_emit_move (dst_hi, src_hi);
1055 aarch64_emit_move (dst_lo, src_lo);
1057 else
1059 aarch64_emit_move (dst_lo, src_lo);
1060 aarch64_emit_move (dst_hi, src_hi);
1064 bool
1065 aarch64_split_128bit_move_p (rtx dst, rtx src)
1067 return (! REG_P (src)
1068 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
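 /* For example (illustrative): a TImode copy between two FP/SIMD registers
    is not split (it can remain a single 128-bit register move), whereas a
    TImode copy whose source is memory, a constant or a GP register pair is
    split into two 64-bit moves by aarch64_split_128bit_move.  */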
1071 /* Split a complex SIMD combine. */
1073 void
1074 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1076 machine_mode src_mode = GET_MODE (src1);
1077 machine_mode dst_mode = GET_MODE (dst);
1079 gcc_assert (VECTOR_MODE_P (dst_mode));
1081 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1083 rtx (*gen) (rtx, rtx, rtx);
1085 switch (src_mode)
1087 case V8QImode:
1088 gen = gen_aarch64_simd_combinev8qi;
1089 break;
1090 case V4HImode:
1091 gen = gen_aarch64_simd_combinev4hi;
1092 break;
1093 case V2SImode:
1094 gen = gen_aarch64_simd_combinev2si;
1095 break;
1096 case V2SFmode:
1097 gen = gen_aarch64_simd_combinev2sf;
1098 break;
1099 case DImode:
1100 gen = gen_aarch64_simd_combinedi;
1101 break;
1102 case DFmode:
1103 gen = gen_aarch64_simd_combinedf;
1104 break;
1105 default:
1106 gcc_unreachable ();
1109 emit_insn (gen (dst, src1, src2));
1110 return;
1114 /* Split a complex SIMD move. */
1116 void
1117 aarch64_split_simd_move (rtx dst, rtx src)
1119 machine_mode src_mode = GET_MODE (src);
1120 machine_mode dst_mode = GET_MODE (dst);
1122 gcc_assert (VECTOR_MODE_P (dst_mode));
1124 if (REG_P (dst) && REG_P (src))
1126 rtx (*gen) (rtx, rtx);
1128 gcc_assert (VECTOR_MODE_P (src_mode));
1130 switch (src_mode)
1132 case V16QImode:
1133 gen = gen_aarch64_split_simd_movv16qi;
1134 break;
1135 case V8HImode:
1136 gen = gen_aarch64_split_simd_movv8hi;
1137 break;
1138 case V4SImode:
1139 gen = gen_aarch64_split_simd_movv4si;
1140 break;
1141 case V2DImode:
1142 gen = gen_aarch64_split_simd_movv2di;
1143 break;
1144 case V4SFmode:
1145 gen = gen_aarch64_split_simd_movv4sf;
1146 break;
1147 case V2DFmode:
1148 gen = gen_aarch64_split_simd_movv2df;
1149 break;
1150 default:
1151 gcc_unreachable ();
1154 emit_insn (gen (dst, src));
1155 return;
1159 static rtx
1160 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1162 if (can_create_pseudo_p ())
1163 return force_reg (mode, value);
1164 else
1166 x = aarch64_emit_move (x, value);
1167 return x;
1172 static rtx
1173 aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1175 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1177 rtx high;
1178 /* Load the full offset into a register. This
1179 might be improvable in the future. */
1180 high = GEN_INT (offset);
1181 offset = 0;
1182 high = aarch64_force_temporary (mode, temp, high);
1183 reg = aarch64_force_temporary (mode, temp,
1184 gen_rtx_PLUS (mode, high, reg));
1186 return plus_constant (mode, reg, offset);
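 /* For example (illustrative): an offset of 4095, or of 4095 << 12, can be
    encoded directly as an add immediate, so only the plus_constant result
    is needed; an offset such as 0x123456 cannot, so it is first loaded
    into TEMP and then added to REG as a register operand.  */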
1189 static int
1190 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1191 machine_mode mode)
1193 unsigned HOST_WIDE_INT mask;
1194 int i;
1195 bool first;
1196 unsigned HOST_WIDE_INT val;
1197 bool subtargets;
1198 rtx subtarget;
1199 int one_match, zero_match, first_not_ffff_match;
1200 int num_insns = 0;
1202 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1204 if (generate)
1205 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1206 num_insns++;
1207 return num_insns;
1210 if (mode == SImode)
1212 /* We know we can't do this in 1 insn, and we must be able to do it
1213 in two; so don't mess around looking for sequences that don't buy
1214 us anything. */
1215 if (generate)
1217 emit_insn (gen_rtx_SET (VOIDmode, dest,
1218 GEN_INT (INTVAL (imm) & 0xffff)));
1219 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1220 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1222 num_insns += 2;
1223 return num_insns;
1226 /* Remaining cases are all for DImode. */
1228 val = INTVAL (imm);
1229 subtargets = optimize && can_create_pseudo_p ();
1231 one_match = 0;
1232 zero_match = 0;
1233 mask = 0xffff;
1234 first_not_ffff_match = -1;
1236 for (i = 0; i < 64; i += 16, mask <<= 16)
1238 if ((val & mask) == mask)
1239 one_match++;
1240 else
1242 if (first_not_ffff_match < 0)
1243 first_not_ffff_match = i;
1244 if ((val & mask) == 0)
1245 zero_match++;
1249 if (one_match == 2)
1251 /* Set one of the quarters and then insert back into result. */
1252 mask = 0xffffll << first_not_ffff_match;
1253 if (generate)
1255 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1256 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1257 GEN_INT ((val >> first_not_ffff_match)
1258 & 0xffff)));
1260 num_insns += 2;
1261 return num_insns;
1264 if (zero_match == 2)
1265 goto simple_sequence;
1267 mask = 0x0ffff0000UL;
1268 for (i = 16; i < 64; i += 16, mask <<= 16)
1270 HOST_WIDE_INT comp = mask & ~(mask - 1);
1272 if (aarch64_uimm12_shift (val - (val & mask)))
1274 if (generate)
1276 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1277 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1278 GEN_INT (val & mask)));
1279 emit_insn (gen_adddi3 (dest, subtarget,
1280 GEN_INT (val - (val & mask))));
1282 num_insns += 2;
1283 return num_insns;
1285 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1287 if (generate)
1289 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1290 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1291 GEN_INT ((val + comp) & mask)));
1292 emit_insn (gen_adddi3 (dest, subtarget,
1293 GEN_INT (val - ((val + comp) & mask))));
1295 num_insns += 2;
1296 return num_insns;
1298 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1300 if (generate)
1302 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1303 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1304 GEN_INT ((val - comp) | ~mask)));
1305 emit_insn (gen_adddi3 (dest, subtarget,
1306 GEN_INT (val - ((val - comp) | ~mask))));
1308 num_insns += 2;
1309 return num_insns;
1311 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1313 if (generate)
1315 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1316 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1317 GEN_INT (val | ~mask)));
1318 emit_insn (gen_adddi3 (dest, subtarget,
1319 GEN_INT (val - (val | ~mask))));
1321 num_insns += 2;
1322 return num_insns;
1326 /* See if we can do it by arithmetically combining two
1327 immediates. */
1328 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1330 int j;
1331 mask = 0xffff;
1333 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1334 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1336 if (generate)
1338 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1339 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1340 GEN_INT (aarch64_bitmasks[i])));
1341 emit_insn (gen_adddi3 (dest, subtarget,
1342 GEN_INT (val - aarch64_bitmasks[i])));
1344 num_insns += 2;
1345 return num_insns;
1348 for (j = 0; j < 64; j += 16, mask <<= 16)
1350 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1352 if (generate)
1354 emit_insn (gen_rtx_SET (VOIDmode, dest,
1355 GEN_INT (aarch64_bitmasks[i])));
1356 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1357 GEN_INT ((val >> j) & 0xffff)));
1359 num_insns += 2;
1360 return num_insns;
1365 /* See if we can do it by logically combining two immediates. */
1366 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1368 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1370 int j;
1372 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1373 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1375 if (generate)
1377 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1378 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1379 GEN_INT (aarch64_bitmasks[i])));
1380 emit_insn (gen_iordi3 (dest, subtarget,
1381 GEN_INT (aarch64_bitmasks[j])));
1383 num_insns += 2;
1384 return num_insns;
1387 else if ((val & aarch64_bitmasks[i]) == val)
1389 int j;
1391 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1392 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1394 if (generate)
1396 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1397 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1398 GEN_INT (aarch64_bitmasks[j])));
1399 emit_insn (gen_anddi3 (dest, subtarget,
1400 GEN_INT (aarch64_bitmasks[i])));
1402 num_insns += 2;
1403 return num_insns;
1408 if (one_match > zero_match)
1410 /* Set either first three quarters or all but the third. */
1411 mask = 0xffffll << (16 - first_not_ffff_match);
1412 if (generate)
1413 emit_insn (gen_rtx_SET (VOIDmode, dest,
1414 GEN_INT (val | mask | 0xffffffff00000000ull)));
1415 num_insns ++;
1417 /* Now insert other two quarters. */
1418 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1419 i < 64; i += 16, mask <<= 16)
1421 if ((val & mask) != mask)
1423 if (generate)
1424 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1425 GEN_INT ((val >> i) & 0xffff)));
1426 num_insns ++;
1429 return num_insns;
1432 simple_sequence:
1433 first = true;
1434 mask = 0xffff;
1435 for (i = 0; i < 64; i += 16, mask <<= 16)
1437 if ((val & mask) != 0)
1439 if (first)
1441 if (generate)
1442 emit_insn (gen_rtx_SET (VOIDmode, dest,
1443 GEN_INT (val & mask)));
1444 num_insns ++;
1445 first = false;
1447 else
1449 if (generate)
1450 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1451 GEN_INT ((val >> i) & 0xffff)));
1452 num_insns ++;
1457 return num_insns;
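 /* A worked example (illustrative): for the DImode constant
    0x0000123400005678 two of the four 16-bit quarters are zero, so the
    code above falls through to the simple sequence and returns 2:
      mov  x0, #0x5678
      movk x0, #0x1234, lsl 32  */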
1461 void
1462 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1464 machine_mode mode = GET_MODE (dest);
1466 gcc_assert (mode == SImode || mode == DImode);
1468 /* Check on what type of symbol it is. */
1469 if (GET_CODE (imm) == SYMBOL_REF
1470 || GET_CODE (imm) == LABEL_REF
1471 || GET_CODE (imm) == CONST)
1473 rtx mem, base, offset;
1474 enum aarch64_symbol_type sty;
1476 /* If we have (const (plus symbol offset)), separate out the offset
1477 before we start classifying the symbol. */
1478 split_const (imm, &base, &offset);
1480 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
1481 switch (sty)
1483 case SYMBOL_FORCE_TO_MEM:
1484 if (offset != const0_rtx
1485 && targetm.cannot_force_const_mem (mode, imm))
1487 gcc_assert (can_create_pseudo_p ());
1488 base = aarch64_force_temporary (mode, dest, base);
1489 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1490 aarch64_emit_move (dest, base);
1491 return;
1493 mem = force_const_mem (ptr_mode, imm);
1494 gcc_assert (mem);
1495 if (mode != ptr_mode)
1496 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1497 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1498 return;
1500 case SYMBOL_SMALL_TLSGD:
1501 case SYMBOL_SMALL_TLSDESC:
1502 case SYMBOL_SMALL_GOTTPREL:
1503 case SYMBOL_SMALL_GOT:
1504 case SYMBOL_TINY_GOT:
1505 if (offset != const0_rtx)
1507 gcc_assert(can_create_pseudo_p ());
1508 base = aarch64_force_temporary (mode, dest, base);
1509 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1510 aarch64_emit_move (dest, base);
1511 return;
1513 /* FALLTHRU */
1515 case SYMBOL_SMALL_TPREL:
1516 case SYMBOL_SMALL_ABSOLUTE:
1517 case SYMBOL_TINY_ABSOLUTE:
1518 aarch64_load_symref_appropriately (dest, imm, sty);
1519 return;
1521 default:
1522 gcc_unreachable ();
1526 if (!CONST_INT_P (imm))
1528 if (GET_CODE (imm) == HIGH)
1529 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1530 else
1532 rtx mem = force_const_mem (mode, imm);
1533 gcc_assert (mem);
1534 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1537 return;
1540 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
1543 static bool
1544 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1545 tree exp ATTRIBUTE_UNUSED)
1547 /* Currently, always true. */
1548 return true;
1551 /* Implement TARGET_PASS_BY_REFERENCE. */
1553 static bool
1554 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1555 machine_mode mode,
1556 const_tree type,
1557 bool named ATTRIBUTE_UNUSED)
1559 HOST_WIDE_INT size;
1560 machine_mode dummymode;
1561 int nregs;
1563 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1564 size = (mode == BLKmode && type)
1565 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1567 /* Aggregates are passed by reference based on their size. */
1568 if (type && AGGREGATE_TYPE_P (type))
1570 size = int_size_in_bytes (type);
1573 /* Variable sized arguments are always returned by reference. */
1574 if (size < 0)
1575 return true;
1577 /* Can this be a candidate to be passed in fp/simd register(s)? */
1578 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1579 &dummymode, &nregs,
1580 NULL))
1581 return false;
1583 /* Arguments which are variable sized or larger than 2 registers are
1584 passed by reference unless they are a homogeneous floating-point
1585 aggregate. */
1586 return size > 2 * UNITS_PER_WORD;
1589 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1590 static bool
1591 aarch64_return_in_msb (const_tree valtype)
1593 machine_mode dummy_mode;
1594 int dummy_int;
1596 /* Never happens in little-endian mode. */
1597 if (!BYTES_BIG_ENDIAN)
1598 return false;
1600 /* Only composite types smaller than or equal to 16 bytes can
1601 be potentially returned in registers. */
1602 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1603 || int_size_in_bytes (valtype) <= 0
1604 || int_size_in_bytes (valtype) > 16)
1605 return false;
1607 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1608 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1609 is always passed/returned in the least significant bits of fp/simd
1610 register(s). */
1611 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1612 &dummy_mode, &dummy_int, NULL))
1613 return false;
1615 return true;
1618 /* Implement TARGET_FUNCTION_VALUE.
1619 Define how to find the value returned by a function. */
1621 static rtx
1622 aarch64_function_value (const_tree type, const_tree func,
1623 bool outgoing ATTRIBUTE_UNUSED)
1625 machine_mode mode;
1626 int unsignedp;
1627 int count;
1628 machine_mode ag_mode;
1630 mode = TYPE_MODE (type);
1631 if (INTEGRAL_TYPE_P (type))
1632 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1634 if (aarch64_return_in_msb (type))
1636 HOST_WIDE_INT size = int_size_in_bytes (type);
1638 if (size % UNITS_PER_WORD != 0)
1640 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1641 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1645 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1646 &ag_mode, &count, NULL))
1648 if (!aarch64_composite_type_p (type, mode))
1650 gcc_assert (count == 1 && mode == ag_mode);
1651 return gen_rtx_REG (mode, V0_REGNUM);
1653 else
1655 int i;
1656 rtx par;
1658 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1659 for (i = 0; i < count; i++)
1661 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1662 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1663 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1664 XVECEXP (par, 0, i) = tmp;
1666 return par;
1669 else
1670 return gen_rtx_REG (mode, R0_REGNUM);
1673 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1674 Return true if REGNO is the number of a hard register in which the values
1675 of called function may come back. */
1677 static bool
1678 aarch64_function_value_regno_p (const unsigned int regno)
1680 /* Maximum of 16 bytes can be returned in the general registers. Examples
1681 of 16-byte return values are: 128-bit integers and 16-byte small
1682 structures (excluding homogeneous floating-point aggregates). */
1683 if (regno == R0_REGNUM || regno == R1_REGNUM)
1684 return true;
1686 /* Up to four fp/simd registers can return a function value, e.g. a
1687 homogeneous floating-point aggregate having four members. */
1688 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1689 return !TARGET_GENERAL_REGS_ONLY;
1691 return false;
1694 /* Implement TARGET_RETURN_IN_MEMORY.
1696 If the type T of the result of a function is such that
1697 void func (T arg)
1698 would require that arg be passed as a value in a register (or set of
1699 registers) according to the parameter passing rules, then the result
1700 is returned in the same registers as would be used for such an
1701 argument. */
1703 static bool
1704 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1706 HOST_WIDE_INT size;
1707 machine_mode ag_mode;
1708 int count;
1710 if (!AGGREGATE_TYPE_P (type)
1711 && TREE_CODE (type) != COMPLEX_TYPE
1712 && TREE_CODE (type) != VECTOR_TYPE)
1713 /* Simple scalar types are always returned in registers.  */
1714 return false;
1716 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1717 type,
1718 &ag_mode,
1719 &count,
1720 NULL))
1721 return false;
1723 /* Types larger than 2 registers are returned in memory.  */
1724 size = int_size_in_bytes (type);
1725 return (size < 0 || size > 2 * UNITS_PER_WORD);
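 /* For example (illustrative): a struct of four doubles is a homogeneous
    floating-point aggregate and so is returned in v0-v3 rather than in
    memory, while a 24-byte struct of three pointers exceeds two GP
    registers and is returned in memory.  */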
1728 static bool
1729 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1730 const_tree type, int *nregs)
1732 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1733 return aarch64_vfp_is_call_or_return_candidate (mode,
1734 type,
1735 &pcum->aapcs_vfp_rmode,
1736 nregs,
1737 NULL);
1740 /* Given MODE and TYPE of a function argument, return the alignment in
1741 bits. The idea is to suppress any stronger alignment requested by
1742 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1743 This is a helper function for local use only. */
1745 static unsigned int
1746 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1748 unsigned int alignment;
1750 if (type)
1752 if (!integer_zerop (TYPE_SIZE (type)))
1754 if (TYPE_MODE (type) == mode)
1755 alignment = TYPE_ALIGN (type);
1756 else
1757 alignment = GET_MODE_ALIGNMENT (mode);
1759 else
1760 alignment = 0;
1762 else
1763 alignment = GET_MODE_ALIGNMENT (mode);
1765 return alignment;
1768 /* Layout a function argument according to the AAPCS64 rules. The rule
1769 numbers refer to the rule numbers in the AAPCS64. */
1771 static void
1772 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1773 const_tree type,
1774 bool named ATTRIBUTE_UNUSED)
1776 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1777 int ncrn, nvrn, nregs;
1778 bool allocate_ncrn, allocate_nvrn;
1779 HOST_WIDE_INT size;
1781 /* We need to do this once per argument. */
1782 if (pcum->aapcs_arg_processed)
1783 return;
1785 pcum->aapcs_arg_processed = true;
1787 /* Size in bytes, rounded up to the nearest multiple of 8 bytes.  */
1788 size
1789 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1790 UNITS_PER_WORD);
1792 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1793 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1794 mode,
1795 type,
1796 &nregs);
1798 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1799 The following code thus handles passing by SIMD/FP registers first. */
1801 nvrn = pcum->aapcs_nvrn;
1803 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1804 and homogeneous short-vector aggregates (HVA).  */
1805 if (allocate_nvrn)
1807 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1809 pcum->aapcs_nextnvrn = nvrn + nregs;
1810 if (!aarch64_composite_type_p (type, mode))
1812 gcc_assert (nregs == 1);
1813 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1815 else
1817 rtx par;
1818 int i;
1819 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1820 for (i = 0; i < nregs; i++)
1822 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1823 V0_REGNUM + nvrn + i);
1824 tmp = gen_rtx_EXPR_LIST
1825 (VOIDmode, tmp,
1826 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1827 XVECEXP (par, 0, i) = tmp;
1829 pcum->aapcs_reg = par;
1831 return;
1833 else
1835 /* C.3 NSRN is set to 8. */
1836 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1837 goto on_stack;
1841 ncrn = pcum->aapcs_ncrn;
1842 nregs = size / UNITS_PER_WORD;
1844 /* C6 - C9, though the sign and zero extension semantics are
1845 handled elsewhere.  This is the case where the argument fits
1846 entirely in general registers.  */
1847 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1849 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1851 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1853 /* C.8 if the argument has an alignment of 16 then the NGRN is
1854 rounded up to the next even number. */
1855 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1857 ++ncrn;
1858 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1860 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1861 A reg is still generated for it, but the caller should be smart
1862 enough not to use it. */
1863 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1865 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1867 else
1869 rtx par;
1870 int i;
1872 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1873 for (i = 0; i < nregs; i++)
1875 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1876 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1877 GEN_INT (i * UNITS_PER_WORD));
1878 XVECEXP (par, 0, i) = tmp;
1880 pcum->aapcs_reg = par;
1883 pcum->aapcs_nextncrn = ncrn + nregs;
1884 return;
1887 /* C.11 */
1888 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1890 /* The argument is passed on stack; record the needed number of words for
1891 this argument and align the total size if necessary. */
1892 on_stack:
1893 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1894 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1895 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1896 16 / UNITS_PER_WORD);
1897 return;
1900 /* Implement TARGET_FUNCTION_ARG. */
1902 static rtx
1903 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
1904 const_tree type, bool named)
1906 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1907 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1909 if (mode == VOIDmode)
1910 return NULL_RTX;
1912 aarch64_layout_arg (pcum_v, mode, type, named);
1913 return pcum->aapcs_reg;
1916 void
1917 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1918 const_tree fntype ATTRIBUTE_UNUSED,
1919 rtx libname ATTRIBUTE_UNUSED,
1920 const_tree fndecl ATTRIBUTE_UNUSED,
1921 unsigned n_named ATTRIBUTE_UNUSED)
1923 pcum->aapcs_ncrn = 0;
1924 pcum->aapcs_nvrn = 0;
1925 pcum->aapcs_nextncrn = 0;
1926 pcum->aapcs_nextnvrn = 0;
1927 pcum->pcs_variant = ARM_PCS_AAPCS64;
1928 pcum->aapcs_reg = NULL_RTX;
1929 pcum->aapcs_arg_processed = false;
1930 pcum->aapcs_stack_words = 0;
1931 pcum->aapcs_stack_size = 0;
1933 return;
1936 static void
1937 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1938 machine_mode mode,
1939 const_tree type,
1940 bool named)
1942 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1943 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1945 aarch64_layout_arg (pcum_v, mode, type, named);
1946 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1947 != (pcum->aapcs_stack_words != 0));
1948 pcum->aapcs_arg_processed = false;
1949 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1950 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1951 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1952 pcum->aapcs_stack_words = 0;
1953 pcum->aapcs_reg = NULL_RTX;
1957 bool
1958 aarch64_function_arg_regno_p (unsigned regno)
1960 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1961 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1964 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1965 PARM_BOUNDARY bits of alignment, but will be given anything up
1966 to STACK_BOUNDARY bits if the type requires it. This makes sure
1967 that both before and after the layout of each argument, the Next
1968 Stacked Argument Address (NSAA) will have a minimum alignment of
1969 8 bytes. */
1971 static unsigned int
1972 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
1974 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1976 if (alignment < PARM_BOUNDARY)
1977 alignment = PARM_BOUNDARY;
1978 if (alignment > STACK_BOUNDARY)
1979 alignment = STACK_BOUNDARY;
1980 return alignment;
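 /* For example (illustrative): a char argument is still given
    PARM_BOUNDARY bits of alignment, a naturally 16-byte-aligned type such
    as a TImode integer gets STACK_BOUNDARY, and any stricter alignment
    request is capped at STACK_BOUNDARY.  */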
1983 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1985 Return true if an argument passed on the stack should be padded upwards,
1986 i.e. if the least-significant byte of the stack slot has useful data.
1988 Small aggregate types are placed at the lowest memory address.
1990 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1992 bool
1993 aarch64_pad_arg_upward (machine_mode mode, const_tree type)
1995 /* On little-endian targets, the least significant byte of every stack
1996 argument is passed at the lowest byte address of the stack slot. */
1997 if (!BYTES_BIG_ENDIAN)
1998 return true;
2000 /* Otherwise, integral, floating-point and pointer types are padded downward:
2001 the least significant byte of a stack argument is passed at the highest
2002 byte address of the stack slot. */
2003 if (type
2004 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2005 || POINTER_TYPE_P (type))
2006 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2007 return false;
2009 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2010 return true;
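 /* For instance (illustrative): on big-endian, a short passed on the stack
    is padded downward, so its bytes occupy the highest addresses of the
    8-byte slot, whereas a 3-byte struct is padded upward and starts at the
    slot's lowest address; on little-endian everything is padded upward.  */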
2013 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2015 It specifies padding for the last (may also be the only)
2016 element of a block move between registers and memory. If
2017 assuming the block is in memory, padding upward means that
2018 the last element is padded after its most significant byte,
2019 while in downward padding the last element is padded at
2020 its least significant byte side.
2022 Small aggregates and small complex types are always padded
2023 upwards.
2025 We don't need to worry about homogeneous floating-point or
2026 short-vector aggregates; their move is not affected by the
2027 padding direction determined here. Regardless of endianness,
2028 each element of such an aggregate is put in the least
2029 significant bits of a fp/simd register.
2031 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2032 register has useful data, and return the opposite if the most
2033 significant byte does. */
2035 bool
2036 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
2037 bool first ATTRIBUTE_UNUSED)
2040 /* Small composite types are always padded upward. */
2041 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2043 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2044 : GET_MODE_SIZE (mode));
2045 if (size < 2 * UNITS_PER_WORD)
2046 return true;
2049 /* Otherwise, use the default padding. */
2050 return !BYTES_BIG_ENDIAN;
2053 static machine_mode
2054 aarch64_libgcc_cmp_return_mode (void)
2056 return SImode;
2059 static bool
2060 aarch64_frame_pointer_required (void)
2062 /* In aarch64_override_options_after_change
2063 flag_omit_leaf_frame_pointer turns off the frame pointer by
2064 default. Turn it back on now if we've not got a leaf
2065 function. */
2066 if (flag_omit_leaf_frame_pointer
2067 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2068 return true;
2070 return false;
2073 /* Mark the registers that need to be saved by the callee and calculate
2074 the size of the callee-saved registers area and frame record (both FP
2075 and LR may be omitted). */
2076 static void
2077 aarch64_layout_frame (void)
2079 HOST_WIDE_INT offset = 0;
2080 int regno;
2082 if (reload_completed && cfun->machine->frame.laid_out)
2083 return;
2085 #define SLOT_NOT_REQUIRED (-2)
2086 #define SLOT_REQUIRED (-1)
2088 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2089 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2091 /* First mark all the registers that really need to be saved... */
2092 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2093 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2095 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2096 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2098 /* ... that includes the eh data registers (if needed)... */
2099 if (crtl->calls_eh_return)
2100 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2101 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2102 = SLOT_REQUIRED;
2104 /* ... and any callee saved register that dataflow says is live. */
2105 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2106 if (df_regs_ever_live_p (regno)
2107 && (regno == R30_REGNUM
2108 || !call_used_regs[regno]))
2109 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2111 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2112 if (df_regs_ever_live_p (regno)
2113 && !call_used_regs[regno])
2114 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2116 if (frame_pointer_needed)
2118 /* FP and LR are placed in the frame record. */
2119 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
2120 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2121 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
2122 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2123 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2124 offset += 2 * UNITS_PER_WORD;
2127 /* Now assign stack slots for them. */
2128 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2129 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2131 cfun->machine->frame.reg_offset[regno] = offset;
2132 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2133 cfun->machine->frame.wb_candidate1 = regno;
2134 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2135 cfun->machine->frame.wb_candidate2 = regno;
2136 offset += UNITS_PER_WORD;
2139 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2140 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2142 cfun->machine->frame.reg_offset[regno] = offset;
2143 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2144 cfun->machine->frame.wb_candidate1 = regno;
2145 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2146 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2147 cfun->machine->frame.wb_candidate2 = regno;
2148 offset += UNITS_PER_WORD;
2151 cfun->machine->frame.padding0 =
2152 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2153 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2155 cfun->machine->frame.saved_regs_size = offset;
2157 cfun->machine->frame.hard_fp_offset
2158 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2159 + get_frame_size ()
2160 + cfun->machine->frame.saved_regs_size,
2161 STACK_BOUNDARY / BITS_PER_UNIT);
2163 cfun->machine->frame.frame_size
2164 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2165 + crtl->outgoing_args_size,
2166 STACK_BOUNDARY / BITS_PER_UNIT);
2168 cfun->machine->frame.laid_out = true;
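/* A worked example of the layout above (hypothetical function, sizes
   assumed): frame pointer needed, x19, x20 and d8 live, 24 bytes of
   locals, no varargs save area and no outgoing arguments.  FP and LR
   take slots 0 and 8, x19/x20/d8 take 16/24/32, so the raw save area
   is 40 bytes and padding0 rounds it up to saved_regs_size = 48.
   hard_fp_offset = ROUND_UP (0 + 24 + 48, 16) = 80 and, with no
   outgoing arguments, frame_size = 80 as well.  wb_candidate1 and
   wb_candidate2 are x29 and x30.  */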
2171 static bool
2172 aarch64_register_saved_on_entry (int regno)
2174 return cfun->machine->frame.reg_offset[regno] >= 0;
2177 static unsigned
2178 aarch64_next_callee_save (unsigned regno, unsigned limit)
2180 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2181 regno ++;
2182 return regno;
2185 static void
2186 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
2187 HOST_WIDE_INT adjustment)
2189 rtx base_rtx = stack_pointer_rtx;
2190 rtx insn, reg, mem;
2192 reg = gen_rtx_REG (mode, regno);
2193 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2194 plus_constant (Pmode, base_rtx, -adjustment));
2195 mem = gen_rtx_MEM (mode, mem);
2197 insn = emit_move_insn (mem, reg);
2198 RTX_FRAME_RELATED_P (insn) = 1;
2201 static rtx
2202 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2203 HOST_WIDE_INT adjustment)
2205 switch (mode)
2207 case DImode:
2208 return gen_storewb_pairdi_di (base, base, reg, reg2,
2209 GEN_INT (-adjustment),
2210 GEN_INT (UNITS_PER_WORD - adjustment));
2211 case DFmode:
2212 return gen_storewb_pairdf_di (base, base, reg, reg2,
2213 GEN_INT (-adjustment),
2214 GEN_INT (UNITS_PER_WORD - adjustment));
2215 default:
2216 gcc_unreachable ();
2220 static void
2221 aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2222 unsigned regno2, HOST_WIDE_INT adjustment)
2224 rtx_insn *insn;
2225 rtx reg1 = gen_rtx_REG (mode, regno1);
2226 rtx reg2 = gen_rtx_REG (mode, regno2);
2228 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2229 reg2, adjustment));
2230 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2231 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2232 RTX_FRAME_RELATED_P (insn) = 1;
2235 static rtx
2236 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2237 HOST_WIDE_INT adjustment)
2239 switch (mode)
2241 case DImode:
2242 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2243 GEN_INT (UNITS_PER_WORD));
2244 case DFmode:
2245 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2246 GEN_INT (UNITS_PER_WORD));
2247 default:
2248 gcc_unreachable ();
2252 static rtx
2253 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2254 rtx reg2)
2256 switch (mode)
2258 case DImode:
2259 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2261 case DFmode:
2262 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2264 default:
2265 gcc_unreachable ();
2269 static rtx
2270 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2271 rtx mem2)
2273 switch (mode)
2275 case DImode:
2276 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2278 case DFmode:
2279 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2281 default:
2282 gcc_unreachable ();
2287 static void
2288 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
2289 unsigned start, unsigned limit, bool skip_wb)
2291 rtx_insn *insn;
2292 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2293 ? gen_frame_mem : gen_rtx_MEM);
2294 unsigned regno;
2295 unsigned regno2;
2297 for (regno = aarch64_next_callee_save (start, limit);
2298 regno <= limit;
2299 regno = aarch64_next_callee_save (regno + 1, limit))
2301 rtx reg, mem;
2302 HOST_WIDE_INT offset;
2304 if (skip_wb
2305 && (regno == cfun->machine->frame.wb_candidate1
2306 || regno == cfun->machine->frame.wb_candidate2))
2307 continue;
2309 reg = gen_rtx_REG (mode, regno);
2310 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2311 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2312 offset));
2314 regno2 = aarch64_next_callee_save (regno + 1, limit);
2316 if (regno2 <= limit
2317 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2318 == cfun->machine->frame.reg_offset[regno2]))
2321 rtx reg2 = gen_rtx_REG (mode, regno2);
2322 rtx mem2;
2324 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2325 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2326 offset));
2327 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2328 reg2));
2330 /* The first part of a frame-related parallel insn is
2331 always assumed to be relevant to the frame
2332 calculations; subsequent parts are only
2333 frame-related if explicitly marked. */
2334 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2335 regno = regno2;
2337 else
2338 insn = emit_move_insn (mem, reg);
2340 RTX_FRAME_RELATED_P (insn) = 1;
2344 static void
2345 aarch64_restore_callee_saves (machine_mode mode,
2346 HOST_WIDE_INT start_offset, unsigned start,
2347 unsigned limit, bool skip_wb, rtx *cfi_ops)
2349 rtx base_rtx = stack_pointer_rtx;
2350 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2351 ? gen_frame_mem : gen_rtx_MEM);
2352 unsigned regno;
2353 unsigned regno2;
2354 HOST_WIDE_INT offset;
2356 for (regno = aarch64_next_callee_save (start, limit);
2357 regno <= limit;
2358 regno = aarch64_next_callee_save (regno + 1, limit))
2360 rtx reg, mem;
2362 if (skip_wb
2363 && (regno == cfun->machine->frame.wb_candidate1
2364 || regno == cfun->machine->frame.wb_candidate2))
2365 continue;
2367 reg = gen_rtx_REG (mode, regno);
2368 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2369 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2371 regno2 = aarch64_next_callee_save (regno + 1, limit);
2373 if (regno2 <= limit
2374 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2375 == cfun->machine->frame.reg_offset[regno2]))
2377 rtx reg2 = gen_rtx_REG (mode, regno2);
2378 rtx mem2;
2380 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2381 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2382 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2384 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2385 regno = regno2;
2387 else
2388 emit_move_insn (reg, mem);
2389 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2393 /* AArch64 stack frames generated by this compiler look like:
2395 +-------------------------------+
2397 | incoming stack arguments |
2399 +-------------------------------+
2400 | | <-- incoming stack pointer (aligned)
2401 | callee-allocated save area |
2402 | for register varargs |
2404 +-------------------------------+
2405 | local variables | <-- frame_pointer_rtx
2407 +-------------------------------+
2408 | padding0 | \
2409 +-------------------------------+ |
2410 | callee-saved registers | | frame.saved_regs_size
2411 +-------------------------------+ |
2412 | LR' | |
2413 +-------------------------------+ |
2414 | FP' | / <- hard_frame_pointer_rtx (aligned)
2415 +-------------------------------+
2416 | dynamic allocation |
2417 +-------------------------------+
2418 | padding |
2419 +-------------------------------+
2420 | outgoing stack arguments | <-- arg_pointer
2422 +-------------------------------+
2423 | | <-- stack_pointer_rtx (aligned)
2425 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2426 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2427 unchanged. */
2429 /* Generate the prologue instructions for entry into a function.
2430 Establish the stack frame by decreasing the stack pointer with a
2431 properly calculated size and, if necessary, create a frame record
2432 filled with the values of LR and the previous frame pointer. The
2433 current FP is also set up if it is in use. */
2435 void
2436 aarch64_expand_prologue (void)
2438 /* sub sp, sp, #<frame_size>
2439 stp {fp, lr}, [sp, #<frame_size> - 16]
2440 add fp, sp, #<frame_size> - hardfp_offset
2441 stp {cs_reg}, [fp, #-16] etc.
2443 sub sp, sp, <final_adjustment_if_any>
2445 HOST_WIDE_INT frame_size, offset;
2446 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2447 HOST_WIDE_INT hard_fp_offset;
2448 rtx_insn *insn;
2450 aarch64_layout_frame ();
2452 offset = frame_size = cfun->machine->frame.frame_size;
2453 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2454 fp_offset = frame_size - hard_fp_offset;
2456 if (flag_stack_usage_info)
2457 current_function_static_stack_size = frame_size;
2459 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2460 if (offset >= 512)
2462 /* When the frame is large, the stack pointer is decreased first to
2463 step over the callee-allocated save area for register varargs,
2464 the local variable area and/or the callee-saved register area.
2465 This allows the pre-index write-back store pair instructions to
2466 be used to set up the stack frame efficiently. */
2468 offset = hard_fp_offset;
2469 if (offset >= 512)
2470 offset = cfun->machine->frame.saved_regs_size;
2472 frame_size -= (offset + crtl->outgoing_args_size);
2473 fp_offset = 0;
2475 if (frame_size >= 0x1000000)
2477 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2478 emit_move_insn (op0, GEN_INT (-frame_size));
2479 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2481 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2482 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2483 plus_constant (Pmode, stack_pointer_rtx,
2484 -frame_size)));
2485 RTX_FRAME_RELATED_P (insn) = 1;
2487 else if (frame_size > 0)
2489 int hi_ofs = frame_size & 0xfff000;
2490 int lo_ofs = frame_size & 0x000fff;
2492 if (hi_ofs)
2494 insn = emit_insn (gen_add2_insn
2495 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2496 RTX_FRAME_RELATED_P (insn) = 1;
2498 if (lo_ofs)
2500 insn = emit_insn (gen_add2_insn
2501 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2502 RTX_FRAME_RELATED_P (insn) = 1;
2506 else
2507 frame_size = -1;
2509 if (offset > 0)
2511 bool skip_wb = false;
2513 if (frame_pointer_needed)
2515 skip_wb = true;
2517 if (fp_offset)
2519 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2520 GEN_INT (-offset)));
2521 RTX_FRAME_RELATED_P (insn) = 1;
2523 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2524 R30_REGNUM, false);
2526 else
2527 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2529 /* Set up frame pointer to point to the location of the
2530 previous frame pointer on the stack. */
2531 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2532 stack_pointer_rtx,
2533 GEN_INT (fp_offset)));
2534 RTX_FRAME_RELATED_P (insn) = 1;
2535 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2537 else
2539 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2540 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2542 if (fp_offset
2543 || reg1 == FIRST_PSEUDO_REGISTER
2544 || (reg2 == FIRST_PSEUDO_REGISTER
2545 && offset >= 256))
2547 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2548 GEN_INT (-offset)));
2549 RTX_FRAME_RELATED_P (insn) = 1;
2551 else
2553 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2555 skip_wb = true;
2557 if (reg2 == FIRST_PSEUDO_REGISTER)
2558 aarch64_pushwb_single_reg (mode1, reg1, offset);
2559 else
2560 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2564 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2565 skip_wb);
2566 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2567 skip_wb);
2570 /* When offset >= 512, emit:
2571 sub sp, sp, #<outgoing_args_size> */
2572 if (frame_size > -1)
2574 if (crtl->outgoing_args_size > 0)
2576 insn = emit_insn (gen_add2_insn
2577 (stack_pointer_rtx,
2578 GEN_INT (- crtl->outgoing_args_size)));
2579 RTX_FRAME_RELATED_P (insn) = 1;
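/* A worked example of the initial adjustment above (frame size assumed):
   for a remaining adjustment of 0x12345 bytes the decrease is split into
   sub sp, sp, #0x12000 followed by sub sp, sp, #0x345, since each 12-bit
   chunk fits an add/sub immediate.  An adjustment of 0x1000000 bytes or
   more is instead materialized in IP0 and applied in a single add.  */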
2584 /* Return TRUE if we can use a simple_return insn.
2586 This function checks whether the function's stack frame is empty, which
2587 means that no restore actions are needed. The pro_and_epilogue pass uses
2588 this to check whether the shrink-wrapping optimization is feasible. */
2590 bool
2591 aarch64_use_return_insn_p (void)
2593 if (!reload_completed)
2594 return false;
2596 if (crtl->profile)
2597 return false;
2599 aarch64_layout_frame ();
2601 return cfun->machine->frame.frame_size == 0;
2604 /* Generate the epilogue instructions for returning from a function. */
2605 void
2606 aarch64_expand_epilogue (bool for_sibcall)
2608 HOST_WIDE_INT frame_size, offset;
2609 HOST_WIDE_INT fp_offset;
2610 HOST_WIDE_INT hard_fp_offset;
2611 rtx_insn *insn;
2612 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2613 bool need_barrier_p = (get_frame_size () != 0
2614 || cfun->machine->frame.saved_varargs_size);
2616 aarch64_layout_frame ();
2618 offset = frame_size = cfun->machine->frame.frame_size;
2619 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2620 fp_offset = frame_size - hard_fp_offset;
2622 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2623 if (offset >= 512)
2625 offset = hard_fp_offset;
2626 if (offset >= 512)
2627 offset = cfun->machine->frame.saved_regs_size;
2629 frame_size -= (offset + crtl->outgoing_args_size);
2630 fp_offset = 0;
2631 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2633 insn = emit_insn (gen_add2_insn
2634 (stack_pointer_rtx,
2635 GEN_INT (crtl->outgoing_args_size)));
2636 RTX_FRAME_RELATED_P (insn) = 1;
2639 else
2640 frame_size = -1;
2642 /* If there were outgoing arguments or we've done dynamic stack
2643 allocation, then restore the stack pointer from the frame
2644 pointer. This takes at most one insn and is more efficient than
2645 using GCC's internal mechanism. */
2646 if (frame_pointer_needed
2647 && (crtl->outgoing_args_size || cfun->calls_alloca))
2649 if (cfun->calls_alloca)
2650 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2652 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2653 hard_frame_pointer_rtx,
2654 GEN_INT (0)));
2655 offset = offset - fp_offset;
2658 if (offset > 0)
2660 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2661 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2662 bool skip_wb = true;
2663 rtx cfi_ops = NULL;
2665 if (frame_pointer_needed)
2666 fp_offset = 0;
2667 else if (fp_offset
2668 || reg1 == FIRST_PSEUDO_REGISTER
2669 || (reg2 == FIRST_PSEUDO_REGISTER
2670 && offset >= 256))
2671 skip_wb = false;
2673 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2674 skip_wb, &cfi_ops);
2675 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2676 skip_wb, &cfi_ops);
2678 if (need_barrier_p)
2679 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2681 if (skip_wb)
2683 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2684 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2686 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2687 if (reg2 == FIRST_PSEUDO_REGISTER)
2689 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2690 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2691 mem = gen_rtx_MEM (mode1, mem);
2692 insn = emit_move_insn (rreg1, mem);
2694 else
2696 rtx rreg2 = gen_rtx_REG (mode1, reg2);
2698 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2699 insn = emit_insn (aarch64_gen_loadwb_pair
2700 (mode1, stack_pointer_rtx, rreg1,
2701 rreg2, offset));
2704 else
2706 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2707 GEN_INT (offset)));
2710 /* Reset the CFA to be SP + FRAME_SIZE. */
2711 rtx new_cfa = stack_pointer_rtx;
2712 if (frame_size > 0)
2713 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2714 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2715 REG_NOTES (insn) = cfi_ops;
2716 RTX_FRAME_RELATED_P (insn) = 1;
2719 if (frame_size > 0)
2721 if (need_barrier_p)
2722 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2724 if (frame_size >= 0x1000000)
2726 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2727 emit_move_insn (op0, GEN_INT (frame_size));
2728 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2730 else
2732 int hi_ofs = frame_size & 0xfff000;
2733 int lo_ofs = frame_size & 0x000fff;
2735 if (hi_ofs && lo_ofs)
2737 insn = emit_insn (gen_add2_insn
2738 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2739 RTX_FRAME_RELATED_P (insn) = 1;
2740 frame_size = lo_ofs;
2742 insn = emit_insn (gen_add2_insn
2743 (stack_pointer_rtx, GEN_INT (frame_size)));
2746 /* Reset the CFA to be SP + 0. */
2747 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2748 RTX_FRAME_RELATED_P (insn) = 1;
2751 /* Stack adjustment for exception handler. */
2752 if (crtl->calls_eh_return)
2754 /* We need to unwind the stack by the offset computed by
2755 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2756 to be SP; letting the CFA move during this adjustment
2757 is just as correct as retaining the CFA from the body
2758 of the function. Therefore, do nothing special. */
2759 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2762 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2763 if (!for_sibcall)
2764 emit_jump_insn (ret_rtx);
2767 /* Return the place to copy the exception unwinding return address to.
2768 This will probably be a stack slot, but could (in theory) be the
2769 return register. */
2771 aarch64_final_eh_return_addr (void)
2773 HOST_WIDE_INT fp_offset;
2775 aarch64_layout_frame ();
2777 fp_offset = cfun->machine->frame.frame_size
2778 - cfun->machine->frame.hard_fp_offset;
2780 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2781 return gen_rtx_REG (DImode, LR_REGNUM);
2783 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2784 result in a store to save LR introduced by builtin_eh_return () being
2785 incorrectly deleted because the alias is not detected.
2786 So, in the calculation of the address to copy the exception unwinding
2787 return address to, we distinguish two cases.
2788 If FP is needed and fp_offset is 0, then SP = FP and hence we return
2789 an SP-relative location, since all the addresses are SP-relative
2790 in this case. This prevents the store from being optimized away.
2791 If fp_offset is not 0, then the addresses will be FP-relative and
2792 therefore we return an FP-relative location. */
2794 if (frame_pointer_needed)
2796 if (fp_offset)
2797 return gen_frame_mem (DImode,
2798 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2799 else
2800 return gen_frame_mem (DImode,
2801 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2804 /* If FP is not needed, we calculate the location of LR, which would be
2805 at the top of the saved registers block. */
2807 return gen_frame_mem (DImode,
2808 plus_constant (Pmode,
2809 stack_pointer_rtx,
2810 fp_offset
2811 + cfun->machine->frame.saved_regs_size
2812 - 2 * UNITS_PER_WORD));
2815 /* Possibly output code to build up a constant in a register. For
2816 the benefit of the costs infrastructure, returns the number of
2817 instructions that would be emitted. GENERATE controls whether the
2818 code is actually emitted. */
2820 static int
2821 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2823 int insns = 0;
2825 if (aarch64_bitmask_imm (val, DImode))
2827 if (generate)
2828 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2829 insns = 1;
2831 else
2833 int i;
2834 int ncount = 0;
2835 int zcount = 0;
2836 HOST_WIDE_INT valp = val >> 16;
2837 HOST_WIDE_INT valm;
2838 HOST_WIDE_INT tval;
2840 for (i = 16; i < 64; i += 16)
2842 valm = (valp & 0xffff);
2844 if (valm != 0)
2845 ++ zcount;
2847 if (valm != 0xffff)
2848 ++ ncount;
2850 valp >>= 16;
2853 /* zcount contains the number of additional MOVK instructions
2854 required if the constant is built up with an initial MOVZ instruction,
2855 while ncount is the number of MOVK instructions required if starting
2856 with a MOVN instruction. Choose the sequence that yields the fewer
2857 instructions, preferring MOVZ when the two counts are equal. */
2859 if (ncount < zcount)
2861 if (generate)
2862 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2863 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2864 tval = 0xffff;
2865 insns++;
2867 else
2869 if (generate)
2870 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2871 GEN_INT (val & 0xffff));
2872 tval = 0;
2873 insns++;
2876 val >>= 16;
2878 for (i = 16; i < 64; i += 16)
2880 if ((val & 0xffff) != tval)
2882 if (generate)
2883 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2884 GEN_INT (i),
2885 GEN_INT (val & 0xffff)));
2886 insns++;
2888 val >>= 16;
2891 return insns;
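/* The counting logic above can be hard to follow out of context.  The
   following standalone sketch (hypothetical helper, not used by the
   backend; plain unsigned long long instead of HOST_WIDE_INT) computes
   the same MOVZ-versus-MOVN instruction count estimate for a 64-bit
   constant, ignoring the bitmask-immediate shortcut.  */

static int
example_movz_movn_insn_count (unsigned long long val)
{
  int zcount = 0;  /* MOVKs needed after an initial MOVZ.  */
  int ncount = 0;  /* MOVKs needed after an initial MOVN.  */
  int i;

  /* The first instruction always materializes the low 16 bits, so only
     the upper three 16-bit chunks can require an extra MOVK.  */
  for (i = 16; i < 64; i += 16)
    {
      unsigned int chunk = (val >> i) & 0xffff;
      if (chunk != 0)
	zcount++;  /* Differs from the all-zeros fill of MOVZ.  */
      if (chunk != 0xffff)
	ncount++;  /* Differs from the all-ones fill of MOVN.  */
    }

  /* One MOVZ or MOVN, plus the cheaper count of MOVKs, preferring the
     MOVZ sequence on a tie, as above.  */
  return 1 + (ncount < zcount ? ncount : zcount);
}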
2894 static void
2895 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2897 HOST_WIDE_INT mdelta = delta;
2898 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2899 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2901 if (mdelta < 0)
2902 mdelta = -mdelta;
2904 if (mdelta >= 4096 * 4096)
2906 (void) aarch64_build_constant (scratchreg, delta, true);
2907 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2909 else if (mdelta > 0)
2911 if (mdelta >= 4096)
2913 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2914 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2915 if (delta < 0)
2916 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2917 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2918 else
2919 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2920 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2922 if (mdelta % 4096 != 0)
2924 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2925 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2926 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
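/* For example (delta assumed), adding 20000 to x0 via this routine
   splits the constant as 20000 = 4 * 4096 + 3616 and is emitted
   roughly as mov scratch, #4; add x0, x0, scratch, lsl #12;
   add x0, x0, #3616, while a delta of 4096 * 4096 or more is first
   built into the scratch register with aarch64_build_constant.  */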
2931 /* Output code to add DELTA to the first argument, and then jump
2932 to FUNCTION. Used for C++ multiple inheritance. */
2933 static void
2934 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2935 HOST_WIDE_INT delta,
2936 HOST_WIDE_INT vcall_offset,
2937 tree function)
2939 /* The this pointer is always in x0. Note that this differs from
2940 ARM, where the this pointer may be bumped to r1 if r0 is required
2941 to return a pointer to an aggregate. On AArch64 a result value
2942 pointer will be in x8. */
2943 int this_regno = R0_REGNUM;
2944 rtx this_rtx, temp0, temp1, addr, funexp;
2945 rtx_insn *insn;
2947 reload_completed = 1;
2948 emit_note (NOTE_INSN_PROLOGUE_END);
2950 if (vcall_offset == 0)
2951 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2952 else
2954 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2956 this_rtx = gen_rtx_REG (Pmode, this_regno);
2957 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2958 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2960 addr = this_rtx;
2961 if (delta != 0)
2963 if (delta >= -256 && delta < 256)
2964 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2965 plus_constant (Pmode, this_rtx, delta));
2966 else
2967 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2970 if (Pmode == ptr_mode)
2971 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2972 else
2973 aarch64_emit_move (temp0,
2974 gen_rtx_ZERO_EXTEND (Pmode,
2975 gen_rtx_MEM (ptr_mode, addr)));
2977 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2978 addr = plus_constant (Pmode, temp0, vcall_offset);
2979 else
2981 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2982 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2985 if (Pmode == ptr_mode)
2986 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2987 else
2988 aarch64_emit_move (temp1,
2989 gen_rtx_SIGN_EXTEND (Pmode,
2990 gen_rtx_MEM (ptr_mode, addr)));
2992 emit_insn (gen_add2_insn (this_rtx, temp1));
2995 /* Generate a tail call to the target function. */
2996 if (!TREE_USED (function))
2998 assemble_external (function);
2999 TREE_USED (function) = 1;
3001 funexp = XEXP (DECL_RTL (function), 0);
3002 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3003 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3004 SIBLING_CALL_P (insn) = 1;
3006 insn = get_insns ();
3007 shorten_branches (insn);
3008 final_start_function (insn, file, 1);
3009 final (insn, file, 1);
3010 final_end_function ();
3012 /* Stop pretending to be a post-reload pass. */
3013 reload_completed = 0;
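/* For instance (values assumed), a thunk with delta == 8 and
   vcall_offset == 0 comes out as the two-instruction sequence
   add x0, x0, #8; b <function>: the this pointer is bumped in place
   and the jump to FUNCTION is emitted as a sibling call.  */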
3016 static bool
3017 aarch64_tls_referenced_p (rtx x)
3019 if (!TARGET_HAVE_TLS)
3020 return false;
3021 subrtx_iterator::array_type array;
3022 FOR_EACH_SUBRTX (iter, array, x, ALL)
3024 const_rtx x = *iter;
3025 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3026 return true;
3027 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3028 TLS offsets, not real symbol references. */
3029 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3030 iter.skip_subrtxes ();
3032 return false;
3036 static int
3037 aarch64_bitmasks_cmp (const void *i1, const void *i2)
3039 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3040 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3042 if (*imm1 < *imm2)
3043 return -1;
3044 if (*imm1 > *imm2)
3045 return +1;
3046 return 0;
3050 static void
3051 aarch64_build_bitmask_table (void)
3053 unsigned HOST_WIDE_INT mask, imm;
3054 unsigned int log_e, e, s, r;
3055 unsigned int nimms = 0;
3057 for (log_e = 1; log_e <= 6; log_e++)
3059 e = 1 << log_e;
3060 if (e == 64)
3061 mask = ~(HOST_WIDE_INT) 0;
3062 else
3063 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3064 for (s = 1; s < e; s++)
3066 for (r = 0; r < e; r++)
3068 /* Set S consecutive bits to 1 (S < 64). */
3069 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3070 /* Rotate right by R. */
3071 if (r != 0)
3072 imm = ((imm >> r) | (imm << (e - r))) & mask;
3073 /* Replicate the constant depending on SIMD size. */
3074 switch (log_e) {
3075 case 1: imm |= (imm << 2);
3076 case 2: imm |= (imm << 4);
3077 case 3: imm |= (imm << 8);
3078 case 4: imm |= (imm << 16);
3079 case 5: imm |= (imm << 32);
3080 case 6:
3081 break;
3082 default:
3083 gcc_unreachable ();
3085 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3086 aarch64_bitmasks[nimms++] = imm;
3091 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3092 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3093 aarch64_bitmasks_cmp);
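/* To make the encoding rule above concrete: a value is a valid bitmask
   immediate iff it is the replication, across the whole register, of an
   element of size e in {2, 4, 8, 16, 32, 64} bits that is a rotated run
   of s consecutive ones with 1 <= s < e.  The following standalone
   sketch (hypothetical helper, not used by the backend) re-states that
   rule directly by brute force rather than via the sorted table.  */

static int
example_bitmask_imm_p (unsigned long long val)
{
  unsigned int e, s, r, i;

  for (e = 2; e <= 64; e *= 2)
    {
      unsigned long long mask
	= (e == 64) ? ~0ULL : (1ULL << e) - 1;

      for (s = 1; s < e; s++)
	for (r = 0; r < e; r++)
	  {
	    /* s consecutive ones, rotated right by r within the
	       element, then replicated across 64 bits.  */
	    unsigned long long elt = (1ULL << s) - 1;
	    unsigned long long rep = 0;

	    if (r != 0)
	      elt = ((elt >> r) | (elt << (e - r))) & mask;
	    for (i = 0; i < 64; i += e)
	      rep |= elt << i;

	    if (rep == val)
	      return 1;
	  }
    }
  return 0;
}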
3097 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3098 a left shift of 0 or 12 bits. */
3099 bool
3100 aarch64_uimm12_shift (HOST_WIDE_INT val)
3102 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3103 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3108 /* Return true if val is an immediate that can be loaded into a
3109 register by a MOVZ instruction. */
3110 static bool
3111 aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
3113 if (GET_MODE_SIZE (mode) > 4)
3115 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3116 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3117 return 1;
3119 else
3121 /* Ignore sign extension. */
3122 val &= (HOST_WIDE_INT) 0xffffffff;
3124 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3125 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
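/* In other words, a value is MOVZ-able when all of its set bits fall in
   a single 16-bit chunk aligned on a 16-bit boundary.  A standalone
   sketch of the 64-bit case (hypothetical helper, not used by the
   backend):  */

static int
example_single_movz_chunk_p (unsigned long long val)
{
  int shift;

  /* Accept values whose bits outside one aligned 16-bit field are all
     zero; MOVN-able values are handled by applying the same test to
     ~val, as aarch64_move_imm does below.  */
  for (shift = 0; shift < 64; shift += 16)
    if ((val & ~(0xffffULL << shift)) == 0)
      return 1;
  return 0;
}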
3129 /* Return true if val is a valid bitmask immediate. */
3130 bool
3131 aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
3133 if (GET_MODE_SIZE (mode) < 8)
3135 /* Replicate bit pattern. */
3136 val &= (HOST_WIDE_INT) 0xffffffff;
3137 val |= val << 32;
3139 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3140 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3144 /* Return true if val is an immediate that can be loaded into a
3145 register in a single instruction. */
3146 bool
3147 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
3149 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3150 return 1;
3151 return aarch64_bitmask_imm (val, mode);
3154 static bool
3155 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3157 rtx base, offset;
3159 if (GET_CODE (x) == HIGH)
3160 return true;
3162 split_const (x, &base, &offset);
3163 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
3165 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
3166 != SYMBOL_FORCE_TO_MEM)
3167 return true;
3168 else
3169 /* Avoid generating a 64-bit relocation in ILP32; leave it
3170 to aarch64_expand_mov_immediate to handle properly. */
3171 return mode != ptr_mode;
3174 return aarch64_tls_referenced_p (x);
3177 /* Return true if register REGNO is a valid index register.
3178 STRICT_P is true if REG_OK_STRICT is in effect. */
3180 bool
3181 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3183 if (!HARD_REGISTER_NUM_P (regno))
3185 if (!strict_p)
3186 return true;
3188 if (!reg_renumber)
3189 return false;
3191 regno = reg_renumber[regno];
3193 return GP_REGNUM_P (regno);
3196 /* Return true if register REGNO is a valid base register for mode MODE.
3197 STRICT_P is true if REG_OK_STRICT is in effect. */
3199 bool
3200 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3202 if (!HARD_REGISTER_NUM_P (regno))
3204 if (!strict_p)
3205 return true;
3207 if (!reg_renumber)
3208 return false;
3210 regno = reg_renumber[regno];
3213 /* The fake registers will be eliminated to either the stack or
3214 hard frame pointer, both of which are usually valid base registers.
3215 Reload deals with the cases where the eliminated form isn't valid. */
3216 return (GP_REGNUM_P (regno)
3217 || regno == SP_REGNUM
3218 || regno == FRAME_POINTER_REGNUM
3219 || regno == ARG_POINTER_REGNUM);
3222 /* Return true if X is a valid base register for mode MODE.
3223 STRICT_P is true if REG_OK_STRICT is in effect. */
3225 static bool
3226 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3228 if (!strict_p && GET_CODE (x) == SUBREG)
3229 x = SUBREG_REG (x);
3231 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3234 /* Return true if address offset is a valid index. If it is, fill in INFO
3235 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3237 static bool
3238 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3239 machine_mode mode, bool strict_p)
3241 enum aarch64_address_type type;
3242 rtx index;
3243 int shift;
3245 /* (reg:P) */
3246 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3247 && GET_MODE (x) == Pmode)
3249 type = ADDRESS_REG_REG;
3250 index = x;
3251 shift = 0;
3253 /* (sign_extend:DI (reg:SI)) */
3254 else if ((GET_CODE (x) == SIGN_EXTEND
3255 || GET_CODE (x) == ZERO_EXTEND)
3256 && GET_MODE (x) == DImode
3257 && GET_MODE (XEXP (x, 0)) == SImode)
3259 type = (GET_CODE (x) == SIGN_EXTEND)
3260 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3261 index = XEXP (x, 0);
3262 shift = 0;
3264 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3265 else if (GET_CODE (x) == MULT
3266 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3267 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3268 && GET_MODE (XEXP (x, 0)) == DImode
3269 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3270 && CONST_INT_P (XEXP (x, 1)))
3272 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3273 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3274 index = XEXP (XEXP (x, 0), 0);
3275 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3277 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3278 else if (GET_CODE (x) == ASHIFT
3279 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3280 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3281 && GET_MODE (XEXP (x, 0)) == DImode
3282 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3283 && CONST_INT_P (XEXP (x, 1)))
3285 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3286 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3287 index = XEXP (XEXP (x, 0), 0);
3288 shift = INTVAL (XEXP (x, 1));
3290 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3291 else if ((GET_CODE (x) == SIGN_EXTRACT
3292 || GET_CODE (x) == ZERO_EXTRACT)
3293 && GET_MODE (x) == DImode
3294 && GET_CODE (XEXP (x, 0)) == MULT
3295 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3296 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3298 type = (GET_CODE (x) == SIGN_EXTRACT)
3299 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3300 index = XEXP (XEXP (x, 0), 0);
3301 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3302 if (INTVAL (XEXP (x, 1)) != 32 + shift
3303 || INTVAL (XEXP (x, 2)) != 0)
3304 shift = -1;
3306 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3307 (const_int 0xffffffff<<shift)) */
3308 else if (GET_CODE (x) == AND
3309 && GET_MODE (x) == DImode
3310 && GET_CODE (XEXP (x, 0)) == MULT
3311 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3312 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3313 && CONST_INT_P (XEXP (x, 1)))
3315 type = ADDRESS_REG_UXTW;
3316 index = XEXP (XEXP (x, 0), 0);
3317 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3318 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3319 shift = -1;
3321 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3322 else if ((GET_CODE (x) == SIGN_EXTRACT
3323 || GET_CODE (x) == ZERO_EXTRACT)
3324 && GET_MODE (x) == DImode
3325 && GET_CODE (XEXP (x, 0)) == ASHIFT
3326 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3327 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3329 type = (GET_CODE (x) == SIGN_EXTRACT)
3330 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3331 index = XEXP (XEXP (x, 0), 0);
3332 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3333 if (INTVAL (XEXP (x, 1)) != 32 + shift
3334 || INTVAL (XEXP (x, 2)) != 0)
3335 shift = -1;
3337 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3338 (const_int 0xffffffff<<shift)) */
3339 else if (GET_CODE (x) == AND
3340 && GET_MODE (x) == DImode
3341 && GET_CODE (XEXP (x, 0)) == ASHIFT
3342 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3343 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3344 && CONST_INT_P (XEXP (x, 1)))
3346 type = ADDRESS_REG_UXTW;
3347 index = XEXP (XEXP (x, 0), 0);
3348 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3349 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3350 shift = -1;
3352 /* (mult:P (reg:P) (const_int scale)) */
3353 else if (GET_CODE (x) == MULT
3354 && GET_MODE (x) == Pmode
3355 && GET_MODE (XEXP (x, 0)) == Pmode
3356 && CONST_INT_P (XEXP (x, 1)))
3358 type = ADDRESS_REG_REG;
3359 index = XEXP (x, 0);
3360 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3362 /* (ashift:P (reg:P) (const_int shift)) */
3363 else if (GET_CODE (x) == ASHIFT
3364 && GET_MODE (x) == Pmode
3365 && GET_MODE (XEXP (x, 0)) == Pmode
3366 && CONST_INT_P (XEXP (x, 1)))
3368 type = ADDRESS_REG_REG;
3369 index = XEXP (x, 0);
3370 shift = INTVAL (XEXP (x, 1));
3372 else
3373 return false;
3375 if (GET_CODE (index) == SUBREG)
3376 index = SUBREG_REG (index);
3378 if ((shift == 0 ||
3379 (shift > 0 && shift <= 3
3380 && (1 << shift) == GET_MODE_SIZE (mode)))
3381 && REG_P (index)
3382 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3384 info->type = type;
3385 info->offset = index;
3386 info->shift = shift;
3387 return true;
3390 return false;
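/* By way of example (register numbers assumed), the forms accepted
   above correspond to addresses such as [x0, x1] (ADDRESS_REG_REG,
   shift 0), [x0, x1, lsl #3] for an 8-byte access (ADDRESS_REG_REG,
   shift 3), [x0, w1, sxtw #2] for a 4-byte access (ADDRESS_REG_SXTW)
   and [x0, w1, uxtw] (ADDRESS_REG_UXTW); a non-zero shift is only
   accepted when 1 << shift equals the access size.  */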
3393 bool
3394 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3396 return (offset >= -64 * GET_MODE_SIZE (mode)
3397 && offset < 64 * GET_MODE_SIZE (mode)
3398 && offset % GET_MODE_SIZE (mode) == 0);
3401 static inline bool
3402 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3403 HOST_WIDE_INT offset)
3405 return offset >= -256 && offset < 256;
3408 static inline bool
3409 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3411 return (offset >= 0
3412 && offset < 4096 * GET_MODE_SIZE (mode)
3413 && offset % GET_MODE_SIZE (mode) == 0);
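/* For an 8-byte access (DImode, size assumed) these three predicates
   accept, respectively: multiples of 8 in [-512, 504] (the ldp/stp
   form), any offset in [-256, 255] (the unscaled ldur/stur form), and
   multiples of 8 in [0, 32760] (the scaled unsigned ldr/str form).  */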
3416 /* Return true if X is a valid address for machine mode MODE. If it is,
3417 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3418 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3420 static bool
3421 aarch64_classify_address (struct aarch64_address_info *info,
3422 rtx x, machine_mode mode,
3423 RTX_CODE outer_code, bool strict_p)
3425 enum rtx_code code = GET_CODE (x);
3426 rtx op0, op1;
3428 /* On BE, we use load/store pair for all large int mode load/stores. */
3429 bool load_store_pair_p = (outer_code == PARALLEL
3430 || (BYTES_BIG_ENDIAN
3431 && aarch64_vect_struct_mode_p (mode)));
3433 bool allow_reg_index_p =
3434 !load_store_pair_p
3435 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3436 && !aarch64_vect_struct_mode_p (mode);
3438 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3439 REG addressing. */
3440 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
3441 && (code != POST_INC && code != REG))
3442 return false;
3444 switch (code)
3446 case REG:
3447 case SUBREG:
3448 info->type = ADDRESS_REG_IMM;
3449 info->base = x;
3450 info->offset = const0_rtx;
3451 return aarch64_base_register_rtx_p (x, strict_p);
3453 case PLUS:
3454 op0 = XEXP (x, 0);
3455 op1 = XEXP (x, 1);
3457 if (! strict_p
3458 && REG_P (op0)
3459 && (op0 == virtual_stack_vars_rtx
3460 || op0 == frame_pointer_rtx
3461 || op0 == arg_pointer_rtx)
3462 && CONST_INT_P (op1))
3464 info->type = ADDRESS_REG_IMM;
3465 info->base = op0;
3466 info->offset = op1;
3468 return true;
3471 if (GET_MODE_SIZE (mode) != 0
3472 && CONST_INT_P (op1)
3473 && aarch64_base_register_rtx_p (op0, strict_p))
3475 HOST_WIDE_INT offset = INTVAL (op1);
3477 info->type = ADDRESS_REG_IMM;
3478 info->base = op0;
3479 info->offset = op1;
3481 /* TImode and TFmode values are allowed in both pairs of X
3482 registers and individual Q registers. The available
3483 address modes are:
3484 X,X: 7-bit signed scaled offset
3485 Q: 9-bit signed offset
3486 We conservatively require an offset representable in both modes.
3488 if (mode == TImode || mode == TFmode)
3489 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3490 && offset_9bit_signed_unscaled_p (mode, offset));
3492 /* A 7-bit offset check because OImode will be emitted as an ldp/stp
3493 instruction (only big endian will get here).
3494 For ldp/stp instructions, the offset is scaled by the size of a
3495 single element of the pair. */
3496 if (mode == OImode)
3497 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3499 /* Three 9/12-bit offset checks because CImode will be emitted as three
3500 ldr/str instructions (only big endian will get here). */
3501 if (mode == CImode)
3502 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3503 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3504 || offset_12bit_unsigned_scaled_p (V16QImode,
3505 offset + 32)));
3507 /* Two 7-bit offset checks because XImode will be emitted as two ldp/stp
3508 instructions (only big endian will get here). */
3509 if (mode == XImode)
3510 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3511 && aarch64_offset_7bit_signed_scaled_p (TImode,
3512 offset + 32));
3514 if (load_store_pair_p)
3515 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3516 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3517 else
3518 return (offset_9bit_signed_unscaled_p (mode, offset)
3519 || offset_12bit_unsigned_scaled_p (mode, offset));
3522 if (allow_reg_index_p)
3524 /* Look for base + (scaled/extended) index register. */
3525 if (aarch64_base_register_rtx_p (op0, strict_p)
3526 && aarch64_classify_index (info, op1, mode, strict_p))
3528 info->base = op0;
3529 return true;
3531 if (aarch64_base_register_rtx_p (op1, strict_p)
3532 && aarch64_classify_index (info, op0, mode, strict_p))
3534 info->base = op1;
3535 return true;
3539 return false;
3541 case POST_INC:
3542 case POST_DEC:
3543 case PRE_INC:
3544 case PRE_DEC:
3545 info->type = ADDRESS_REG_WB;
3546 info->base = XEXP (x, 0);
3547 info->offset = NULL_RTX;
3548 return aarch64_base_register_rtx_p (info->base, strict_p);
3550 case POST_MODIFY:
3551 case PRE_MODIFY:
3552 info->type = ADDRESS_REG_WB;
3553 info->base = XEXP (x, 0);
3554 if (GET_CODE (XEXP (x, 1)) == PLUS
3555 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3556 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3557 && aarch64_base_register_rtx_p (info->base, strict_p))
3559 HOST_WIDE_INT offset;
3560 info->offset = XEXP (XEXP (x, 1), 1);
3561 offset = INTVAL (info->offset);
3563 /* TImode and TFmode values are allowed in both pairs of X
3564 registers and individual Q registers. The available
3565 address modes are:
3566 X,X: 7-bit signed scaled offset
3567 Q: 9-bit signed offset
3568 We conservatively require an offset representable in both modes.
3570 if (mode == TImode || mode == TFmode)
3571 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3572 && offset_9bit_signed_unscaled_p (mode, offset));
3574 if (load_store_pair_p)
3575 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3576 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3577 else
3578 return offset_9bit_signed_unscaled_p (mode, offset);
3580 return false;
3582 case CONST:
3583 case SYMBOL_REF:
3584 case LABEL_REF:
3585 /* load literal: pc-relative constant pool entry. Only supported
3586 for SI mode or larger. */
3587 info->type = ADDRESS_SYMBOLIC;
3589 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
3591 rtx sym, addend;
3593 split_const (x, &sym, &addend);
3594 return (GET_CODE (sym) == LABEL_REF
3595 || (GET_CODE (sym) == SYMBOL_REF
3596 && CONSTANT_POOL_ADDRESS_P (sym)));
3598 return false;
3600 case LO_SUM:
3601 info->type = ADDRESS_LO_SUM;
3602 info->base = XEXP (x, 0);
3603 info->offset = XEXP (x, 1);
3604 if (allow_reg_index_p
3605 && aarch64_base_register_rtx_p (info->base, strict_p))
3607 rtx sym, offs;
3608 split_const (info->offset, &sym, &offs);
3609 if (GET_CODE (sym) == SYMBOL_REF
3610 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3611 == SYMBOL_SMALL_ABSOLUTE))
3613 /* The symbol and offset must be aligned to the access size. */
3614 unsigned int align;
3615 unsigned int ref_size;
3617 if (CONSTANT_POOL_ADDRESS_P (sym))
3618 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3619 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3621 tree exp = SYMBOL_REF_DECL (sym);
3622 align = TYPE_ALIGN (TREE_TYPE (exp));
3623 align = CONSTANT_ALIGNMENT (exp, align);
3625 else if (SYMBOL_REF_DECL (sym))
3626 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3627 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3628 && SYMBOL_REF_BLOCK (sym) != NULL)
3629 align = SYMBOL_REF_BLOCK (sym)->alignment;
3630 else
3631 align = BITS_PER_UNIT;
3633 ref_size = GET_MODE_SIZE (mode);
3634 if (ref_size == 0)
3635 ref_size = GET_MODE_SIZE (DImode);
3637 return ((INTVAL (offs) & (ref_size - 1)) == 0
3638 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3641 return false;
3643 default:
3644 return false;
3648 bool
3649 aarch64_symbolic_address_p (rtx x)
3651 rtx offset;
3653 split_const (x, &x, &offset);
3654 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3657 /* Classify the base of symbolic expression X, given that X appears in
3658 context CONTEXT. */
3660 enum aarch64_symbol_type
3661 aarch64_classify_symbolic_expression (rtx x,
3662 enum aarch64_symbol_context context)
3664 rtx offset;
3666 split_const (x, &x, &offset);
3667 return aarch64_classify_symbol (x, offset, context);
3671 /* Return TRUE if X is a legitimate address for accessing memory in
3672 mode MODE. */
3673 static bool
3674 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3676 struct aarch64_address_info addr;
3678 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3681 /* Return TRUE if X is a legitimate address for accessing memory in
3682 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3683 pair operation. */
3684 bool
3685 aarch64_legitimate_address_p (machine_mode mode, rtx x,
3686 RTX_CODE outer_code, bool strict_p)
3688 struct aarch64_address_info addr;
3690 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3693 /* Return TRUE if rtx X is the immediate constant 0.0. */
3694 bool
3695 aarch64_float_const_zero_rtx_p (rtx x)
3697 REAL_VALUE_TYPE r;
3699 if (GET_MODE (x) == VOIDmode)
3700 return false;
3702 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3703 if (REAL_VALUE_MINUS_ZERO (r))
3704 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3705 return REAL_VALUES_EQUAL (r, dconst0);
3708 /* Return the fixed registers used for condition codes. */
3710 static bool
3711 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3713 *p1 = CC_REGNUM;
3714 *p2 = INVALID_REGNUM;
3715 return true;
3718 /* Emit call insn with PAT and do aarch64-specific handling. */
3720 void
3721 aarch64_emit_call_insn (rtx pat)
3723 rtx insn = emit_call_insn (pat);
3725 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3726 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3727 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3730 machine_mode
3731 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3733 /* Floating-point compares that need not signal on a quiet NaN (equality
3734 and the unordered forms) return CCFPmode; LT, LE, GT and GE return CCFPEmode. */
3735 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3737 switch (code)
3739 case EQ:
3740 case NE:
3741 case UNORDERED:
3742 case ORDERED:
3743 case UNLT:
3744 case UNLE:
3745 case UNGT:
3746 case UNGE:
3747 case UNEQ:
3748 case LTGT:
3749 return CCFPmode;
3751 case LT:
3752 case LE:
3753 case GT:
3754 case GE:
3755 return CCFPEmode;
3757 default:
3758 gcc_unreachable ();
3762 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3763 && y == const0_rtx
3764 && (code == EQ || code == NE || code == LT || code == GE)
3765 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3766 || GET_CODE (x) == NEG))
3767 return CC_NZmode;
3769 /* A compare with a shifted operand. Because of canonicalization,
3770 the comparison will have to be swapped when we emit the assembly
3771 code. */
3772 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3773 && (REG_P (y) || GET_CODE (y) == SUBREG)
3774 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3775 || GET_CODE (x) == LSHIFTRT
3776 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3777 return CC_SWPmode;
3779 /* Similarly for a negated operand, but we can only do this for
3780 equalities. */
3781 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3782 && (REG_P (y) || GET_CODE (y) == SUBREG)
3783 && (code == EQ || code == NE)
3784 && GET_CODE (x) == NEG)
3785 return CC_Zmode;
3787 /* A compare of a mode narrower than SI mode against zero can be done
3788 by extending the value in the comparison. */
3789 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3790 && y == const0_rtx)
3791 /* Only use sign-extension if we really need it. */
3792 return ((code == GT || code == GE || code == LE || code == LT)
3793 ? CC_SESWPmode : CC_ZESWPmode);
3795 /* For everything else, return CCmode. */
3796 return CCmode;
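/* As an example of the swapped case (operands assumed): for a
   comparison such as (gt (ashift:DI x (const_int 3)) y), the shifted
   operand must go into the second position of the hardware compare, so
   the comparison is emitted with the operands swapped (cmp y, x, lsl 3)
   under CC_SWPmode, and the GT test is evaluated as LT, exactly as
   mapped in aarch64_get_condition_code_1 below.  */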
3799 static int
3800 aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3803 aarch64_get_condition_code (rtx x)
3805 machine_mode mode = GET_MODE (XEXP (x, 0));
3806 enum rtx_code comp_code = GET_CODE (x);
3808 if (GET_MODE_CLASS (mode) != MODE_CC)
3809 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3810 return aarch64_get_condition_code_1 (mode, comp_code);
3813 static int
3814 aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3816 int ne = -1, eq = -1;
3817 switch (mode)
3819 case CCFPmode:
3820 case CCFPEmode:
3821 switch (comp_code)
3823 case GE: return AARCH64_GE;
3824 case GT: return AARCH64_GT;
3825 case LE: return AARCH64_LS;
3826 case LT: return AARCH64_MI;
3827 case NE: return AARCH64_NE;
3828 case EQ: return AARCH64_EQ;
3829 case ORDERED: return AARCH64_VC;
3830 case UNORDERED: return AARCH64_VS;
3831 case UNLT: return AARCH64_LT;
3832 case UNLE: return AARCH64_LE;
3833 case UNGT: return AARCH64_HI;
3834 case UNGE: return AARCH64_PL;
3835 default: return -1;
3837 break;
3839 case CC_DNEmode:
3840 ne = AARCH64_NE;
3841 eq = AARCH64_EQ;
3842 break;
3844 case CC_DEQmode:
3845 ne = AARCH64_EQ;
3846 eq = AARCH64_NE;
3847 break;
3849 case CC_DGEmode:
3850 ne = AARCH64_GE;
3851 eq = AARCH64_LT;
3852 break;
3854 case CC_DLTmode:
3855 ne = AARCH64_LT;
3856 eq = AARCH64_GE;
3857 break;
3859 case CC_DGTmode:
3860 ne = AARCH64_GT;
3861 eq = AARCH64_LE;
3862 break;
3864 case CC_DLEmode:
3865 ne = AARCH64_LE;
3866 eq = AARCH64_GT;
3867 break;
3869 case CC_DGEUmode:
3870 ne = AARCH64_CS;
3871 eq = AARCH64_CC;
3872 break;
3874 case CC_DLTUmode:
3875 ne = AARCH64_CC;
3876 eq = AARCH64_CS;
3877 break;
3879 case CC_DGTUmode:
3880 ne = AARCH64_HI;
3881 eq = AARCH64_LS;
3882 break;
3884 case CC_DLEUmode:
3885 ne = AARCH64_LS;
3886 eq = AARCH64_HI;
3887 break;
3889 case CCmode:
3890 switch (comp_code)
3892 case NE: return AARCH64_NE;
3893 case EQ: return AARCH64_EQ;
3894 case GE: return AARCH64_GE;
3895 case GT: return AARCH64_GT;
3896 case LE: return AARCH64_LE;
3897 case LT: return AARCH64_LT;
3898 case GEU: return AARCH64_CS;
3899 case GTU: return AARCH64_HI;
3900 case LEU: return AARCH64_LS;
3901 case LTU: return AARCH64_CC;
3902 default: return -1;
3904 break;
3906 case CC_SWPmode:
3907 case CC_ZESWPmode:
3908 case CC_SESWPmode:
3909 switch (comp_code)
3911 case NE: return AARCH64_NE;
3912 case EQ: return AARCH64_EQ;
3913 case GE: return AARCH64_LE;
3914 case GT: return AARCH64_LT;
3915 case LE: return AARCH64_GE;
3916 case LT: return AARCH64_GT;
3917 case GEU: return AARCH64_LS;
3918 case GTU: return AARCH64_CC;
3919 case LEU: return AARCH64_CS;
3920 case LTU: return AARCH64_HI;
3921 default: return -1;
3923 break;
3925 case CC_NZmode:
3926 switch (comp_code)
3928 case NE: return AARCH64_NE;
3929 case EQ: return AARCH64_EQ;
3930 case GE: return AARCH64_PL;
3931 case LT: return AARCH64_MI;
3932 default: return -1;
3934 break;
3936 case CC_Zmode:
3937 switch (comp_code)
3939 case NE: return AARCH64_NE;
3940 case EQ: return AARCH64_EQ;
3941 default: return -1;
3943 break;
3945 default:
3946 return -1;
3947 break;
3950 if (comp_code == NE)
3951 return ne;
3953 if (comp_code == EQ)
3954 return eq;
3956 return -1;
3959 bool
3960 aarch64_const_vec_all_same_in_range_p (rtx x,
3961 HOST_WIDE_INT minval,
3962 HOST_WIDE_INT maxval)
3964 HOST_WIDE_INT firstval;
3965 int count, i;
3967 if (GET_CODE (x) != CONST_VECTOR
3968 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3969 return false;
3971 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3972 if (firstval < minval || firstval > maxval)
3973 return false;
3975 count = CONST_VECTOR_NUNITS (x);
3976 for (i = 1; i < count; i++)
3977 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3978 return false;
3980 return true;
3983 bool
3984 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3986 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3989 static unsigned
3990 bit_count (unsigned HOST_WIDE_INT value)
3992 unsigned count = 0;
3994 while (value)
3996 count++;
3997 value &= value - 1;
4000 return count;
4003 /* N Z C V. */
4004 #define AARCH64_CC_V 1
4005 #define AARCH64_CC_C (1 << 1)
4006 #define AARCH64_CC_Z (1 << 2)
4007 #define AARCH64_CC_N (1 << 3)
4009 /* N Z C V flags for ccmp. The first value is used when the combining
4010 operation is AND; the second when it is IOR. Indexed by AARCH64_COND_CODE. */
4011 static const int aarch64_nzcv_codes[][2] =
4013 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
4014 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
4015 {AARCH64_CC_C, 0}, /* CS, C == 1. */
4016 {0, AARCH64_CC_C}, /* CC, C == 0. */
4017 {AARCH64_CC_N, 0}, /* MI, N == 1. */
4018 {0, AARCH64_CC_N}, /* PL, N == 0. */
4019 {AARCH64_CC_V, 0}, /* VS, V == 1. */
4020 {0, AARCH64_CC_V}, /* VC, V == 0. */
4021 {AARCH64_CC_C, 0}, /* HI, C ==1 && Z == 0. */
4022 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
4023 {0, AARCH64_CC_V}, /* GE, N == V. */
4024 {AARCH64_CC_V, 0}, /* LT, N != V. */
4025 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
4026 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
4027 {0, 0}, /* AL, Any. */
4028 {0, 0}, /* NV, Any. */
4032 aarch64_ccmp_mode_to_code (enum machine_mode mode)
4034 switch (mode)
4036 case CC_DNEmode:
4037 return NE;
4039 case CC_DEQmode:
4040 return EQ;
4042 case CC_DLEmode:
4043 return LE;
4045 case CC_DGTmode:
4046 return GT;
4048 case CC_DLTmode:
4049 return LT;
4051 case CC_DGEmode:
4052 return GE;
4054 case CC_DLEUmode:
4055 return LEU;
4057 case CC_DGTUmode:
4058 return GTU;
4060 case CC_DLTUmode:
4061 return LTU;
4063 case CC_DGEUmode:
4064 return GEU;
4066 default:
4067 gcc_unreachable ();
4072 void
4073 aarch64_print_operand (FILE *f, rtx x, char code)
4075 switch (code)
4077 /* An integer or symbol address without a preceding # sign. */
4078 case 'c':
4079 switch (GET_CODE (x))
4081 case CONST_INT:
4082 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4083 break;
4085 case SYMBOL_REF:
4086 output_addr_const (f, x);
4087 break;
4089 case CONST:
4090 if (GET_CODE (XEXP (x, 0)) == PLUS
4091 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4093 output_addr_const (f, x);
4094 break;
4096 /* Fall through. */
4098 default:
4099 output_operand_lossage ("Unsupported operand for code '%c'", code);
4101 break;
4103 case 'e':
4104 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4106 int n;
4108 if (!CONST_INT_P (x)
4109 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4111 output_operand_lossage ("invalid operand for '%%%c'", code);
4112 return;
4115 switch (n)
4117 case 3:
4118 fputc ('b', f);
4119 break;
4120 case 4:
4121 fputc ('h', f);
4122 break;
4123 case 5:
4124 fputc ('w', f);
4125 break;
4126 default:
4127 output_operand_lossage ("invalid operand for '%%%c'", code);
4128 return;
4131 break;
4133 case 'p':
4135 int n;
4137 /* Print N such that 2^N == X. */
4138 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
4140 output_operand_lossage ("invalid operand for '%%%c'", code);
4141 return;
4144 asm_fprintf (f, "%d", n);
4146 break;
4148 case 'P':
4149 /* Print the number of non-zero bits in X (a const_int). */
4150 if (!CONST_INT_P (x))
4152 output_operand_lossage ("invalid operand for '%%%c'", code);
4153 return;
4156 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4157 break;
4159 case 'H':
4160 /* Print the higher numbered register of a pair (TImode) of regs. */
4161 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
4163 output_operand_lossage ("invalid operand for '%%%c'", code);
4164 return;
4167 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
4168 break;
4170 case 'm':
4172 int cond_code;
4173 /* Print a condition (eq, ne, etc). */
4175 /* CONST_TRUE_RTX means always -- that's the default. */
4176 if (x == const_true_rtx)
4177 return;
4179 if (!COMPARISON_P (x))
4181 output_operand_lossage ("invalid operand for '%%%c'", code);
4182 return;
4185 cond_code = aarch64_get_condition_code (x);
4186 gcc_assert (cond_code >= 0);
4187 fputs (aarch64_condition_codes[cond_code], f);
4189 break;
4191 case 'M':
4193 int cond_code;
4194 /* Print the inverse of a condition (eq <-> ne, etc). */
4196 /* CONST_TRUE_RTX means never -- that's the default. */
4197 if (x == const_true_rtx)
4199 fputs ("nv", f);
4200 return;
4203 if (!COMPARISON_P (x))
4205 output_operand_lossage ("invalid operand for '%%%c'", code);
4206 return;
4208 cond_code = aarch64_get_condition_code (x);
4209 gcc_assert (cond_code >= 0);
4210 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4211 (cond_code)], f);
4213 break;
4215 case 'b':
4216 case 'h':
4217 case 's':
4218 case 'd':
4219 case 'q':
4220 /* Print a scalar FP/SIMD register name. */
4221 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4223 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4224 return;
4226 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
4227 break;
4229 case 'S':
4230 case 'T':
4231 case 'U':
4232 case 'V':
4233 /* Print the first FP/SIMD register name in a list. */
4234 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4236 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4237 return;
4239 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
4240 break;
4242 case 'R':
4243 /* Print a scalar FP/SIMD register name + 1. */
4244 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4246 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4247 return;
4249 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4250 break;
4252 case 'X':
4253 /* Print bottom 16 bits of integer constant in hex. */
4254 if (!CONST_INT_P (x))
4256 output_operand_lossage ("invalid operand for '%%%c'", code);
4257 return;
4259 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
4260 break;
4262 case 'w':
4263 case 'x':
4264 /* Print a general register name or the zero register (32-bit or
4265 64-bit). */
4266 if (x == const0_rtx
4267 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
4269 asm_fprintf (f, "%czr", code);
4270 break;
4273 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4275 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
4276 break;
4279 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4281 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
4282 break;
4285 /* Fall through */
4287 case 0:
4288 /* Print a normal operand, if it's a general register, then we
4289 assume DImode. */
4290 if (x == NULL)
4292 output_operand_lossage ("missing operand");
4293 return;
4296 switch (GET_CODE (x))
4298 case REG:
4299 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
4300 break;
4302 case MEM:
4303 aarch64_memory_reference_mode = GET_MODE (x);
4304 output_address (XEXP (x, 0));
4305 break;
4307 case LABEL_REF:
4308 case SYMBOL_REF:
4309 output_addr_const (asm_out_file, x);
4310 break;
4312 case CONST_INT:
4313 asm_fprintf (f, "%wd", INTVAL (x));
4314 break;
4316 case CONST_VECTOR:
4317 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4319 gcc_assert (
4320 aarch64_const_vec_all_same_in_range_p (x,
4321 HOST_WIDE_INT_MIN,
4322 HOST_WIDE_INT_MAX));
4323 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4325 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4327 fputc ('0', f);
4329 else
4330 gcc_unreachable ();
4331 break;
4333 case CONST_DOUBLE:
4334 /* CONST_DOUBLE can represent a double-width integer.
4335 In this case, the mode of x is VOIDmode. */
4336 if (GET_MODE (x) == VOIDmode)
4337 ; /* Do Nothing. */
4338 else if (aarch64_float_const_zero_rtx_p (x))
4340 fputc ('0', f);
4341 break;
4343 else if (aarch64_float_const_representable_p (x))
4345 #define buf_size 20
4346 char float_buf[buf_size] = {'\0'};
4347 REAL_VALUE_TYPE r;
4348 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4349 real_to_decimal_for_mode (float_buf, &r,
4350 buf_size, buf_size,
4351 1, GET_MODE (x));
4352 asm_fprintf (asm_out_file, "%s", float_buf);
4353 break;
4354 #undef buf_size
4356 output_operand_lossage ("invalid constant");
4357 return;
4358 default:
4359 output_operand_lossage ("invalid operand");
4360 return;
4362 break;
4364 case 'A':
4365 if (GET_CODE (x) == HIGH)
4366 x = XEXP (x, 0);
4368 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4370 case SYMBOL_SMALL_GOT:
4371 asm_fprintf (asm_out_file, ":got:");
4372 break;
4374 case SYMBOL_SMALL_TLSGD:
4375 asm_fprintf (asm_out_file, ":tlsgd:");
4376 break;
4378 case SYMBOL_SMALL_TLSDESC:
4379 asm_fprintf (asm_out_file, ":tlsdesc:");
4380 break;
4382 case SYMBOL_SMALL_GOTTPREL:
4383 asm_fprintf (asm_out_file, ":gottprel:");
4384 break;
4386 case SYMBOL_SMALL_TPREL:
4387 asm_fprintf (asm_out_file, ":tprel:");
4388 break;
4390 case SYMBOL_TINY_GOT:
4391 gcc_unreachable ();
4392 break;
4394 default:
4395 break;
4397 output_addr_const (asm_out_file, x);
4398 break;
4400 case 'L':
4401 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4403 case SYMBOL_SMALL_GOT:
4404 asm_fprintf (asm_out_file, ":lo12:");
4405 break;
4407 case SYMBOL_SMALL_TLSGD:
4408 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4409 break;
4411 case SYMBOL_SMALL_TLSDESC:
4412 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4413 break;
4415 case SYMBOL_SMALL_GOTTPREL:
4416 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4417 break;
4419 case SYMBOL_SMALL_TPREL:
4420 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4421 break;
4423 case SYMBOL_TINY_GOT:
4424 asm_fprintf (asm_out_file, ":got:");
4425 break;
4427 default:
4428 break;
4430 output_addr_const (asm_out_file, x);
4431 break;
4433 case 'G':
4435 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4437 case SYMBOL_SMALL_TPREL:
4438 asm_fprintf (asm_out_file, ":tprel_hi12:");
4439 break;
4440 default:
4441 break;
4443 output_addr_const (asm_out_file, x);
4444 break;
4446 case 'K':
4448 int cond_code;
4449 /* Print nzcv. */
4451 if (!COMPARISON_P (x))
4453 output_operand_lossage ("invalid operand for '%%%c'", code);
4454 return;
4457 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4458 gcc_assert (cond_code >= 0);
4459 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4461 break;
4463 case 'k':
4465 int cond_code;
4466 /* Print nzcv. */
4468 if (!COMPARISON_P (x))
4470 output_operand_lossage ("invalid operand for '%%%c'", code);
4471 return;
4474 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4475 gcc_assert (cond_code >= 0);
4476 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4478 break;
4480 default:
4481 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4482 return;
4486 void
4487 aarch64_print_operand_address (FILE *f, rtx x)
4489 struct aarch64_address_info addr;
4491 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4492 MEM, true))
4493 switch (addr.type)
4495 case ADDRESS_REG_IMM:
4496 if (addr.offset == const0_rtx)
4497 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4498 else
4499 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4500 INTVAL (addr.offset));
4501 return;
4503 case ADDRESS_REG_REG:
4504 if (addr.shift == 0)
4505 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4506 reg_names [REGNO (addr.offset)]);
4507 else
4508 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4509 reg_names [REGNO (addr.offset)], addr.shift);
4510 return;
4512 case ADDRESS_REG_UXTW:
4513 if (addr.shift == 0)
4514 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4515 REGNO (addr.offset) - R0_REGNUM);
4516 else
4517 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4518 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4519 return;
4521 case ADDRESS_REG_SXTW:
4522 if (addr.shift == 0)
4523 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4524 REGNO (addr.offset) - R0_REGNUM);
4525 else
4526 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4527 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4528 return;
4530 case ADDRESS_REG_WB:
4531 switch (GET_CODE (x))
4533 case PRE_INC:
4534 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4535 GET_MODE_SIZE (aarch64_memory_reference_mode));
4536 return;
4537 case POST_INC:
4538 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4539 GET_MODE_SIZE (aarch64_memory_reference_mode));
4540 return;
4541 case PRE_DEC:
4542 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4543 GET_MODE_SIZE (aarch64_memory_reference_mode));
4544 return;
4545 case POST_DEC:
4546 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4547 GET_MODE_SIZE (aarch64_memory_reference_mode));
4548 return;
4549 case PRE_MODIFY:
4550 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4551 INTVAL (addr.offset));
4552 return;
4553 case POST_MODIFY:
4554 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4555 INTVAL (addr.offset));
4556 return;
4557 default:
4558 break;
4560 break;
4562 case ADDRESS_LO_SUM:
4563 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4564 output_addr_const (f, addr.offset);
4565 asm_fprintf (f, "]");
4566 return;
4568 case ADDRESS_SYMBOLIC:
4569 break;
4572 output_addr_const (f, x);
4575 bool
4576 aarch64_label_mentioned_p (rtx x)
4578 const char *fmt;
4579 int i;
4581 if (GET_CODE (x) == LABEL_REF)
4582 return true;
4584 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4585 referencing instruction, but they are constant offsets, not
4586 symbols. */
4587 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4588 return false;
4590 fmt = GET_RTX_FORMAT (GET_CODE (x));
4591 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4593 if (fmt[i] == 'E')
4595 int j;
4597 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4598 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4599 return 1;
4601 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4602 return 1;
4605 return 0;
4608 /* Implement REGNO_REG_CLASS. */
4610 enum reg_class
4611 aarch64_regno_regclass (unsigned regno)
4613 if (GP_REGNUM_P (regno))
4614 return GENERAL_REGS;
4616 if (regno == SP_REGNUM)
4617 return STACK_REG;
4619 if (regno == FRAME_POINTER_REGNUM
4620 || regno == ARG_POINTER_REGNUM)
4621 return POINTER_REGS;
4623 if (FP_REGNUM_P (regno))
4624 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4626 return NO_REGS;
4629 static rtx
4630 aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
4632 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4633 where mask is selected by alignment and size of the offset.
4634 We try to pick as large a range for the offset as possible to
4635 maximize the chance of a CSE. However, for aligned addresses
4636 we limit the range to 4k so that structures with different sized
4637 elements are likely to use the same base. */
4639 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4641 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4642 HOST_WIDE_INT base_offset;
4644 /* Does it look like we'll need a load/store-pair operation? */
4645 if (GET_MODE_SIZE (mode) > 16
4646 || mode == TImode)
4647 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4648 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4649 /* For offsets that aren't a multiple of the access size, the limit is
4650 -256...255. */
4651 else if (offset & (GET_MODE_SIZE (mode) - 1))
4652 base_offset = (offset + 0x100) & ~0x1ff;
4653 else
4654 base_offset = offset & ~0xfff;
4656 if (base_offset == 0)
4657 return x;
4659 offset -= base_offset;
4660 rtx base_reg = gen_reg_rtx (Pmode);
4661 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4662 NULL_RTX);
4663 emit_move_insn (base_reg, val);
4664 x = plus_constant (Pmode, base_reg, offset);
4667 return x;
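/* For illustration only: a minimal sketch of the second and third branches
   of the split above, redone with plain C types (the example_* name is
   hypothetical and not used by the port).  For a 4-byte access at
   x + 0x12344 (the offset is a multiple of the access size) the anchor
   becomes x + 0x12000 with a residual offset of 0x344; for x + 0x12345
   (misaligned) the anchor becomes x + 0x12400 with a residual of -0xbb,
   inside the -256...255 unscaled range.  */

static long long
example_anchor_offset (long long offset, int access_size)
{
  if (offset & (access_size - 1))
    /* Misaligned offset: centre a 512-byte window around it.  */
    return (offset + 0x100) & ~0x1ffll;

  /* Aligned offset: keep only the low 12 bits in the access itself.  */
  return offset & ~0xfffll;
}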
4670 /* Try a machine-dependent way of reloading an illegitimate address
4671 operand. If we find one, push the reload and return the new rtx. */
4674 aarch64_legitimize_reload_address (rtx *x_p,
4675 machine_mode mode,
4676 int opnum, int type,
4677 int ind_levels ATTRIBUTE_UNUSED)
4679 rtx x = *x_p;
4681 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4682 if (aarch64_vect_struct_mode_p (mode)
4683 && GET_CODE (x) == PLUS
4684 && REG_P (XEXP (x, 0))
4685 && CONST_INT_P (XEXP (x, 1)))
4687 rtx orig_rtx = x;
4688 x = copy_rtx (x);
4689 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4690 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4691 opnum, (enum reload_type) type);
4692 return x;
4695 /* We must recognize output that we have already generated ourselves. */
4696 if (GET_CODE (x) == PLUS
4697 && GET_CODE (XEXP (x, 0)) == PLUS
4698 && REG_P (XEXP (XEXP (x, 0), 0))
4699 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4700 && CONST_INT_P (XEXP (x, 1)))
4702 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4703 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4704 opnum, (enum reload_type) type);
4705 return x;
4708 /* We wish to handle large displacements off a base register by splitting
4709 the addend across an add and the mem insn. This can cut the number of
4710 extra insns needed from 3 to 1. It is only useful for load/store of a
4711 single register with a 12-bit offset field. */
4712 if (GET_CODE (x) == PLUS
4713 && REG_P (XEXP (x, 0))
4714 && CONST_INT_P (XEXP (x, 1))
4715 && HARD_REGISTER_P (XEXP (x, 0))
4716 && mode != TImode
4717 && mode != TFmode
4718 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4720 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4721 HOST_WIDE_INT low = val & 0xfff;
4722 HOST_WIDE_INT high = val - low;
4723 HOST_WIDE_INT offs;
4724 rtx cst;
4725 machine_mode xmode = GET_MODE (x);
4727 /* In ILP32, xmode can be either DImode or SImode. */
4728 gcc_assert (xmode == DImode || xmode == SImode);
4730 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4731 BLKmode alignment. */
4732 if (GET_MODE_SIZE (mode) == 0)
4733 return NULL_RTX;
4735 offs = low % GET_MODE_SIZE (mode);
4737 /* Align misaligned offset by adjusting high part to compensate. */
4738 if (offs != 0)
4740 if (aarch64_uimm12_shift (high + offs))
4742 /* Align down. */
4743 low = low - offs;
4744 high = high + offs;
4746 else
4748 /* Align up. */
4749 offs = GET_MODE_SIZE (mode) - offs;
4750 low = low + offs;
4751 high = high + (low & 0x1000) - offs;
4752 low &= 0xfff;
4756 /* Check for overflow. */
4757 if (high + low != val)
4758 return NULL_RTX;
4760 cst = GEN_INT (high);
4761 if (!aarch64_uimm12_shift (high))
4762 cst = force_const_mem (xmode, cst);
4764 /* Reload high part into base reg, leaving the low part
4765 in the mem instruction.
4766 Note that replacing this gen_rtx_PLUS with plus_constant is
4767 wrong in this case because we rely on the
4768 (plus (plus reg c1) c2) structure being preserved so that
4769 XEXP (*p, 0) in push_reload below uses the correct term. */
4770 x = gen_rtx_PLUS (xmode,
4771 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4772 GEN_INT (low));
4774 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4775 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4776 opnum, (enum reload_type) type);
4777 return x;
4780 return NULL_RTX;
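/* For illustration only: a minimal sketch of the low/high split above,
   redone with plain C types (the example_* names are hypothetical and not
   used by the port).  For a DImode access at base + 0x3458 the addend
   splits into high == 0x3000, a 12-bit immediate shifted left by 12 that
   a single ADD can move into the scratch base register, and low == 0x458,
   which stays in the load/store's scaled 12-bit offset field.  */

struct example_addend_split { long long high, low; };

static struct example_addend_split
example_split_addend (long long val)
{
  struct example_addend_split s;
  s.low = val & 0xfff;		/* 0x3458 -> 0x458.  */
  s.high = val - s.low;		/* 0x3458 -> 0x3000.  */
  return s;
}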
4784 static reg_class_t
4785 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4786 reg_class_t rclass,
4787 machine_mode mode,
4788 secondary_reload_info *sri)
4790 /* Without the TARGET_SIMD instructions we cannot move a Q register
4791 to a Q register directly. We need a scratch. */
4792 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4793 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4794 && reg_class_subset_p (rclass, FP_REGS))
4796 if (mode == TFmode)
4797 sri->icode = CODE_FOR_aarch64_reload_movtf;
4798 else if (mode == TImode)
4799 sri->icode = CODE_FOR_aarch64_reload_movti;
4800 return NO_REGS;
4803 /* A TFmode or TImode memory access should be handled via an FP_REG
4804 because AArch64 has richer addressing modes for LDR/STR instructions
4805 than LDP/STP instructions. */
4806 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4807 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4808 return FP_REGS;
4810 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4811 return GENERAL_REGS;
4813 return NO_REGS;
4816 static bool
4817 aarch64_can_eliminate (const int from, const int to)
4819 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4820 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4822 if (frame_pointer_needed)
4824 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4825 return true;
4826 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4827 return false;
4828 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4829 && !cfun->calls_alloca)
4830 return true;
4831 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4832 return true;
4834 return false;
4836 else
4838 /* If we decided that we didn't need a leaf frame pointer but then used
4839 LR in the function, then we'll want a frame pointer after all, so
4840 prevent this elimination to ensure a frame pointer is used. */
4841 if (to == STACK_POINTER_REGNUM
4842 && flag_omit_leaf_frame_pointer
4843 && df_regs_ever_live_p (LR_REGNUM))
4844 return false;
4847 return true;
4850 HOST_WIDE_INT
4851 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4853 aarch64_layout_frame ();
4855 if (to == HARD_FRAME_POINTER_REGNUM)
4857 if (from == ARG_POINTER_REGNUM)
4858 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4860 if (from == FRAME_POINTER_REGNUM)
4861 return (cfun->machine->frame.hard_fp_offset
4862 - cfun->machine->frame.saved_varargs_size);
4865 if (to == STACK_POINTER_REGNUM)
4867 if (from == FRAME_POINTER_REGNUM)
4868 return (cfun->machine->frame.frame_size
4869 - cfun->machine->frame.saved_varargs_size);
4872 return cfun->machine->frame.frame_size;
4875 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4876 previous frame. */
4879 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4881 if (count != 0)
4882 return const0_rtx;
4883 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4887 static void
4888 aarch64_asm_trampoline_template (FILE *f)
4890 if (TARGET_ILP32)
4892 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4893 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4895 else
4897 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4898 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4900 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4901 assemble_aligned_integer (4, const0_rtx);
4902 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4903 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4906 static void
4907 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4909 rtx fnaddr, mem, a_tramp;
4910 const int tramp_code_sz = 16;
4912 /* Don't need to copy the trailing D-words, we fill those in below. */
4913 emit_block_move (m_tramp, assemble_trampoline_template (),
4914 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4915 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4916 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4917 if (GET_MODE (fnaddr) != ptr_mode)
4918 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4919 emit_move_insn (mem, fnaddr);
4921 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4922 emit_move_insn (mem, chain_value);
4924 /* XXX We should really define a "clear_cache" pattern and use
4925 gen_clear_cache(). */
4926 a_tramp = XEXP (m_tramp, 0);
4927 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4928 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4929 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4930 ptr_mode);
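/* For illustration only: the trampoline built by the two functions above,
   assuming the LP64 ABI (POINTER_BYTES == 8).  The first 16 bytes are the
   code template (a PC-relative load into IP1, a PC-relative load into the
   static chain register, a branch through IP1, and a padding word);
   aarch64_trampoline_init then stores the target function's address at
   offset 16 and the static chain value at offset 24, which are exactly
   the slots the two loads read.  The struct below is a hypothetical
   sketch of that layout, not a type used by the port.  */

struct example_lp64_trampoline
{
  unsigned int code[4];		/* 16 bytes of template code.  */
  unsigned long long fnaddr;	/* Written at offset 16.  */
  unsigned long long chain;	/* Written at offset 24.  */
};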
4933 static unsigned char
4934 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
4936 switch (regclass)
4938 case CALLER_SAVE_REGS:
4939 case POINTER_REGS:
4940 case GENERAL_REGS:
4941 case ALL_REGS:
4942 case FP_REGS:
4943 case FP_LO_REGS:
4944 return
4945 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4946 (GET_MODE_SIZE (mode) + 7) / 8;
4947 case STACK_REG:
4948 return 1;
4950 case NO_REGS:
4951 return 0;
4953 default:
4954 break;
4956 gcc_unreachable ();
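/* For illustration only: the register-count arithmetic used above for the
   general and FP register classes, with plain C types (the example_* name
   is hypothetical).  A 16-byte vector value is counted as one 128-bit
   register ((16 + 15) / 16 == 1), a 16-byte non-vector value such as a
   TImode quantity is counted as two 64-bit registers ((16 + 7) / 8 == 2),
   and an 8-byte value as one either way.  */

static unsigned char
example_class_nregs (int mode_size, int is_vector_mode)
{
  return is_vector_mode ? (mode_size + 15) / 16 : (mode_size + 7) / 8;
}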
4959 static reg_class_t
4960 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4962 if (regclass == POINTER_REGS)
4963 return GENERAL_REGS;
4965 if (regclass == STACK_REG)
4967 if (REG_P(x)
4968 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4969 return regclass;
4971 return NO_REGS;
4974 /* If it's an integer immediate that MOVI can't handle, then
4975 FP_REGS is not an option, so we return NO_REGS instead. */
4976 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4977 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4978 return NO_REGS;
4980 /* Register elimination can result in a request for
4981 SP+constant->FP_REGS. We cannot support such operations, which
4982 use SP as source and an FP_REG as destination, so reject them
4983 right away. */
4984 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4986 rtx lhs = XEXP (x, 0);
4988 /* Look through a possible SUBREG introduced by ILP32. */
4989 if (GET_CODE (lhs) == SUBREG)
4990 lhs = SUBREG_REG (lhs);
4992 gcc_assert (REG_P (lhs));
4993 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4994 POINTER_REGS));
4995 return NO_REGS;
4998 return regclass;
5001 void
5002 aarch64_asm_output_labelref (FILE* f, const char *name)
5004 asm_fprintf (f, "%U%s", name);
5007 static void
5008 aarch64_elf_asm_constructor (rtx symbol, int priority)
5010 if (priority == DEFAULT_INIT_PRIORITY)
5011 default_ctor_section_asm_out_constructor (symbol, priority);
5012 else
5014 section *s;
5015 char buf[18];
5016 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5017 s = get_section (buf, SECTION_WRITE, NULL);
5018 switch_to_section (s);
5019 assemble_align (POINTER_SIZE);
5020 assemble_aligned_integer (POINTER_BYTES, symbol);
5024 static void
5025 aarch64_elf_asm_destructor (rtx symbol, int priority)
5027 if (priority == DEFAULT_INIT_PRIORITY)
5028 default_dtor_section_asm_out_destructor (symbol, priority);
5029 else
5031 section *s;
5032 char buf[18];
5033 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5034 s = get_section (buf, SECTION_WRITE, NULL);
5035 switch_to_section (s);
5036 assemble_align (POINTER_SIZE);
5037 assemble_aligned_integer (POINTER_BYTES, symbol);
5041 const char*
5042 aarch64_output_casesi (rtx *operands)
5044 char buf[100];
5045 char label[100];
5046 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
5047 int index;
5048 static const char *const patterns[4][2] =
5051 "ldrb\t%w3, [%0,%w1,uxtw]",
5052 "add\t%3, %4, %w3, sxtb #2"
5055 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5056 "add\t%3, %4, %w3, sxth #2"
5059 "ldr\t%w3, [%0,%w1,uxtw #2]",
5060 "add\t%3, %4, %w3, sxtw #2"
5062 /* We assume that DImode is only generated when not optimizing and
5063 that we don't really need 64-bit address offsets. That would
5064 imply an object file with 8GB of code in a single function! */
5066 "ldr\t%w3, [%0,%w1,uxtw #2]",
5067 "add\t%3, %4, %w3, sxtw #2"
5071 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5073 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5075 gcc_assert (index >= 0 && index <= 3);
5077 /* Need to implement table size reduction, by changing the code below. */
5078 output_asm_insn (patterns[index][0], operands);
5079 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5080 snprintf (buf, sizeof (buf),
5081 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5082 output_asm_insn (buf, operands);
5083 output_asm_insn (patterns[index][1], operands);
5084 output_asm_insn ("br\t%3", operands);
5085 assemble_label (asm_out_file, label);
5086 return "";
5090 /* Return size in bits of an arithmetic operand which is shifted/scaled and
5091 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5092 operator. */
5095 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5097 if (shift >= 0 && shift <= 3)
5099 int size;
5100 for (size = 8; size <= 32; size *= 2)
5102 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5103 if (mask == bits << shift)
5104 return size;
5107 return 0;
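/* Worked examples for the helper above, for illustration only: a shift of
   2 with mask 0x3fc (0xff << 2) describes a zero-extended byte scaled by
   4 and yields 8 (a UXTB operand); shift 0 with mask 0xffff yields 16
   (UXTH); shift 3 with mask 0x7fffffff8 (0xffffffff << 3) yields 32
   (UXTW); any other shift/mask combination yields 0, i.e. not a valid
   extend operand.  The caller below is hypothetical.  */

static int
example_uxt_size_usage (void)
{
  return aarch64_uxt_size (2, 0x3fc);	/* Evaluates to 8.  */
}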
5110 static bool
5111 aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
5112 const_rtx x ATTRIBUTE_UNUSED)
5114 /* We can't use blocks for constants when we're using a per-function
5115 constant pool. */
5116 return false;
5119 static section *
5120 aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
5121 rtx x ATTRIBUTE_UNUSED,
5122 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5124 /* Force all constant pool entries into the current function section. */
5125 return function_section (current_function_decl);
5129 /* Costs. */
5131 /* Helper function for rtx cost calculation. Strip a shift expression
5132 from X. Returns the inner operand if successful, or the original
5133 expression on failure. */
5134 static rtx
5135 aarch64_strip_shift (rtx x)
5137 rtx op = x;
5139 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5140 we can convert both to ROR during final output. */
5141 if ((GET_CODE (op) == ASHIFT
5142 || GET_CODE (op) == ASHIFTRT
5143 || GET_CODE (op) == LSHIFTRT
5144 || GET_CODE (op) == ROTATERT
5145 || GET_CODE (op) == ROTATE)
5146 && CONST_INT_P (XEXP (op, 1)))
5147 return XEXP (op, 0);
5149 if (GET_CODE (op) == MULT
5150 && CONST_INT_P (XEXP (op, 1))
5151 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5152 return XEXP (op, 0);
5154 return x;
5157 /* Helper function for rtx cost calculation. Strip an extend
5158 expression from X. Returns the inner operand if successful, or the
5159 original expression on failure. We deal with a number of possible
5160 canonicalization variations here. */
5161 static rtx
5162 aarch64_strip_extend (rtx x)
5164 rtx op = x;
5166 /* Zero and sign extraction of a widened value. */
5167 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5168 && XEXP (op, 2) == const0_rtx
5169 && GET_CODE (XEXP (op, 0)) == MULT
5170 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5171 XEXP (op, 1)))
5172 return XEXP (XEXP (op, 0), 0);
5174 /* It can also be represented (for zero-extend) as an AND with an
5175 immediate. */
5176 if (GET_CODE (op) == AND
5177 && GET_CODE (XEXP (op, 0)) == MULT
5178 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5179 && CONST_INT_P (XEXP (op, 1))
5180 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5181 INTVAL (XEXP (op, 1))) != 0)
5182 return XEXP (XEXP (op, 0), 0);
5184 /* Now handle extended register, as this may also have an optional
5185 left shift by 1..4. */
5186 if (GET_CODE (op) == ASHIFT
5187 && CONST_INT_P (XEXP (op, 1))
5188 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5189 op = XEXP (op, 0);
5191 if (GET_CODE (op) == ZERO_EXTEND
5192 || GET_CODE (op) == SIGN_EXTEND)
5193 op = XEXP (op, 0);
5195 if (op != x)
5196 return op;
5198 return x;
5201 /* Helper function for rtx cost calculation. Calculate the cost of
5202 a MULT, which may be part of a multiply-accumulate rtx. Return
5203 the calculated cost of the expression, recursing manually in to
5204 operands where needed. */
5206 static int
5207 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5209 rtx op0, op1;
5210 const struct cpu_cost_table *extra_cost
5211 = aarch64_tune_params->insn_extra_cost;
5212 int cost = 0;
5213 bool maybe_fma = (outer == PLUS || outer == MINUS);
5214 machine_mode mode = GET_MODE (x);
5216 gcc_checking_assert (code == MULT);
5218 op0 = XEXP (x, 0);
5219 op1 = XEXP (x, 1);
5221 if (VECTOR_MODE_P (mode))
5222 mode = GET_MODE_INNER (mode);
5224 /* Integer multiply/fma. */
5225 if (GET_MODE_CLASS (mode) == MODE_INT)
5227 /* The multiply will be canonicalized as a shift, cost it as such. */
5228 if (CONST_INT_P (op1)
5229 && exact_log2 (INTVAL (op1)) > 0)
5231 if (speed)
5233 if (maybe_fma)
5234 /* ADD (shifted register). */
5235 cost += extra_cost->alu.arith_shift;
5236 else
5237 /* LSL (immediate). */
5238 cost += extra_cost->alu.shift;
5241 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5243 return cost;
5246 /* Integer multiplies or FMAs have zero/sign extending variants. */
5247 if ((GET_CODE (op0) == ZERO_EXTEND
5248 && GET_CODE (op1) == ZERO_EXTEND)
5249 || (GET_CODE (op0) == SIGN_EXTEND
5250 && GET_CODE (op1) == SIGN_EXTEND))
5252 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5253 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5255 if (speed)
5257 if (maybe_fma)
5258 /* MADD/SMADDL/UMADDL. */
5259 cost += extra_cost->mult[0].extend_add;
5260 else
5261 /* MUL/SMULL/UMULL. */
5262 cost += extra_cost->mult[0].extend;
5265 return cost;
5268 /* This is either an integer multiply or an FMA. In both cases
5269 we want to recurse and cost the operands. */
5270 cost += rtx_cost (op0, MULT, 0, speed)
5271 + rtx_cost (op1, MULT, 1, speed);
5273 if (speed)
5275 if (maybe_fma)
5276 /* MADD. */
5277 cost += extra_cost->mult[mode == DImode].add;
5278 else
5279 /* MUL. */
5280 cost += extra_cost->mult[mode == DImode].simple;
5283 return cost;
5285 else
5287 if (speed)
5289 /* Floating-point FMA/FMUL can also support negations of the
5290 operands. */
5291 if (GET_CODE (op0) == NEG)
5292 op0 = XEXP (op0, 0);
5293 if (GET_CODE (op1) == NEG)
5294 op1 = XEXP (op1, 0);
5296 if (maybe_fma)
5297 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5298 cost += extra_cost->fp[mode == DFmode].fma;
5299 else
5300 /* FMUL/FNMUL. */
5301 cost += extra_cost->fp[mode == DFmode].mult;
5304 cost += rtx_cost (op0, MULT, 0, speed)
5305 + rtx_cost (op1, MULT, 1, speed);
5306 return cost;
5310 static int
5311 aarch64_address_cost (rtx x,
5312 machine_mode mode,
5313 addr_space_t as ATTRIBUTE_UNUSED,
5314 bool speed)
5316 enum rtx_code c = GET_CODE (x);
5317 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5318 struct aarch64_address_info info;
5319 int cost = 0;
5320 info.shift = 0;
5322 if (!aarch64_classify_address (&info, x, mode, c, false))
5324 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5326 /* This is a CONST or SYMBOL ref which will be split
5327 in a different way depending on the code model in use.
5328 Cost it through the generic infrastructure. */
5329 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5330 /* Divide through by the cost of one instruction to
5331 bring it to the same units as the address costs. */
5332 cost_symbol_ref /= COSTS_N_INSNS (1);
5333 /* The cost is then the cost of preparing the address,
5334 followed by an immediate (possibly 0) offset. */
5335 return cost_symbol_ref + addr_cost->imm_offset;
5337 else
5339 /* This is most likely a jump table from a case
5340 statement. */
5341 return addr_cost->register_offset;
5345 switch (info.type)
5347 case ADDRESS_LO_SUM:
5348 case ADDRESS_SYMBOLIC:
5349 case ADDRESS_REG_IMM:
5350 cost += addr_cost->imm_offset;
5351 break;
5353 case ADDRESS_REG_WB:
5354 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5355 cost += addr_cost->pre_modify;
5356 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5357 cost += addr_cost->post_modify;
5358 else
5359 gcc_unreachable ();
5361 break;
5363 case ADDRESS_REG_REG:
5364 cost += addr_cost->register_offset;
5365 break;
5367 case ADDRESS_REG_UXTW:
5368 case ADDRESS_REG_SXTW:
5369 cost += addr_cost->register_extend;
5370 break;
5372 default:
5373 gcc_unreachable ();
5377 if (info.shift > 0)
5379 /* For the sake of calculating the cost of the shifted register
5380 component, we can treat same sized modes in the same way. */
5381 switch (GET_MODE_BITSIZE (mode))
5383 case 16:
5384 cost += addr_cost->addr_scale_costs.hi;
5385 break;
5387 case 32:
5388 cost += addr_cost->addr_scale_costs.si;
5389 break;
5391 case 64:
5392 cost += addr_cost->addr_scale_costs.di;
5393 break;
5395 /* We can't tell, or this is a 128-bit vector. */
5396 default:
5397 cost += addr_cost->addr_scale_costs.ti;
5398 break;
5402 return cost;
5405 /* Return true if the RTX X in mode MODE is a zero or sign extract
5406 usable in an ADD or SUB (extended register) instruction. */
5407 static bool
5408 aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
5410 /* Catch add with a sign extract.
5411 This is add_<optab><mode>_multp2. */
5412 if (GET_CODE (x) == SIGN_EXTRACT
5413 || GET_CODE (x) == ZERO_EXTRACT)
5415 rtx op0 = XEXP (x, 0);
5416 rtx op1 = XEXP (x, 1);
5417 rtx op2 = XEXP (x, 2);
5419 if (GET_CODE (op0) == MULT
5420 && CONST_INT_P (op1)
5421 && op2 == const0_rtx
5422 && CONST_INT_P (XEXP (op0, 1))
5423 && aarch64_is_extend_from_extract (mode,
5424 XEXP (op0, 1),
5425 op1))
5427 return true;
5431 return false;
5434 static bool
5435 aarch64_frint_unspec_p (unsigned int u)
5437 switch (u)
5439 case UNSPEC_FRINTZ:
5440 case UNSPEC_FRINTP:
5441 case UNSPEC_FRINTM:
5442 case UNSPEC_FRINTA:
5443 case UNSPEC_FRINTN:
5444 case UNSPEC_FRINTX:
5445 case UNSPEC_FRINTI:
5446 return true;
5448 default:
5449 return false;
5453 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5454 storing it in *COST. Result is true if the total cost of the operation
5455 has now been calculated. */
5456 static bool
5457 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5459 rtx inner;
5460 rtx comparator;
5461 enum rtx_code cmpcode;
5463 if (COMPARISON_P (op0))
5465 inner = XEXP (op0, 0);
5466 comparator = XEXP (op0, 1);
5467 cmpcode = GET_CODE (op0);
5469 else
5471 inner = op0;
5472 comparator = const0_rtx;
5473 cmpcode = NE;
5476 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5478 /* Conditional branch. */
5479 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5480 return true;
5481 else
5483 if (cmpcode == NE || cmpcode == EQ)
5485 if (comparator == const0_rtx)
5487 /* TBZ/TBNZ/CBZ/CBNZ. */
5488 if (GET_CODE (inner) == ZERO_EXTRACT)
5489 /* TBZ/TBNZ. */
5490 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5491 0, speed);
5492 else
5493 /* CBZ/CBNZ. */
5494 *cost += rtx_cost (inner, cmpcode, 0, speed);
5496 return true;
5499 else if (cmpcode == LT || cmpcode == GE)
5501 /* TBZ/TBNZ. */
5502 if (comparator == const0_rtx)
5503 return true;
5507 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5509 /* It's a conditional operation based on the status flags,
5510 so it must be some flavor of CSEL. */
5512 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5513 if (GET_CODE (op1) == NEG
5514 || GET_CODE (op1) == NOT
5515 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5516 op1 = XEXP (op1, 0);
5518 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5519 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5520 return true;
5523 /* We don't know what this is, cost all operands. */
5524 return false;
5527 /* Calculate the cost of calculating X, storing it in *COST. Result
5528 is true if the total cost of the operation has now been calculated. */
5529 static bool
5530 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5531 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5533 rtx op0, op1, op2;
5534 const struct cpu_cost_table *extra_cost
5535 = aarch64_tune_params->insn_extra_cost;
5536 machine_mode mode = GET_MODE (x);
5538 /* By default, assume that everything has equivalent cost to the
5539 cheapest instruction. Any additional costs are applied as a delta
5540 above this default. */
5541 *cost = COSTS_N_INSNS (1);
5543 /* TODO: The cost infrastructure currently does not handle
5544 vector operations. Assume that all vector operations
5545 are equally expensive. */
5546 if (VECTOR_MODE_P (mode))
5548 if (speed)
5549 *cost += extra_cost->vect.alu;
5550 return true;
5553 switch (code)
5555 case SET:
5556 /* The cost depends entirely on the operands to SET. */
5557 *cost = 0;
5558 op0 = SET_DEST (x);
5559 op1 = SET_SRC (x);
5561 switch (GET_CODE (op0))
5563 case MEM:
5564 if (speed)
5566 rtx address = XEXP (op0, 0);
5567 if (GET_MODE_CLASS (mode) == MODE_INT)
5568 *cost += extra_cost->ldst.store;
5569 else if (mode == SFmode)
5570 *cost += extra_cost->ldst.storef;
5571 else if (mode == DFmode)
5572 *cost += extra_cost->ldst.stored;
5574 *cost +=
5575 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5576 0, speed));
5579 *cost += rtx_cost (op1, SET, 1, speed);
5580 return true;
5582 case SUBREG:
5583 if (! REG_P (SUBREG_REG (op0)))
5584 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5586 /* Fall through. */
5587 case REG:
5588 /* const0_rtx is in general free, but we will use an
5589 instruction to set a register to 0. */
5590 if (REG_P (op1) || op1 == const0_rtx)
5592 /* The cost is 1 per register copied. */
5593 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5594 / UNITS_PER_WORD;
5595 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5597 else
5598 /* Cost is just the cost of the RHS of the set. */
5599 *cost += rtx_cost (op1, SET, 1, speed);
5600 return true;
5602 case ZERO_EXTRACT:
5603 case SIGN_EXTRACT:
5604 /* Bit-field insertion. Strip any redundant widening of
5605 the RHS to meet the width of the target. */
5606 if (GET_CODE (op1) == SUBREG)
5607 op1 = SUBREG_REG (op1);
5608 if ((GET_CODE (op1) == ZERO_EXTEND
5609 || GET_CODE (op1) == SIGN_EXTEND)
5610 && CONST_INT_P (XEXP (op0, 1))
5611 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5612 >= INTVAL (XEXP (op0, 1))))
5613 op1 = XEXP (op1, 0);
5615 if (CONST_INT_P (op1))
5617 /* MOV immediate is assumed to always be cheap. */
5618 *cost = COSTS_N_INSNS (1);
5620 else
5622 /* BFM. */
5623 if (speed)
5624 *cost += extra_cost->alu.bfi;
5625 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5628 return true;
5630 default:
5631 /* We can't make sense of this, assume default cost. */
5632 *cost = COSTS_N_INSNS (1);
5633 return false;
5635 return false;
5637 case CONST_INT:
5638 /* If an instruction can incorporate a constant within the
5639 instruction, the instruction's expression avoids calling
5640 rtx_cost() on the constant. If rtx_cost() is called on a
5641 constant, then it is usually because the constant must be
5642 moved into a register by one or more instructions.
5644 The exception is constant 0, which can be expressed
5645 as XZR/WZR and is therefore free. The exception to this is
5646 if we have (set (reg) (const0_rtx)) in which case we must cost
5647 the move. However, we can catch that when we cost the SET, so
5648 we don't need to consider that here. */
5649 if (x == const0_rtx)
5650 *cost = 0;
5651 else
5653 /* To an approximation, building any other constant is
5654 proportionally expensive to the number of instructions
5655 required to build that constant. This is true whether we
5656 are compiling for SPEED or otherwise. */
5657 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5658 (NULL_RTX, x, false, mode));
5660 return true;
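/* Worked example, for illustration only: a constant such as 42 fits a
   single MOVZ and is costed as one instruction, while a 64-bit constant
   like 0x123456789abcdef0, whose four 16-bit chunks are all non-zero and
   distinct, typically needs a MOVZ plus three MOVKs and is costed as
   four.  */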
5662 case CONST_DOUBLE:
5663 if (speed)
5665 /* mov[df,sf]_aarch64. */
5666 if (aarch64_float_const_representable_p (x))
5667 /* FMOV (scalar immediate). */
5668 *cost += extra_cost->fp[mode == DFmode].fpconst;
5669 else if (!aarch64_float_const_zero_rtx_p (x))
5671 /* This will be a load from memory. */
5672 if (mode == DFmode)
5673 *cost += extra_cost->ldst.loadd;
5674 else
5675 *cost += extra_cost->ldst.loadf;
5677 else
5678 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5679 or MOV v0.s[0], wzr - neither of which is modeled by the
5680 cost tables. Just use the default cost. */
5685 return true;
5687 case MEM:
5688 if (speed)
5690 /* For loads we want the base cost of a load, plus an
5691 approximation for the additional cost of the addressing
5692 mode. */
5693 rtx address = XEXP (x, 0);
5694 if (GET_MODE_CLASS (mode) == MODE_INT)
5695 *cost += extra_cost->ldst.load;
5696 else if (mode == SFmode)
5697 *cost += extra_cost->ldst.loadf;
5698 else if (mode == DFmode)
5699 *cost += extra_cost->ldst.loadd;
5701 *cost +=
5702 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5703 0, speed));
5706 return true;
5708 case NEG:
5709 op0 = XEXP (x, 0);
5711 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5713 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5714 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5716 /* CSETM. */
5717 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5718 return true;
5721 /* Cost this as SUB wzr, X. */
5722 op0 = CONST0_RTX (GET_MODE (x));
5723 op1 = XEXP (x, 0);
5724 goto cost_minus;
5727 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5729 /* Support (neg(fma...)) as a single instruction only if
5730 sign of zeros is unimportant. This matches the decision
5731 making in aarch64.md. */
5732 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5734 /* FNMADD. */
5735 *cost = rtx_cost (op0, NEG, 0, speed);
5736 return true;
5738 if (speed)
5739 /* FNEG. */
5740 *cost += extra_cost->fp[mode == DFmode].neg;
5741 return false;
5744 return false;
5746 case CLRSB:
5747 case CLZ:
5748 if (speed)
5749 *cost += extra_cost->alu.clz;
5751 return false;
5753 case COMPARE:
5754 op0 = XEXP (x, 0);
5755 op1 = XEXP (x, 1);
5757 if (op1 == const0_rtx
5758 && GET_CODE (op0) == AND)
5760 x = op0;
5761 goto cost_logic;
5764 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5766 /* TODO: A write to the CC flags possibly costs extra, this
5767 needs encoding in the cost tables. */
5769 /* CC_ZESWPmode supports zero extend for free. */
5770 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5771 op0 = XEXP (op0, 0);
5773 /* ANDS. */
5774 if (GET_CODE (op0) == AND)
5776 x = op0;
5777 goto cost_logic;
5780 if (GET_CODE (op0) == PLUS)
5782 /* ADDS (and CMN alias). */
5783 x = op0;
5784 goto cost_plus;
5787 if (GET_CODE (op0) == MINUS)
5789 /* SUBS. */
5790 x = op0;
5791 goto cost_minus;
5794 if (GET_CODE (op1) == NEG)
5796 /* CMN. */
5797 if (speed)
5798 *cost += extra_cost->alu.arith;
5800 *cost += rtx_cost (op0, COMPARE, 0, speed);
5801 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5802 return true;
5805 /* CMP.
5807 Compare can freely swap the order of operands, and
5808 canonicalization puts the more complex operation first.
5809 But the integer MINUS logic expects the shift/extend
5810 operation in op1. */
5811 if (! (REG_P (op0)
5812 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5814 op0 = XEXP (x, 1);
5815 op1 = XEXP (x, 0);
5817 goto cost_minus;
5820 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5822 /* FCMP. */
5823 if (speed)
5824 *cost += extra_cost->fp[mode == DFmode].compare;
5826 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5828 /* FCMP supports constant 0.0 for no extra cost. */
5829 return true;
5831 return false;
5834 return false;
5836 case MINUS:
5838 op0 = XEXP (x, 0);
5839 op1 = XEXP (x, 1);
5841 cost_minus:
5842 /* Detect valid immediates. */
5843 if ((GET_MODE_CLASS (mode) == MODE_INT
5844 || (GET_MODE_CLASS (mode) == MODE_CC
5845 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5846 && CONST_INT_P (op1)
5847 && aarch64_uimm12_shift (INTVAL (op1)))
5849 *cost += rtx_cost (op0, MINUS, 0, speed);
5851 if (speed)
5852 /* SUB(S) (immediate). */
5853 *cost += extra_cost->alu.arith;
5854 return true;
5858 /* Look for SUB (extended register). */
5859 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5861 if (speed)
5862 *cost += extra_cost->alu.arith_shift;
5864 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5865 (enum rtx_code) GET_CODE (op1),
5866 0, speed);
5867 return true;
5870 rtx new_op1 = aarch64_strip_extend (op1);
5872 /* Cost this as an FMA-alike operation. */
5873 if ((GET_CODE (new_op1) == MULT
5874 || GET_CODE (new_op1) == ASHIFT)
5875 && code != COMPARE)
5877 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5878 (enum rtx_code) code,
5879 speed);
5880 *cost += rtx_cost (op0, MINUS, 0, speed);
5881 return true;
5884 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5886 if (speed)
5888 if (GET_MODE_CLASS (mode) == MODE_INT)
5889 /* SUB(S). */
5890 *cost += extra_cost->alu.arith;
5891 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5892 /* FSUB. */
5893 *cost += extra_cost->fp[mode == DFmode].addsub;
5895 return true;
5898 case PLUS:
5900 rtx new_op0;
5902 op0 = XEXP (x, 0);
5903 op1 = XEXP (x, 1);
5905 cost_plus:
5906 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5907 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5909 /* CSINC. */
5910 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5911 *cost += rtx_cost (op1, PLUS, 1, speed);
5912 return true;
5915 if (GET_MODE_CLASS (mode) == MODE_INT
5916 && CONST_INT_P (op1)
5917 && aarch64_uimm12_shift (INTVAL (op1)))
5919 *cost += rtx_cost (op0, PLUS, 0, speed);
5921 if (speed)
5922 /* ADD (immediate). */
5923 *cost += extra_cost->alu.arith;
5924 return true;
5927 /* Look for ADD (extended register). */
5928 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5930 if (speed)
5931 *cost += extra_cost->alu.arith_shift;
5933 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5934 (enum rtx_code) GET_CODE (op0),
5935 0, speed);
5936 return true;
5939 /* Strip any extend, leave shifts behind as we will
5940 cost them through mult_cost. */
5941 new_op0 = aarch64_strip_extend (op0);
5943 if (GET_CODE (new_op0) == MULT
5944 || GET_CODE (new_op0) == ASHIFT)
5946 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5947 speed);
5948 *cost += rtx_cost (op1, PLUS, 1, speed);
5949 return true;
5952 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5953 + rtx_cost (op1, PLUS, 1, speed));
5955 if (speed)
5957 if (GET_MODE_CLASS (mode) == MODE_INT)
5958 /* ADD. */
5959 *cost += extra_cost->alu.arith;
5960 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5961 /* FADD. */
5962 *cost += extra_cost->fp[mode == DFmode].addsub;
5964 return true;
5967 case BSWAP:
5968 *cost = COSTS_N_INSNS (1);
5970 if (speed)
5971 *cost += extra_cost->alu.rev;
5973 return false;
5975 case IOR:
5976 if (aarch_rev16_p (x))
5978 *cost = COSTS_N_INSNS (1);
5980 if (speed)
5981 *cost += extra_cost->alu.rev;
5983 return true;
5985 /* Fall through. */
5986 case XOR:
5987 case AND:
5988 cost_logic:
5989 op0 = XEXP (x, 0);
5990 op1 = XEXP (x, 1);
5992 if (code == AND
5993 && GET_CODE (op0) == MULT
5994 && CONST_INT_P (XEXP (op0, 1))
5995 && CONST_INT_P (op1)
5996 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5997 INTVAL (op1)) != 0)
5999 /* This is a UBFM/SBFM. */
6000 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
6001 if (speed)
6002 *cost += extra_cost->alu.bfx;
6003 return true;
6006 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6008 /* We possibly get the immediate for free; this is not
6009 modelled. */
6010 if (CONST_INT_P (op1)
6011 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
6013 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6015 if (speed)
6016 *cost += extra_cost->alu.logical;
6018 return true;
6020 else
6022 rtx new_op0 = op0;
6024 /* Handle ORN, EON, or BIC. */
6025 if (GET_CODE (op0) == NOT)
6026 op0 = XEXP (op0, 0);
6028 new_op0 = aarch64_strip_shift (op0);
6030 /* If we had a shift on op0 then this is a logical-shift-
6031 by-register/immediate operation. Otherwise, this is just
6032 a logical operation. */
6033 if (speed)
6035 if (new_op0 != op0)
6037 /* Shift by immediate. */
6038 if (CONST_INT_P (XEXP (op0, 1)))
6039 *cost += extra_cost->alu.log_shift;
6040 else
6041 *cost += extra_cost->alu.log_shift_reg;
6043 else
6044 *cost += extra_cost->alu.logical;
6047 /* In both cases we want to cost both operands. */
6048 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
6049 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
6051 return true;
6054 return false;
6056 case NOT:
6057 /* MVN. */
6058 if (speed)
6059 *cost += extra_cost->alu.logical;
6061 /* The logical instruction could have the shifted register form,
6062 but the cost is the same if the shift is processed as a separate
6063 instruction, so we don't bother with it here. */
6064 return false;
6066 case ZERO_EXTEND:
6068 op0 = XEXP (x, 0);
6069 /* If a value is written in SI mode, then zero extended to DI
6070 mode, the operation will in general be free as a write to
6071 a 'w' register implicitly zeroes the upper bits of an 'x'
6072 register. However, if this is
6074 (set (reg) (zero_extend (reg)))
6076 we must cost the explicit register move. */
6077 if (mode == DImode
6078 && GET_MODE (op0) == SImode
6079 && outer == SET)
6081 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
6083 if (!op_cost && speed)
6084 /* MOV. */
6085 *cost += extra_cost->alu.extend;
6086 else
6087 /* Free, the cost is that of the SI mode operation. */
6088 *cost = op_cost;
6090 return true;
6092 else if (MEM_P (XEXP (x, 0)))
6094 /* All loads can zero extend to any size for free. */
6095 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
6096 return true;
6099 /* UXTB/UXTH. */
6100 if (speed)
6101 *cost += extra_cost->alu.extend;
6103 return false;
6105 case SIGN_EXTEND:
6106 if (MEM_P (XEXP (x, 0)))
6108 /* LDRSH. */
6109 if (speed)
6111 rtx address = XEXP (XEXP (x, 0), 0);
6112 *cost += extra_cost->ldst.load_sign_extend;
6114 *cost +=
6115 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6116 0, speed));
6118 return true;
6121 if (speed)
6122 *cost += extra_cost->alu.extend;
6123 return false;
6125 case ASHIFT:
6126 op0 = XEXP (x, 0);
6127 op1 = XEXP (x, 1);
6129 if (CONST_INT_P (op1))
6131 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6132 aliases. */
6133 if (speed)
6134 *cost += extra_cost->alu.shift;
6136 /* We can incorporate zero/sign extend for free. */
6137 if (GET_CODE (op0) == ZERO_EXTEND
6138 || GET_CODE (op0) == SIGN_EXTEND)
6139 op0 = XEXP (op0, 0);
6141 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6142 return true;
6144 else
6146 /* LSLV. */
6147 if (speed)
6148 *cost += extra_cost->alu.shift_reg;
6150 return false; /* All arguments need to be in registers. */
6153 case ROTATE:
6154 case ROTATERT:
6155 case LSHIFTRT:
6156 case ASHIFTRT:
6157 op0 = XEXP (x, 0);
6158 op1 = XEXP (x, 1);
6160 if (CONST_INT_P (op1))
6162 /* ASR (immediate) and friends. */
6163 if (speed)
6164 *cost += extra_cost->alu.shift;
6166 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6167 return true;
6169 else
6172 /* ASR (register) and friends. */
6173 if (speed)
6174 *cost += extra_cost->alu.shift_reg;
6176 return false; /* All arguments need to be in registers. */
6179 case SYMBOL_REF:
6181 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6183 /* LDR. */
6184 if (speed)
6185 *cost += extra_cost->ldst.load;
6187 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6188 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6190 /* ADRP, followed by ADD. */
6191 *cost += COSTS_N_INSNS (1);
6192 if (speed)
6193 *cost += 2 * extra_cost->alu.arith;
6195 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6196 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6198 /* ADR. */
6199 if (speed)
6200 *cost += extra_cost->alu.arith;
6203 if (flag_pic)
6205 /* One extra load instruction, after accessing the GOT. */
6206 *cost += COSTS_N_INSNS (1);
6207 if (speed)
6208 *cost += extra_cost->ldst.load;
6210 return true;
6212 case HIGH:
6213 case LO_SUM:
6214 /* ADRP/ADD (immediate). */
6215 if (speed)
6216 *cost += extra_cost->alu.arith;
6217 return true;
6219 case ZERO_EXTRACT:
6220 case SIGN_EXTRACT:
6221 /* UBFX/SBFX. */
6222 if (speed)
6223 *cost += extra_cost->alu.bfx;
6225 /* We can trust that the immediates used will be correct (there
6226 are no by-register forms), so we need only cost op0. */
6227 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
6228 return true;
6230 case MULT:
6231 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6232 /* aarch64_rtx_mult_cost always handles recursion to its
6233 operands. */
6234 return true;
6236 case MOD:
6237 case UMOD:
6238 if (speed)
6240 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6241 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6242 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
6243 else if (GET_MODE (x) == DFmode)
6244 *cost += (extra_cost->fp[1].mult
6245 + extra_cost->fp[1].div);
6246 else if (GET_MODE (x) == SFmode)
6247 *cost += (extra_cost->fp[0].mult
6248 + extra_cost->fp[0].div);
6250 return false; /* All arguments need to be in registers. */
6252 case DIV:
6253 case UDIV:
6254 case SQRT:
6255 if (speed)
6257 if (GET_MODE_CLASS (mode) == MODE_INT)
6258 /* There is no integer SQRT, so only DIV and UDIV can get
6259 here. */
6260 *cost += extra_cost->mult[mode == DImode].idiv;
6261 else
6262 *cost += extra_cost->fp[mode == DFmode].div;
6264 return false; /* All arguments need to be in registers. */
6266 case IF_THEN_ELSE:
6267 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6268 XEXP (x, 2), cost, speed);
6270 case EQ:
6271 case NE:
6272 case GT:
6273 case GTU:
6274 case LT:
6275 case LTU:
6276 case GE:
6277 case GEU:
6278 case LE:
6279 case LEU:
6281 return false; /* All arguments must be in registers. */
6283 case FMA:
6284 op0 = XEXP (x, 0);
6285 op1 = XEXP (x, 1);
6286 op2 = XEXP (x, 2);
6288 if (speed)
6289 *cost += extra_cost->fp[mode == DFmode].fma;
6291 /* FMSUB, FNMADD, and FNMSUB are free. */
6292 if (GET_CODE (op0) == NEG)
6293 op0 = XEXP (op0, 0);
6295 if (GET_CODE (op2) == NEG)
6296 op2 = XEXP (op2, 0);
6298 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6299 and the by-element operand as operand 0. */
6300 if (GET_CODE (op1) == NEG)
6301 op1 = XEXP (op1, 0);
6303 /* Catch vector-by-element operations. The by-element operand can
6304 either be (vec_duplicate (vec_select (x))) or just
6305 (vec_select (x)), depending on whether we are multiplying by
6306 a vector or a scalar.
6308 Canonicalization is not very good in these cases: FMA4 will put the
6309 by-element operand as operand 0, FNMA4 will have it as operand 1. */
6310 if (GET_CODE (op0) == VEC_DUPLICATE)
6311 op0 = XEXP (op0, 0);
6312 else if (GET_CODE (op1) == VEC_DUPLICATE)
6313 op1 = XEXP (op1, 0);
6315 if (GET_CODE (op0) == VEC_SELECT)
6316 op0 = XEXP (op0, 0);
6317 else if (GET_CODE (op1) == VEC_SELECT)
6318 op1 = XEXP (op1, 0);
6320 /* If the remaining parameters are not registers,
6321 get the cost to put them into registers. */
6322 *cost += rtx_cost (op0, FMA, 0, speed);
6323 *cost += rtx_cost (op1, FMA, 1, speed);
6324 *cost += rtx_cost (op2, FMA, 2, speed);
6325 return true;
6327 case FLOAT_EXTEND:
6328 if (speed)
6329 *cost += extra_cost->fp[mode == DFmode].widen;
6330 return false;
6332 case FLOAT_TRUNCATE:
6333 if (speed)
6334 *cost += extra_cost->fp[mode == DFmode].narrow;
6335 return false;
6337 case FIX:
6338 case UNSIGNED_FIX:
6339 x = XEXP (x, 0);
6340 /* Strip the rounding part. They will all be implemented
6341 by the fcvt* family of instructions anyway. */
6342 if (GET_CODE (x) == UNSPEC)
6344 unsigned int uns_code = XINT (x, 1);
6346 if (uns_code == UNSPEC_FRINTA
6347 || uns_code == UNSPEC_FRINTM
6348 || uns_code == UNSPEC_FRINTN
6349 || uns_code == UNSPEC_FRINTP
6350 || uns_code == UNSPEC_FRINTZ)
6351 x = XVECEXP (x, 0, 0);
6354 if (speed)
6355 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6357 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6358 return true;
6360 case ABS:
6361 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6363 /* FABS and FNEG are analogous. */
6364 if (speed)
6365 *cost += extra_cost->fp[mode == DFmode].neg;
6367 else
6369 /* Integer ABS will either be split into
6370 two arithmetic instructions, or will be an ABS
6371 (scalar), which we don't model. */
6372 *cost = COSTS_N_INSNS (2);
6373 if (speed)
6374 *cost += 2 * extra_cost->alu.arith;
6376 return false;
6378 case SMAX:
6379 case SMIN:
6380 if (speed)
6382 /* FMAXNM/FMINNM/FMAX/FMIN.
6383 TODO: This may not be accurate for all implementations, but
6384 we do not model this in the cost tables. */
6385 *cost += extra_cost->fp[mode == DFmode].addsub;
6387 return false;
6389 case UNSPEC:
6390 /* The floating point round to integer frint* instructions. */
6391 if (aarch64_frint_unspec_p (XINT (x, 1)))
6393 if (speed)
6394 *cost += extra_cost->fp[mode == DFmode].roundint;
6396 return false;
6399 if (XINT (x, 1) == UNSPEC_RBIT)
6401 if (speed)
6402 *cost += extra_cost->alu.rev;
6404 return false;
6406 break;
6408 case TRUNCATE:
6410 /* Decompose <su>muldi3_highpart. */
6411 if (/* (truncate:DI */
6412 mode == DImode
6413 /* (lshiftrt:TI */
6414 && GET_MODE (XEXP (x, 0)) == TImode
6415 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6416 /* (mult:TI */
6417 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6418 /* (ANY_EXTEND:TI (reg:DI))
6419 (ANY_EXTEND:TI (reg:DI))) */
6420 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6421 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6422 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6423 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6424 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6425 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6426 /* (const_int 64) */
6427 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6428 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6430 /* UMULH/SMULH. */
6431 if (speed)
6432 *cost += extra_cost->mult[mode == DImode].extend;
6433 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6434 MULT, 0, speed);
6435 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6436 MULT, 1, speed);
6437 return true;
6440 /* Fall through. */
6441 default:
6442 break;
6445 if (dump_file && (dump_flags & TDF_DETAILS))
6446 fprintf (dump_file,
6447 "\nFailed to cost RTX. Assuming default cost.\n");
6449 return true;
6452 /* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
6453 calculated for X. This cost is stored in *COST. Returns true
6454 if the total cost of X was calculated. */
6455 static bool
6456 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6457 int param, int *cost, bool speed)
6459 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6461 if (dump_file && (dump_flags & TDF_DETAILS))
6463 print_rtl_single (dump_file, x);
6464 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6465 speed ? "Hot" : "Cold",
6466 *cost, result ? "final" : "partial");
6469 return result;
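/* Illustrative example (editorial addition, not in the original source):
   with RTL dumps enabled (-fdump-rtl-<pass>-details) the wrapper above
   prints the RTX being costed followed by a line such as

       Hot cost: 8 (final)

   where "Hot"/"Cold" mirrors the SPEED argument and "final"/"partial"
   says whether aarch64_rtx_costs costed the whole expression.  The
   value 8 is only an example.  */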
6472 static int
6473 aarch64_register_move_cost (machine_mode mode,
6474 reg_class_t from_i, reg_class_t to_i)
6476 enum reg_class from = (enum reg_class) from_i;
6477 enum reg_class to = (enum reg_class) to_i;
6478 const struct cpu_regmove_cost *regmove_cost
6479 = aarch64_tune_params->regmove_cost;
6481 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6482 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6483 to = GENERAL_REGS;
6485 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6486 from = GENERAL_REGS;
6488 /* Moving between GPRs and the stack costs the same as GP2GP. */
6489 if ((from == GENERAL_REGS && to == STACK_REG)
6490 || (to == GENERAL_REGS && from == STACK_REG))
6491 return regmove_cost->GP2GP;
6493 /* To/From the stack register, we move via the gprs. */
6494 if (to == STACK_REG || from == STACK_REG)
6495 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6496 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6498 if (GET_MODE_SIZE (mode) == 16)
6500 /* 128-bit operations on general registers require 2 instructions. */
6501 if (from == GENERAL_REGS && to == GENERAL_REGS)
6502 return regmove_cost->GP2GP * 2;
6503 else if (from == GENERAL_REGS)
6504 return regmove_cost->GP2FP * 2;
6505 else if (to == GENERAL_REGS)
6506 return regmove_cost->FP2GP * 2;
6508 /* When AdvSIMD instructions are disabled it is not possible to move
6509 a 128-bit value directly between Q registers. This is handled in
6510 secondary reload. A general register is used as a scratch to move
6511 the upper DI value and the lower DI value is moved directly,
6512 hence the cost is the sum of three moves. */
6513 if (! TARGET_SIMD)
6514 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6516 return regmove_cost->FP2FP;
6519 if (from == GENERAL_REGS && to == GENERAL_REGS)
6520 return regmove_cost->GP2GP;
6521 else if (from == GENERAL_REGS)
6522 return regmove_cost->GP2FP;
6523 else if (to == GENERAL_REGS)
6524 return regmove_cost->FP2GP;
6526 return regmove_cost->FP2FP;
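/* Illustrative example (editorial addition, not in the original source):
   with hypothetical tuning costs GP2GP = 1, GP2FP = 5, FP2GP = 5 and
   FP2FP = 2, a TImode (16-byte) move would be costed by the hook above as

     GENERAL_REGS -> GENERAL_REGS             1 * 2 = 2
     GENERAL_REGS -> FP_REGS                  5 * 2 = 10
     FP_REGS -> FP_REGS, TARGET_SIMD          2
     FP_REGS -> FP_REGS, !TARGET_SIMD         5 + 5 + 2 = 12
       (the upper 64 bits bounce through a general register)

   The numbers are invented for illustration; real values come from the
   selected cpu_regmove_cost table.  */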
6529 static int
6530 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
6531 reg_class_t rclass ATTRIBUTE_UNUSED,
6532 bool in ATTRIBUTE_UNUSED)
6534 return aarch64_tune_params->memmov_cost;
6537 /* Return the number of instructions that can be issued per cycle. */
6538 static int
6539 aarch64_sched_issue_rate (void)
6541 return aarch64_tune_params->issue_rate;
6544 static int
6545 aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
6547 int issue_rate = aarch64_sched_issue_rate ();
6549 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
6552 /* Vectorizer cost model target hooks. */
6554 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6555 static int
6556 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6557 tree vectype,
6558 int misalign ATTRIBUTE_UNUSED)
6560 unsigned elements;
6562 switch (type_of_cost)
6564 case scalar_stmt:
6565 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6567 case scalar_load:
6568 return aarch64_tune_params->vec_costs->scalar_load_cost;
6570 case scalar_store:
6571 return aarch64_tune_params->vec_costs->scalar_store_cost;
6573 case vector_stmt:
6574 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6576 case vector_load:
6577 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6579 case vector_store:
6580 return aarch64_tune_params->vec_costs->vec_store_cost;
6582 case vec_to_scalar:
6583 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6585 case scalar_to_vec:
6586 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6588 case unaligned_load:
6589 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6591 case unaligned_store:
6592 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6594 case cond_branch_taken:
6595 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6597 case cond_branch_not_taken:
6598 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6600 case vec_perm:
6601 case vec_promote_demote:
6602 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6604 case vec_construct:
6605 elements = TYPE_VECTOR_SUBPARTS (vectype);
6606 return elements / 2 + 1;
6608 default:
6609 gcc_unreachable ();
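/* Illustrative example (editorial addition, not in the original source):
   vec_construct is the one case above whose cost is derived from the
   number of lanes rather than from the tuning tables.  A V4SF constructor
   (4 elements) is costed as 4 / 2 + 1 = 3, a V16QI constructor
   (16 elements) as 16 / 2 + 1 = 9.  */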
6613 /* Implement targetm.vectorize.add_stmt_cost. */
6614 static unsigned
6615 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6616 struct _stmt_vec_info *stmt_info, int misalign,
6617 enum vect_cost_model_location where)
6619 unsigned *cost = (unsigned *) data;
6620 unsigned retval = 0;
6622 if (flag_vect_cost_model)
6624 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6625 int stmt_cost =
6626 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6628 /* Statements in an inner loop relative to the loop being
6629 vectorized are weighted more heavily. The value here is
6630 a function (linear for now) of the loop nest level. */
6631 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6633 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6634 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6635 unsigned nest_level = loop_depth (loop);
6637 count *= nest_level;
6640 retval = (unsigned) (count * stmt_cost);
6641 cost[where] += retval;
6644 return retval;
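/* Illustrative example (editorial addition, not in the original source):
   if a vector statement with a base cost of 1 and COUNT of 1 sits in an
   inner loop at loop_depth 2, the hook above records 1 * 2 * 1 = 2 in the
   vect_body bucket, so statements in deeper loop nests weigh
   proportionally more.  */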
6647 static void initialize_aarch64_code_model (void);
6649 /* Parse the architecture extension string. */
6651 static void
6652 aarch64_parse_extension (char *str)
6654 /* The extension string is parsed left to right. */
6655 const struct aarch64_option_extension *opt = NULL;
6657 /* Flag to say whether we are adding or removing an extension. */
6658 int adding_ext = -1;
6660 while (str != NULL && *str != 0)
6662 char *ext;
6663 size_t len;
6665 str++;
6666 ext = strchr (str, '+');
6668 if (ext != NULL)
6669 len = ext - str;
6670 else
6671 len = strlen (str);
6673 if (len >= 2 && strncmp (str, "no", 2) == 0)
6675 adding_ext = 0;
6676 len -= 2;
6677 str += 2;
6679 else if (len > 0)
6680 adding_ext = 1;
6682 if (len == 0)
6684 error ("missing feature modifier after %qs", adding_ext ? "+"
6685 : "+no");
6686 return;
6689 /* Scan over the extensions table trying to find an exact match. */
6690 for (opt = all_extensions; opt->name != NULL; opt++)
6692 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6694 /* Add or remove the extension. */
6695 if (adding_ext)
6696 aarch64_isa_flags |= opt->flags_on;
6697 else
6698 aarch64_isa_flags &= ~(opt->flags_off);
6699 break;
6703 if (opt->name == NULL)
6705 /* Extension not found in list. */
6706 error ("unknown feature modifier %qs", str);
6707 return;
6710 str = ext;
6713 return;
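/* Illustrative example (editorial addition, not in the original source):
   for "-march=armv8-a+crc+nocrypto", aarch64_parse_arch below matches
   "armv8-a" and hands "+crc+nocrypto" to this function, which in effect
   performs

     aarch64_isa_flags |= <flags_on for "crc">;
     aarch64_isa_flags &= ~<flags_off for "crypto">;

   The valid modifier names and their flag masks come from
   aarch64-option-extensions.def; "crc" and "crypto" are used here purely
   as examples.  */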
6716 /* Parse the ARCH string. */
6718 static void
6719 aarch64_parse_arch (void)
6721 char *ext;
6722 const struct processor *arch;
6723 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6724 size_t len;
6726 strcpy (str, aarch64_arch_string);
6728 ext = strchr (str, '+');
6730 if (ext != NULL)
6731 len = ext - str;
6732 else
6733 len = strlen (str);
6735 if (len == 0)
6737 error ("missing arch name in -march=%qs", str);
6738 return;
6741 /* Loop through the list of supported ARCHs to find a match. */
6742 for (arch = all_architectures; arch->name != NULL; arch++)
6744 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6746 selected_arch = arch;
6747 aarch64_isa_flags = selected_arch->flags;
6749 if (!selected_cpu)
6750 selected_cpu = &all_cores[selected_arch->core];
6752 if (ext != NULL)
6754 /* ARCH string contains at least one extension. */
6755 aarch64_parse_extension (ext);
6758 if (strcmp (selected_arch->arch, selected_cpu->arch))
6760 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6761 selected_cpu->name, selected_arch->name);
6764 return;
6768 /* ARCH name not found in list. */
6769 error ("unknown value %qs for -march", str);
6770 return;
6773 /* Parse the CPU string. */
6775 static void
6776 aarch64_parse_cpu (void)
6778 char *ext;
6779 const struct processor *cpu;
6780 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6781 size_t len;
6783 strcpy (str, aarch64_cpu_string);
6785 ext = strchr (str, '+');
6787 if (ext != NULL)
6788 len = ext - str;
6789 else
6790 len = strlen (str);
6792 if (len == 0)
6794 error ("missing cpu name in -mcpu=%qs", str);
6795 return;
6798 /* Loop through the list of supported CPUs to find a match. */
6799 for (cpu = all_cores; cpu->name != NULL; cpu++)
6801 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6803 selected_cpu = cpu;
6804 aarch64_isa_flags = selected_cpu->flags;
6806 if (ext != NULL)
6808 /* CPU string contains at least one extension. */
6809 aarch64_parse_extension (ext);
6812 return;
6816 /* CPU name not found in list. */
6817 error ("unknown value %qs for -mcpu", str);
6818 return;
6821 /* Parse the TUNE string. */
6823 static void
6824 aarch64_parse_tune (void)
6826 const struct processor *cpu;
6827 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6828 strcpy (str, aarch64_tune_string);
6830 /* Loop through the list of supported CPUs to find a match. */
6831 for (cpu = all_cores; cpu->name != NULL; cpu++)
6833 if (strcmp (cpu->name, str) == 0)
6835 selected_tune = cpu;
6836 return;
6840 /* CPU name not found in list. */
6841 error ("unknown value %qs for -mtune", str);
6842 return;
6846 /* Implement TARGET_OPTION_OVERRIDE. */
6848 static void
6849 aarch64_override_options (void)
6851 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6852 If either of -march or -mtune is given, they override their
6853 respective component of -mcpu.
6855 So, first parse AARCH64_CPU_STRING, then the others; be careful
6856 with -march since, if -mcpu is not present on the command line,
6857 -march must set a sensible default CPU. */
6858 if (aarch64_cpu_string)
6860 aarch64_parse_cpu ();
6863 if (aarch64_arch_string)
6865 aarch64_parse_arch ();
6868 if (aarch64_tune_string)
6870 aarch64_parse_tune ();
6873 #ifndef HAVE_AS_MABI_OPTION
6874 /* The compiler may have been configured with 2.23.* binutils, which does
6875 not have support for ILP32. */
6876 if (TARGET_ILP32)
6877 error ("Assembler does not support -mabi=ilp32");
6878 #endif
6880 initialize_aarch64_code_model ();
6882 aarch64_build_bitmask_table ();
6884 /* This target defaults to strict volatile bitfields. */
6885 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6886 flag_strict_volatile_bitfields = 1;
6888 /* If the user did not specify a processor, choose the default
6889 one for them. This will be the CPU set during configuration using
6890 --with-cpu, otherwise it is "generic". */
6891 if (!selected_cpu)
6893 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6894 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6897 gcc_assert (selected_cpu);
6899 if (!selected_tune)
6900 selected_tune = selected_cpu;
6902 aarch64_tune_flags = selected_tune->flags;
6903 aarch64_tune = selected_tune->core;
6904 aarch64_tune_params = selected_tune->tune;
6905 aarch64_architecture_version = selected_cpu->architecture_version;
6907 if (aarch64_fix_a53_err835769 == 2)
6909 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6910 aarch64_fix_a53_err835769 = 1;
6911 #else
6912 aarch64_fix_a53_err835769 = 0;
6913 #endif
6916 /* If not optimizing for size, set the default
6917 alignment to what the target wants. */
6918 if (!optimize_size)
6920 if (align_loops <= 0)
6921 align_loops = aarch64_tune_params->loop_align;
6922 if (align_jumps <= 0)
6923 align_jumps = aarch64_tune_params->jump_align;
6924 if (align_functions <= 0)
6925 align_functions = aarch64_tune_params->function_align;
6928 aarch64_override_options_after_change ();
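/* Illustrative example (editorial addition, not in the original source):
   for "-mcpu=cortex-a57 -mtune=cortex-a53" the code above first derives
   the architecture and ISA flags from cortex-a57 via aarch64_parse_cpu,
   then lets aarch64_parse_tune replace only the tuning parameters with
   the cortex-a53 ones.  An explicit -march would likewise override just
   the architecture component implied by -mcpu.  */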
6931 /* Implement targetm.override_options_after_change. */
6933 static void
6934 aarch64_override_options_after_change (void)
6936 if (flag_omit_frame_pointer)
6937 flag_omit_leaf_frame_pointer = false;
6938 else if (flag_omit_leaf_frame_pointer)
6939 flag_omit_frame_pointer = true;
6942 static struct machine_function *
6943 aarch64_init_machine_status (void)
6945 struct machine_function *machine;
6946 machine = ggc_cleared_alloc<machine_function> ();
6947 return machine;
6950 void
6951 aarch64_init_expanders (void)
6953 init_machine_status = aarch64_init_machine_status;
6956 /* A checking mechanism for the implementation of the various code models. */
6957 static void
6958 initialize_aarch64_code_model (void)
6960 if (flag_pic)
6962 switch (aarch64_cmodel_var)
6964 case AARCH64_CMODEL_TINY:
6965 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6966 break;
6967 case AARCH64_CMODEL_SMALL:
6968 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6969 break;
6970 case AARCH64_CMODEL_LARGE:
6971 sorry ("code model %qs with -f%s", "large",
6972 flag_pic > 1 ? "PIC" : "pic");
6973 default:
6974 gcc_unreachable ();
6977 else
6978 aarch64_cmodel = aarch64_cmodel_var;
6981 /* Return true if SYMBOL_REF X binds locally. */
6983 static bool
6984 aarch64_symbol_binds_local_p (const_rtx x)
6986 return (SYMBOL_REF_DECL (x)
6987 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6988 : SYMBOL_REF_LOCAL_P (x));
6991 /* Return true if SYMBOL_REF X is thread local. */
6992 static bool
6993 aarch64_tls_symbol_p (rtx x)
6995 if (! TARGET_HAVE_TLS)
6996 return false;
6998 if (GET_CODE (x) != SYMBOL_REF)
6999 return false;
7001 return SYMBOL_REF_TLS_MODEL (x) != 0;
7004 /* Classify a TLS symbol into one of the TLS kinds. */
7005 enum aarch64_symbol_type
7006 aarch64_classify_tls_symbol (rtx x)
7008 enum tls_model tls_kind = tls_symbolic_operand_type (x);
7010 switch (tls_kind)
7012 case TLS_MODEL_GLOBAL_DYNAMIC:
7013 case TLS_MODEL_LOCAL_DYNAMIC:
7014 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
7016 case TLS_MODEL_INITIAL_EXEC:
7017 return SYMBOL_SMALL_GOTTPREL;
7019 case TLS_MODEL_LOCAL_EXEC:
7020 return SYMBOL_SMALL_TPREL;
7022 case TLS_MODEL_EMULATED:
7023 case TLS_MODEL_NONE:
7024 return SYMBOL_FORCE_TO_MEM;
7026 default:
7027 gcc_unreachable ();
7031 /* Return the method that should be used to access SYMBOL_REF or
7032 LABEL_REF X in context CONTEXT. */
7034 enum aarch64_symbol_type
7035 aarch64_classify_symbol (rtx x, rtx offset,
7036 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
7038 if (GET_CODE (x) == LABEL_REF)
7040 switch (aarch64_cmodel)
7042 case AARCH64_CMODEL_LARGE:
7043 return SYMBOL_FORCE_TO_MEM;
7045 case AARCH64_CMODEL_TINY_PIC:
7046 case AARCH64_CMODEL_TINY:
7047 return SYMBOL_TINY_ABSOLUTE;
7049 case AARCH64_CMODEL_SMALL_PIC:
7050 case AARCH64_CMODEL_SMALL:
7051 return SYMBOL_SMALL_ABSOLUTE;
7053 default:
7054 gcc_unreachable ();
7058 if (GET_CODE (x) == SYMBOL_REF)
7060 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7061 return SYMBOL_FORCE_TO_MEM;
7063 if (aarch64_tls_symbol_p (x))
7064 return aarch64_classify_tls_symbol (x);
7066 switch (aarch64_cmodel)
7068 case AARCH64_CMODEL_TINY:
7069 /* When we retrieve symbol + offset address, we have to make sure
7070 the offset does not cause overflow of the final address. But
7071 we have no way of knowing the address of the symbol at compile time,
7072 so we can't accurately say if the distance between the PC and
7073 symbol + offset is outside the addressable range of +/-1M in the
7074 TINY code model. So we rely on images not being greater than
7075 1M, cap the offset at 1M, and anything beyond 1M will have to
7076 be loaded using an alternative mechanism. */
7077 if (SYMBOL_REF_WEAK (x)
7078 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
7079 return SYMBOL_FORCE_TO_MEM;
7080 return SYMBOL_TINY_ABSOLUTE;
7082 case AARCH64_CMODEL_SMALL:
7083 /* Same reasoning as the tiny code model, but the offset cap here is
7084 4G. */
7085 if (SYMBOL_REF_WEAK (x)
7086 || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
7087 || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
7088 return SYMBOL_FORCE_TO_MEM;
7089 return SYMBOL_SMALL_ABSOLUTE;
7091 case AARCH64_CMODEL_TINY_PIC:
7092 if (!aarch64_symbol_binds_local_p (x))
7093 return SYMBOL_TINY_GOT;
7094 return SYMBOL_TINY_ABSOLUTE;
7096 case AARCH64_CMODEL_SMALL_PIC:
7097 if (!aarch64_symbol_binds_local_p (x))
7098 return SYMBOL_SMALL_GOT;
7099 return SYMBOL_SMALL_ABSOLUTE;
7101 default:
7102 gcc_unreachable ();
7106 /* By default push everything into the constant pool. */
7107 return SYMBOL_FORCE_TO_MEM;
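/* Illustrative example (editorial addition, not in the original source):
   under -mcmodel=tiny, a reference such as "array + 0x200000" (offset 2M,
   beyond the +/-1M cap discussed above) is classified as
   SYMBOL_FORCE_TO_MEM and its address is loaded from the literal pool,
   whereas "array + 16" stays SYMBOL_TINY_ABSOLUTE and can be formed with
   a single ADR.  The symbol name is hypothetical.  */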
7110 bool
7111 aarch64_constant_address_p (rtx x)
7113 return (CONSTANT_P (x) && memory_address_p (DImode, x));
7116 bool
7117 aarch64_legitimate_pic_operand_p (rtx x)
7119 if (GET_CODE (x) == SYMBOL_REF
7120 || (GET_CODE (x) == CONST
7121 && GET_CODE (XEXP (x, 0)) == PLUS
7122 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7123 return false;
7125 return true;
7128 /* Return true if X holds either a quarter-precision or
7129 floating-point +0.0 constant. */
7130 static bool
7131 aarch64_valid_floating_const (machine_mode mode, rtx x)
7133 if (!CONST_DOUBLE_P (x))
7134 return false;
7136 /* TODO: We could handle moving 0.0 to a TFmode register,
7137 but first we would like to refactor the movtf_aarch64
7138 pattern to be more amenable to splitting moves properly and
7139 correctly gating on TARGET_SIMD. For now, reject all
7140 constants that are not destined for SFmode or DFmode registers. */
7141 if (!(mode == SFmode || mode == DFmode))
7142 return false;
7144 if (aarch64_float_const_zero_rtx_p (x))
7145 return true;
7146 return aarch64_float_const_representable_p (x);
7149 static bool
7150 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
7152 /* Do not allow vector struct mode constants. We could support
7153 0 and -1 easily, but they need support in aarch64-simd.md. */
7154 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7155 return false;
7157 /* This could probably go away because
7158 we now decompose CONST_INTs according to expand_mov_immediate. */
7159 if ((GET_CODE (x) == CONST_VECTOR
7160 && aarch64_simd_valid_immediate (x, mode, false, NULL))
7161 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7162 return !targetm.cannot_force_const_mem (mode, x);
7164 if (GET_CODE (x) == HIGH
7165 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7166 return true;
7168 return aarch64_constant_address_p (x);
7171 rtx
7172 aarch64_load_tp (rtx target)
7174 if (!target
7175 || GET_MODE (target) != Pmode
7176 || !register_operand (target, Pmode))
7177 target = gen_reg_rtx (Pmode);
7179 /* Can return in any reg. */
7180 emit_insn (gen_aarch64_load_tp_hard (target));
7181 return target;
7184 /* On AAPCS systems, this is the "struct __va_list". */
7185 static GTY(()) tree va_list_type;
7187 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7188 Return the type to use as __builtin_va_list.
7190 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7192 struct __va_list
7194 void *__stack;
7195 void *__gr_top;
7196 void *__vr_top;
7197 int __gr_offs;
7198 int __vr_offs;
7199 }; */
7201 static tree
7202 aarch64_build_builtin_va_list (void)
7204 tree va_list_name;
7205 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7207 /* Create the type. */
7208 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7209 /* Give it the required name. */
7210 va_list_name = build_decl (BUILTINS_LOCATION,
7211 TYPE_DECL,
7212 get_identifier ("__va_list"),
7213 va_list_type);
7214 DECL_ARTIFICIAL (va_list_name) = 1;
7215 TYPE_NAME (va_list_type) = va_list_name;
7216 TYPE_STUB_DECL (va_list_type) = va_list_name;
7218 /* Create the fields. */
7219 f_stack = build_decl (BUILTINS_LOCATION,
7220 FIELD_DECL, get_identifier ("__stack"),
7221 ptr_type_node);
7222 f_grtop = build_decl (BUILTINS_LOCATION,
7223 FIELD_DECL, get_identifier ("__gr_top"),
7224 ptr_type_node);
7225 f_vrtop = build_decl (BUILTINS_LOCATION,
7226 FIELD_DECL, get_identifier ("__vr_top"),
7227 ptr_type_node);
7228 f_groff = build_decl (BUILTINS_LOCATION,
7229 FIELD_DECL, get_identifier ("__gr_offs"),
7230 integer_type_node);
7231 f_vroff = build_decl (BUILTINS_LOCATION,
7232 FIELD_DECL, get_identifier ("__vr_offs"),
7233 integer_type_node);
7235 DECL_ARTIFICIAL (f_stack) = 1;
7236 DECL_ARTIFICIAL (f_grtop) = 1;
7237 DECL_ARTIFICIAL (f_vrtop) = 1;
7238 DECL_ARTIFICIAL (f_groff) = 1;
7239 DECL_ARTIFICIAL (f_vroff) = 1;
7241 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7242 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7243 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7244 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7245 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7247 TYPE_FIELDS (va_list_type) = f_stack;
7248 DECL_CHAIN (f_stack) = f_grtop;
7249 DECL_CHAIN (f_grtop) = f_vrtop;
7250 DECL_CHAIN (f_vrtop) = f_groff;
7251 DECL_CHAIN (f_groff) = f_vroff;
7253 /* Compute its layout. */
7254 layout_type (va_list_type);
7256 return va_list_type;
7259 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7260 static void
7261 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7263 const CUMULATIVE_ARGS *cum;
7264 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7265 tree stack, grtop, vrtop, groff, vroff;
7266 tree t;
7267 int gr_save_area_size;
7268 int vr_save_area_size;
7269 int vr_offset;
7271 cum = &crtl->args.info;
7272 gr_save_area_size
7273 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7274 vr_save_area_size
7275 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7277 if (TARGET_GENERAL_REGS_ONLY)
7279 if (cum->aapcs_nvrn > 0)
7280 sorry ("%qs and floating point or vector arguments",
7281 "-mgeneral-regs-only");
7282 vr_save_area_size = 0;
7285 f_stack = TYPE_FIELDS (va_list_type_node);
7286 f_grtop = DECL_CHAIN (f_stack);
7287 f_vrtop = DECL_CHAIN (f_grtop);
7288 f_groff = DECL_CHAIN (f_vrtop);
7289 f_vroff = DECL_CHAIN (f_groff);
7291 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7292 NULL_TREE);
7293 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7294 NULL_TREE);
7295 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7296 NULL_TREE);
7297 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7298 NULL_TREE);
7299 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7300 NULL_TREE);
7302 /* Emit code to initialize STACK, which points to the next varargs stack
7303 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7304 by named arguments. STACK is 8-byte aligned. */
7305 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7306 if (cum->aapcs_stack_size > 0)
7307 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7308 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7309 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7311 /* Emit code to initialize GRTOP, the top of the GR save area.
7312 virtual_incoming_args_rtx should have been 16 byte aligned. */
7313 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7314 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7315 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7317 /* Emit code to initialize VRTOP, the top of the VR save area.
7318 This address is gr_save_area_bytes below GRTOP, rounded
7319 down to the next 16-byte boundary. */
7320 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7321 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7322 STACK_BOUNDARY / BITS_PER_UNIT);
7324 if (vr_offset)
7325 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7326 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7327 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7329 /* Emit code to initialize GROFF, the offset from GRTOP of the
7330 next GPR argument. */
7331 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7332 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7333 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7335 /* Likewise emit code to initialize VROFF, the offset from VRTOP
7336 of the next VR argument. */
7337 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7338 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7339 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
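/* Illustrative example (editorial addition, not in the original source):
   for a variadic function "int f (int n, ...)" the single named argument
   occupies x0, so the code above initializes the va_list roughly as

     __stack   = start of the incoming stack argument area
     __gr_top  = top of the x1-x7 save area
     __vr_top  = top of the q0-q7 save area
     __gr_offs = -(8 - 1) * 8  = -56
     __vr_offs = -8 * 16       = -128

   using NUM_ARG_REGS = 8 general registers of 8 bytes each and
   NUM_FP_ARG_REGS = 8 vector registers of 16 bytes each.  */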
7342 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7344 static tree
7345 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7346 gimple_seq *post_p ATTRIBUTE_UNUSED)
7348 tree addr;
7349 bool indirect_p;
7350 bool is_ha; /* is HFA or HVA. */
7351 bool dw_align; /* double-word align. */
7352 machine_mode ag_mode = VOIDmode;
7353 int nregs;
7354 machine_mode mode;
7356 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7357 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7358 HOST_WIDE_INT size, rsize, adjust, align;
7359 tree t, u, cond1, cond2;
7361 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7362 if (indirect_p)
7363 type = build_pointer_type (type);
7365 mode = TYPE_MODE (type);
7367 f_stack = TYPE_FIELDS (va_list_type_node);
7368 f_grtop = DECL_CHAIN (f_stack);
7369 f_vrtop = DECL_CHAIN (f_grtop);
7370 f_groff = DECL_CHAIN (f_vrtop);
7371 f_vroff = DECL_CHAIN (f_groff);
7373 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7374 f_stack, NULL_TREE);
7375 size = int_size_in_bytes (type);
7376 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7378 dw_align = false;
7379 adjust = 0;
7380 if (aarch64_vfp_is_call_or_return_candidate (mode,
7381 type,
7382 &ag_mode,
7383 &nregs,
7384 &is_ha))
7386 /* TYPE passed in fp/simd registers. */
7387 if (TARGET_GENERAL_REGS_ONLY)
7388 sorry ("%qs and floating point or vector arguments",
7389 "-mgeneral-regs-only");
7391 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7392 unshare_expr (valist), f_vrtop, NULL_TREE);
7393 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7394 unshare_expr (valist), f_vroff, NULL_TREE);
7396 rsize = nregs * UNITS_PER_VREG;
7398 if (is_ha)
7400 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7401 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7403 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7404 && size < UNITS_PER_VREG)
7406 adjust = UNITS_PER_VREG - size;
7409 else
7411 /* TYPE passed in general registers. */
7412 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7413 unshare_expr (valist), f_grtop, NULL_TREE);
7414 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7415 unshare_expr (valist), f_groff, NULL_TREE);
7416 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7417 nregs = rsize / UNITS_PER_WORD;
7419 if (align > 8)
7420 dw_align = true;
7422 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7423 && size < UNITS_PER_WORD)
7425 adjust = UNITS_PER_WORD - size;
7429 /* Get a local temporary for the field value. */
7430 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7432 /* Emit code to branch if off >= 0. */
7433 t = build2 (GE_EXPR, boolean_type_node, off,
7434 build_int_cst (TREE_TYPE (off), 0));
7435 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7437 if (dw_align)
7439 /* Emit: offs = (offs + 15) & -16. */
7440 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7441 build_int_cst (TREE_TYPE (off), 15));
7442 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7443 build_int_cst (TREE_TYPE (off), -16));
7444 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7446 else
7447 roundup = NULL;
7449 /* Update ap.__[g|v]r_offs */
7450 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7451 build_int_cst (TREE_TYPE (off), rsize));
7452 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7454 /* String up. */
7455 if (roundup)
7456 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7458 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7459 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7460 build_int_cst (TREE_TYPE (f_off), 0));
7461 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7463 /* String up: make sure the assignment happens before the use. */
7464 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7465 COND_EXPR_ELSE (cond1) = t;
7467 /* Prepare the trees handling the argument that is passed on the stack;
7468 the top-level node will be stored in ON_STACK. */
7469 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7470 if (align > 8)
7472 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7473 t = fold_convert (intDI_type_node, arg);
7474 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7475 build_int_cst (TREE_TYPE (t), 15));
7476 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7477 build_int_cst (TREE_TYPE (t), -16));
7478 t = fold_convert (TREE_TYPE (arg), t);
7479 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7481 else
7482 roundup = NULL;
7483 /* Advance ap.__stack */
7484 t = fold_convert (intDI_type_node, arg);
7485 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7486 build_int_cst (TREE_TYPE (t), size + 7));
7487 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7488 build_int_cst (TREE_TYPE (t), -8));
7489 t = fold_convert (TREE_TYPE (arg), t);
7490 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7491 /* String up roundup and advance. */
7492 if (roundup)
7493 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7494 /* String up with arg */
7495 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7496 /* Big-endianness related address adjustment. */
7497 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7498 && size < UNITS_PER_WORD)
7500 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7501 size_int (UNITS_PER_WORD - size));
7502 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7505 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7506 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7508 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7509 t = off;
7510 if (adjust)
7511 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7512 build_int_cst (TREE_TYPE (off), adjust));
7514 t = fold_convert (sizetype, t);
7515 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7517 if (is_ha)
7519 /* type ha; // treat as "struct {ftype field[n];}"
7520 ... [computing offs]
7521 for (i = 0; i <nregs; ++i, offs += 16)
7522 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7523 return ha; */
7524 int i;
7525 tree tmp_ha, field_t, field_ptr_t;
7527 /* Declare a local variable. */
7528 tmp_ha = create_tmp_var_raw (type, "ha");
7529 gimple_add_tmp_var (tmp_ha);
7531 /* Establish the base type. */
7532 switch (ag_mode)
7534 case SFmode:
7535 field_t = float_type_node;
7536 field_ptr_t = float_ptr_type_node;
7537 break;
7538 case DFmode:
7539 field_t = double_type_node;
7540 field_ptr_t = double_ptr_type_node;
7541 break;
7542 case TFmode:
7543 field_t = long_double_type_node;
7544 field_ptr_t = long_double_ptr_type_node;
7545 break;
7546 /* The half precision and quad precision are not fully supported yet. Enable
7547 the following code after the support is complete. Need to find the correct
7548 type node for __fp16 *. */
7549 #if 0
7550 case HFmode:
7551 field_t = float_type_node;
7552 field_ptr_t = float_ptr_type_node;
7553 break;
7554 #endif
7555 case V2SImode:
7556 case V4SImode:
7558 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7559 field_t = build_vector_type_for_mode (innertype, ag_mode);
7560 field_ptr_t = build_pointer_type (field_t);
7562 break;
7563 default:
7564 gcc_assert (0);
7567 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
7568 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7569 addr = t;
7570 t = fold_convert (field_ptr_t, addr);
7571 t = build2 (MODIFY_EXPR, field_t,
7572 build1 (INDIRECT_REF, field_t, tmp_ha),
7573 build1 (INDIRECT_REF, field_t, t));
7575 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7576 for (i = 1; i < nregs; ++i)
7578 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7579 u = fold_convert (field_ptr_t, addr);
7580 u = build2 (MODIFY_EXPR, field_t,
7581 build2 (MEM_REF, field_t, tmp_ha,
7582 build_int_cst (field_ptr_t,
7583 (i *
7584 int_size_in_bytes (field_t)))),
7585 build1 (INDIRECT_REF, field_t, u));
7586 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7589 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7590 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7593 COND_EXPR_ELSE (cond2) = t;
7594 addr = fold_convert (build_pointer_type (type), cond1);
7595 addr = build_va_arg_indirect_ref (addr);
7597 if (indirect_p)
7598 addr = build_va_arg_indirect_ref (addr);
7600 return addr;
7603 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7605 static void
7606 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
7607 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7608 int no_rtl)
7610 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7611 CUMULATIVE_ARGS local_cum;
7612 int gr_saved, vr_saved;
7614 /* The caller has advanced CUM up to, but not beyond, the last named
7615 argument. Advance a local copy of CUM past the last "real" named
7616 argument, to find out how many registers are left over. */
7617 local_cum = *cum;
7618 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7620 /* Find out how many registers we need to save. */
7621 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7622 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7624 if (TARGET_GENERAL_REGS_ONLY)
7626 if (local_cum.aapcs_nvrn > 0)
7627 sorry ("%qs and floating point or vector arguments",
7628 "-mgeneral-regs-only");
7629 vr_saved = 0;
7632 if (!no_rtl)
7634 if (gr_saved > 0)
7636 rtx ptr, mem;
7638 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7639 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7640 - gr_saved * UNITS_PER_WORD);
7641 mem = gen_frame_mem (BLKmode, ptr);
7642 set_mem_alias_set (mem, get_varargs_alias_set ());
7644 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7645 mem, gr_saved);
7647 if (vr_saved > 0)
7649 /* We can't use move_block_from_reg, because it will use
7650 the wrong mode, storing D regs only. */
7651 machine_mode mode = TImode;
7652 int off, i;
7654 /* Set OFF to the offset from virtual_incoming_args_rtx of
7655 the first vector register. The VR save area lies below
7656 the GR one, and is aligned to 16 bytes. */
7657 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7658 STACK_BOUNDARY / BITS_PER_UNIT);
7659 off -= vr_saved * UNITS_PER_VREG;
7661 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7663 rtx ptr, mem;
7665 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7666 mem = gen_frame_mem (mode, ptr);
7667 set_mem_alias_set (mem, get_varargs_alias_set ());
7668 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7669 off += UNITS_PER_VREG;
7674 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7675 any complication of having crtl->args.pretend_args_size changed. */
7676 cfun->machine->frame.saved_varargs_size
7677 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7678 STACK_BOUNDARY / BITS_PER_UNIT)
7679 + vr_saved * UNITS_PER_VREG);
7682 static void
7683 aarch64_conditional_register_usage (void)
7685 int i;
7686 if (!TARGET_FLOAT)
7688 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7690 fixed_regs[i] = 1;
7691 call_used_regs[i] = 1;
7696 /* Walk down the type tree of TYPE counting consecutive base elements.
7697 If *MODEP is VOIDmode, then set it to the first valid floating point
7698 type. If a non-floating point type is found, or if a floating point
7699 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7700 otherwise return the count in the sub-tree. */
7701 static int
7702 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
7704 machine_mode mode;
7705 HOST_WIDE_INT size;
7707 switch (TREE_CODE (type))
7709 case REAL_TYPE:
7710 mode = TYPE_MODE (type);
7711 if (mode != DFmode && mode != SFmode && mode != TFmode)
7712 return -1;
7714 if (*modep == VOIDmode)
7715 *modep = mode;
7717 if (*modep == mode)
7718 return 1;
7720 break;
7722 case COMPLEX_TYPE:
7723 mode = TYPE_MODE (TREE_TYPE (type));
7724 if (mode != DFmode && mode != SFmode && mode != TFmode)
7725 return -1;
7727 if (*modep == VOIDmode)
7728 *modep = mode;
7730 if (*modep == mode)
7731 return 2;
7733 break;
7735 case VECTOR_TYPE:
7736 /* Use V2SImode and V4SImode as representatives of all 64-bit
7737 and 128-bit vector types. */
7738 size = int_size_in_bytes (type);
7739 switch (size)
7741 case 8:
7742 mode = V2SImode;
7743 break;
7744 case 16:
7745 mode = V4SImode;
7746 break;
7747 default:
7748 return -1;
7751 if (*modep == VOIDmode)
7752 *modep = mode;
7754 /* Vector modes are considered to be opaque: two vectors are
7755 equivalent for the purposes of being homogeneous aggregates
7756 if they are the same size. */
7757 if (*modep == mode)
7758 return 1;
7760 break;
7762 case ARRAY_TYPE:
7764 int count;
7765 tree index = TYPE_DOMAIN (type);
7767 /* Can't handle incomplete types nor sizes that are not
7768 fixed. */
7769 if (!COMPLETE_TYPE_P (type)
7770 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7771 return -1;
7773 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7774 if (count == -1
7775 || !index
7776 || !TYPE_MAX_VALUE (index)
7777 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7778 || !TYPE_MIN_VALUE (index)
7779 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7780 || count < 0)
7781 return -1;
7783 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7784 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7786 /* There must be no padding. */
7787 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7788 return -1;
7790 return count;
7793 case RECORD_TYPE:
7795 int count = 0;
7796 int sub_count;
7797 tree field;
7799 /* Can't handle incomplete types nor sizes that are not
7800 fixed. */
7801 if (!COMPLETE_TYPE_P (type)
7802 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7803 return -1;
7805 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7807 if (TREE_CODE (field) != FIELD_DECL)
7808 continue;
7810 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7811 if (sub_count < 0)
7812 return -1;
7813 count += sub_count;
7816 /* There must be no padding. */
7817 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7818 return -1;
7820 return count;
7823 case UNION_TYPE:
7824 case QUAL_UNION_TYPE:
7826 /* These aren't very interesting except in a degenerate case. */
7827 int count = 0;
7828 int sub_count;
7829 tree field;
7831 /* Can't handle incomplete types nor sizes that are not
7832 fixed. */
7833 if (!COMPLETE_TYPE_P (type)
7834 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7835 return -1;
7837 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7839 if (TREE_CODE (field) != FIELD_DECL)
7840 continue;
7842 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7843 if (sub_count < 0)
7844 return -1;
7845 count = count > sub_count ? count : sub_count;
7848 /* There must be no padding. */
7849 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7850 return -1;
7852 return count;
7855 default:
7856 break;
7859 return -1;
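/* Illustrative example (editorial addition, not in the original source):
   the walk above recognises homogeneous aggregates such as

     struct rgb    { float r, g, b; }            (count 3, base mode SFmode)
     struct coords { double xy[2]; double z; }   (count 3, base mode DFmode)

   but rejects aggregates that mix base modes:

     struct mixed  { float f; double d; }        (returns -1)

   The struct names are hypothetical; only the counting rule is being
   illustrated.  */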
7862 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7863 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7864 array types. The C99 floating-point complex types are also considered
7865 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7866 types, which are GCC extensions and out of the scope of AAPCS64, are
7867 treated as composite types here as well.
7869 Note that MODE itself is not sufficient in determining whether a type
7870 is such a composite type or not. This is because
7871 stor-layout.c:compute_record_mode may have already changed the MODE
7872 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7873 structure with only one field may have its MODE set to the mode of the
7874 field. Also an integer mode whose size matches the size of the
7875 RECORD_TYPE type may be used to substitute the original mode
7876 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7877 solely relied on. */
7879 static bool
7880 aarch64_composite_type_p (const_tree type,
7881 machine_mode mode)
7883 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7884 return true;
7886 if (mode == BLKmode
7887 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7888 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7889 return true;
7891 return false;
7894 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7895 type as described in AAPCS64 \S 4.1.2.
7897 See the comment above aarch64_composite_type_p for the notes on MODE. */
7899 static bool
7900 aarch64_short_vector_p (const_tree type,
7901 machine_mode mode)
7903 HOST_WIDE_INT size = -1;
7905 if (type && TREE_CODE (type) == VECTOR_TYPE)
7906 size = int_size_in_bytes (type);
7907 else if (!aarch64_composite_type_p (type, mode)
7908 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7909 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7910 size = GET_MODE_SIZE (mode);
7912 return (size == 8 || size == 16);
7915 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7916 shall be passed or returned in simd/fp register(s) (providing these
7917 parameter passing registers are available).
7919 Upon successful return, *COUNT returns the number of needed registers,
7920 *BASE_MODE returns the mode of the individual register, and when IS_HA
7921 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7922 floating-point aggregate or a homogeneous short-vector aggregate. */
7924 static bool
7925 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
7926 const_tree type,
7927 machine_mode *base_mode,
7928 int *count,
7929 bool *is_ha)
7931 machine_mode new_mode = VOIDmode;
7932 bool composite_p = aarch64_composite_type_p (type, mode);
7934 if (is_ha != NULL) *is_ha = false;
7936 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7937 || aarch64_short_vector_p (type, mode))
7939 *count = 1;
7940 new_mode = mode;
7942 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7944 if (is_ha != NULL) *is_ha = true;
7945 *count = 2;
7946 new_mode = GET_MODE_INNER (mode);
7948 else if (type && composite_p)
7950 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7952 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7954 if (is_ha != NULL) *is_ha = true;
7955 *count = ag_count;
7957 else
7958 return false;
7960 else
7961 return false;
7963 *base_mode = new_mode;
7964 return true;
7967 /* Implement TARGET_STRUCT_VALUE_RTX. */
7969 static rtx
7970 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7971 int incoming ATTRIBUTE_UNUSED)
7973 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7976 /* Implements target hook vector_mode_supported_p. */
7977 static bool
7978 aarch64_vector_mode_supported_p (machine_mode mode)
7980 if (TARGET_SIMD
7981 && (mode == V4SImode || mode == V8HImode
7982 || mode == V16QImode || mode == V2DImode
7983 || mode == V2SImode || mode == V4HImode
7984 || mode == V8QImode || mode == V2SFmode
7985 || mode == V4SFmode || mode == V2DFmode
7986 || mode == V1DFmode))
7987 return true;
7989 return false;
7992 /* Return appropriate SIMD container
7993 for MODE within a vector of WIDTH bits. */
7994 static machine_mode
7995 aarch64_simd_container_mode (machine_mode mode, unsigned width)
7997 gcc_assert (width == 64 || width == 128);
7998 if (TARGET_SIMD)
8000 if (width == 128)
8001 switch (mode)
8003 case DFmode:
8004 return V2DFmode;
8005 case SFmode:
8006 return V4SFmode;
8007 case SImode:
8008 return V4SImode;
8009 case HImode:
8010 return V8HImode;
8011 case QImode:
8012 return V16QImode;
8013 case DImode:
8014 return V2DImode;
8015 default:
8016 break;
8018 else
8019 switch (mode)
8021 case SFmode:
8022 return V2SFmode;
8023 case SImode:
8024 return V2SImode;
8025 case HImode:
8026 return V4HImode;
8027 case QImode:
8028 return V8QImode;
8029 default:
8030 break;
8033 return word_mode;
8036 /* Return 128-bit container as the preferred SIMD mode for MODE. */
8037 static machine_mode
8038 aarch64_preferred_simd_mode (machine_mode mode)
8040 return aarch64_simd_container_mode (mode, 128);
8043 /* Return the bitmask of possible vector sizes for the vectorizer
8044 to iterate over. */
8045 static unsigned int
8046 aarch64_autovectorize_vector_sizes (void)
8048 return (16 | 8);
8051 /* Implement TARGET_MANGLE_TYPE. */
8053 static const char *
8054 aarch64_mangle_type (const_tree type)
8056 /* The AArch64 ABI documents say that "__va_list" has to be
8057 mangled as if it is in the "std" namespace. */
8058 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8059 return "St9__va_list";
8061 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8062 builtin types. */
8063 if (TYPE_NAME (type) != NULL)
8064 return aarch64_mangle_builtin_type (type);
8066 /* Use the default mangling. */
8067 return NULL;
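/* Illustrative example (editorial addition, not in the original source):
   because of the rule above, a C++ function "void f (__builtin_va_list)"
   is mangled as "_Z1fSt9__va_list", exactly as if va_list were a class in
   namespace std, which keeps C++ ABIs for AArch64 compatible across
   implementations.  */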
8071 /* Return true if the rtx_insn contains a MEM RTX somewhere
8072 in it. */
8074 static bool
8075 has_memory_op (rtx_insn *mem_insn)
8077 subrtx_iterator::array_type array;
8078 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8079 if (MEM_P (*iter))
8080 return true;
8082 return false;
8085 /* Find the first rtx_insn before insn that will generate an assembly
8086 instruction. */
8088 static rtx_insn *
8089 aarch64_prev_real_insn (rtx_insn *insn)
8091 if (!insn)
8092 return NULL;
8094 do
8096 insn = prev_real_insn (insn);
8098 while (insn && recog_memoized (insn) < 0);
8100 return insn;
8103 static bool
8104 is_madd_op (enum attr_type t1)
8106 unsigned int i;
8107 /* A number of these may be AArch32 only. */
8108 enum attr_type mlatypes[] = {
8109 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8110 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8111 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8114 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8116 if (t1 == mlatypes[i])
8117 return true;
8120 return false;
8123 /* Check if there is a register dependency between a load and the insn
8124 for which we hold recog_data. */
8126 static bool
8127 dep_between_memop_and_curr (rtx memop)
8129 rtx load_reg;
8130 int opno;
8132 gcc_assert (GET_CODE (memop) == SET);
8134 if (!REG_P (SET_DEST (memop)))
8135 return false;
8137 load_reg = SET_DEST (memop);
8138 for (opno = 1; opno < recog_data.n_operands; opno++)
8140 rtx operand = recog_data.operand[opno];
8141 if (REG_P (operand)
8142 && reg_overlap_mentioned_p (load_reg, operand))
8143 return true;
8146 return false;
8150 /* When working around the Cortex-A53 erratum 835769,
8151 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8152 instruction and has a preceding memory instruction such that a NOP
8153 should be inserted between them. */
8155 bool
8156 aarch64_madd_needs_nop (rtx_insn* insn)
8158 enum attr_type attr_type;
8159 rtx_insn *prev;
8160 rtx body;
8162 if (!aarch64_fix_a53_err835769)
8163 return false;
8165 if (recog_memoized (insn) < 0)
8166 return false;
8168 attr_type = get_attr_type (insn);
8169 if (!is_madd_op (attr_type))
8170 return false;
8172 prev = aarch64_prev_real_insn (insn);
8173 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8174 Restore recog state to INSN to avoid state corruption. */
8175 extract_constrain_insn_cached (insn);
8177 if (!prev || !has_memory_op (prev))
8178 return false;
8180 body = single_set (prev);
8182 /* If the previous insn is a memory op and there is no dependency between
8183 it and the DImode madd, emit a NOP between them. If body is NULL then we
8184 have a complex memory operation, probably a load/store pair.
8185 Be conservative for now and emit a NOP. */
8186 if (GET_MODE (recog_data.operand[0]) == DImode
8187 && (!body || !dep_between_memop_and_curr (body)))
8188 return true;
8190 return false;
8195 /* Implement FINAL_PRESCAN_INSN. */
8197 void
8198 aarch64_final_prescan_insn (rtx_insn *insn)
8200 if (aarch64_madd_needs_nop (insn))
8201 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
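/* Illustrative example (editorial addition, not in the original source):
   with -mfix-cortex-a53-835769 the prescan above turns a sequence like

     ldr  x1, [x2]
     madd x0, x3, x4, x5

   into

     ldr  x1, [x2]
     nop // between mem op and mult-accumulate
     madd x0, x3, x4, x5

   so the 64-bit multiply-accumulate no longer directly follows a memory
   operation, the pattern erratum 835769 is sensitive to.  The register
   choices are arbitrary.  */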
8205 /* Return the equivalent letter for size. */
8206 static char
8207 sizetochar (int size)
8209 switch (size)
8211 case 64: return 'd';
8212 case 32: return 's';
8213 case 16: return 'h';
8214 case 8 : return 'b';
8215 default: gcc_unreachable ();
8219 /* Return true iff x is a uniform vector of floating-point
8220 constants, and the constant can be represented in
8221 quarter-precision form. Note, as aarch64_float_const_representable
8222 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8223 static bool
8224 aarch64_vect_float_const_representable_p (rtx x)
8226 int i = 0;
8227 REAL_VALUE_TYPE r0, ri;
8228 rtx x0, xi;
8230 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8231 return false;
8233 x0 = CONST_VECTOR_ELT (x, 0);
8234 if (!CONST_DOUBLE_P (x0))
8235 return false;
8237 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8239 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8241 xi = CONST_VECTOR_ELT (x, i);
8242 if (!CONST_DOUBLE_P (xi))
8243 return false;
8245 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8246 if (!REAL_VALUES_EQUAL (r0, ri))
8247 return false;
8250 return aarch64_float_const_representable_p (x0);
8253 /* Return true for valid and false for invalid. */
8254 bool
8255 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
8256 struct simd_immediate_info *info)
8258 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8259 matches = 1; \
8260 for (i = 0; i < idx; i += (STRIDE)) \
8261 if (!(TEST)) \
8262 matches = 0; \
8263 if (matches) \
8265 immtype = (CLASS); \
8266 elsize = (ELSIZE); \
8267 eshift = (SHIFT); \
8268 emvn = (NEG); \
8269 break; \
8272 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8273 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8274 unsigned char bytes[16];
8275 int immtype = -1, matches;
8276 unsigned int invmask = inverse ? 0xff : 0;
8277 int eshift, emvn;
8279 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8281 if (! (aarch64_simd_imm_zero_p (op, mode)
8282 || aarch64_vect_float_const_representable_p (op)))
8283 return false;
8285 if (info)
8287 info->value = CONST_VECTOR_ELT (op, 0);
8288 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
8289 info->mvn = false;
8290 info->shift = 0;
8293 return true;
8296 /* Splat vector constant out into a byte vector. */
8297 for (i = 0; i < n_elts; i++)
8299 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8300 it must be laid out in the vector register in reverse order. */
8301 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
8302 unsigned HOST_WIDE_INT elpart;
8303 unsigned int part, parts;
8305 if (CONST_INT_P (el))
8307 elpart = INTVAL (el);
8308 parts = 1;
8310 else if (GET_CODE (el) == CONST_DOUBLE)
8312 elpart = CONST_DOUBLE_LOW (el);
8313 parts = 2;
8315 else
8316 gcc_unreachable ();
8318 for (part = 0; part < parts; part++)
8320 unsigned int byte;
8321 for (byte = 0; byte < innersize; byte++)
8323 bytes[idx++] = (elpart & 0xff) ^ invmask;
8324 elpart >>= BITS_PER_UNIT;
8326 if (GET_CODE (el) == CONST_DOUBLE)
8327 elpart = CONST_DOUBLE_HIGH (el);
8331 /* Sanity check. */
8332 gcc_assert (idx == GET_MODE_SIZE (mode));
8334 do
8336 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8337 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8339 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8340 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8342 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8343 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8345 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8346 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8348 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8350 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8352 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8353 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8355 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8356 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8358 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8359 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8361 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8362 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8364 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8366 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8368 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8369 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8371 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8372 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8374 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8375 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8377 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8378 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8380 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8382 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8383 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8385 while (0);
8387 if (immtype == -1)
8388 return false;
8390 if (info)
8392 info->element_width = elsize;
8393 info->mvn = emvn != 0;
8394 info->shift = eshift;
8396 unsigned HOST_WIDE_INT imm = 0;
8398 if (immtype >= 12 && immtype <= 15)
8399 info->msl = true;
8401 /* Un-invert bytes of recognized vector, if necessary. */
8402 if (invmask != 0)
8403 for (i = 0; i < idx; i++)
8404 bytes[i] ^= invmask;
8406 if (immtype == 17)
8408 /* FIXME: Broken on 32-bit H_W_I hosts. */
8409 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8411 for (i = 0; i < 8; i++)
8412 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8413 << (i * BITS_PER_UNIT);
8416 info->value = GEN_INT (imm);
8418 else
8420 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8421 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8423 /* Construct 'abcdefgh' because the assembler cannot handle
8424 generic constants. */
8425 if (info->mvn)
8426 imm = ~imm;
8427 imm = (imm >> info->shift) & 0xff;
8428 info->value = GEN_INT (imm);
8432 return true;
8433 #undef CHECK
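/* Illustrative example (editorial addition, not in the original source):
   a V4SImode constant whose lanes are all 0x00ff0000 splits into the
   per-lane byte pattern { 00, 00, ff, 00 }, which matches the elsize-32,
   shift-16 CHECK case above; the resulting simd_immediate_info describes
   an operand that the backend can emit as something like

     movi  v0.4s, #0xff, lsl #16

   The register number is arbitrary; only the immediate encoding is being
   illustrated.  */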
8436 /* Check if immediate shift constants are within range. */
8437 bool
8438 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
8440 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8441 if (left)
8442 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
8443 else
8444 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8447 /* Return true if X is a uniform vector where all elements
8448 are either the floating-point constant 0.0 or the
8449 integer constant 0. */
8450 bool
8451 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
8453 return x == CONST0_RTX (mode);
8456 bool
8457 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
8459 HOST_WIDE_INT imm = INTVAL (x);
8460 int i;
8462 for (i = 0; i < 8; i++)
8464 unsigned int byte = imm & 0xff;
8465 if (byte != 0xff && byte != 0)
8466 return false;
8467 imm >>= 8;
8470 return true;
8473 bool
8474 aarch64_mov_operand_p (rtx x,
8475 enum aarch64_symbol_context context,
8476 machine_mode mode)
8478 if (GET_CODE (x) == HIGH
8479 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8480 return true;
8482 if (CONST_INT_P (x))
8483 return true;
8485 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8486 return true;
8488 return aarch64_classify_symbolic_expression (x, context)
8489 == SYMBOL_TINY_ABSOLUTE;
8492 /* Return a CONST_VECTOR with every element set to VAL. */
8493 rtx
8494 aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
8496 int nunits = GET_MODE_NUNITS (mode);
8497 rtvec v = rtvec_alloc (nunits);
8498 int i;
8500 for (i = 0; i < nunits; i++)
8501 RTVEC_ELT (v, i) = GEN_INT (val);
8503 return gen_rtx_CONST_VECTOR (mode, v);
8506 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8508 bool
8509 aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
8511 machine_mode vmode;
8513 gcc_assert (!VECTOR_MODE_P (mode));
8514 vmode = aarch64_preferred_simd_mode (mode);
8515 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
8516 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
8519 /* Construct and return a PARALLEL RTX vector with elements numbering the
8520 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8521 the vector - from the perspective of the architecture. This does not
8522 line up with GCC's perspective on lane numbers, so we end up with
8523 different masks depending on our target endian-ness. The diagram
8524 below may help. We must draw the distinction when building masks
8525 which select one half of the vector. An instruction selecting
8526 architectural low-lanes for a big-endian target must be described using
8527 a mask selecting GCC high-lanes.
8529 Big-Endian Little-Endian
8531 GCC 0 1 2 3 3 2 1 0
8532 | x | x | x | x | | x | x | x | x |
8533 Architecture 3 2 1 0 3 2 1 0
8535 Low Mask: { 2, 3 } { 0, 1 }
8536 High Mask: { 0, 1 } { 2, 3 }
8537 */
8539 rtx
8540 aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
8542 int nunits = GET_MODE_NUNITS (mode);
8543 rtvec v = rtvec_alloc (nunits / 2);
8544 int high_base = nunits / 2;
8545 int low_base = 0;
8546 int base;
8547 rtx t1;
8548 int i;
8550 if (BYTES_BIG_ENDIAN)
8551 base = high ? low_base : high_base;
8552 else
8553 base = high ? high_base : low_base;
8555 for (i = 0; i < nunits / 2; i++)
8556 RTVEC_ELT (v, i) = GEN_INT (base + i);
8558 t1 = gen_rtx_PARALLEL (mode, v);
8559 return t1;
8562 /* Check OP for validity as a PARALLEL RTX vector with elements
8563 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8564 from the perspective of the architecture. See the diagram above
8565 aarch64_simd_vect_par_cnst_half for more details. */
8567 bool
8568 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
8569 bool high)
8571 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8572 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8573 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8574 int i = 0;
8576 if (!VECTOR_MODE_P (mode))
8577 return false;
8579 if (count_op != count_ideal)
8580 return false;
8582 for (i = 0; i < count_ideal; i++)
8584 rtx elt_op = XVECEXP (op, 0, i);
8585 rtx elt_ideal = XVECEXP (ideal, 0, i);
8587 if (!CONST_INT_P (elt_op)
8588 || INTVAL (elt_ideal) != INTVAL (elt_op))
8589 return false;
8591 return true;
8594 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8595 HIGH (exclusive). */
8596 void
8597 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8598 const_tree exp)
8600 HOST_WIDE_INT lane;
8601 gcc_assert (CONST_INT_P (operand));
8602 lane = INTVAL (operand);
8604 if (lane < low || lane >= high)
8606 if (exp)
8607 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8608 else
8609 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8613 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
8614 registers). */
8615 void
8616 aarch64_simd_emit_pair_result_insn (machine_mode mode,
8617 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8618 rtx op1)
8620 rtx mem = gen_rtx_MEM (mode, destaddr);
8621 rtx tmp1 = gen_reg_rtx (mode);
8622 rtx tmp2 = gen_reg_rtx (mode);
8624 emit_insn (intfn (tmp1, op1, tmp2));
8626 emit_move_insn (mem, tmp1);
8627 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8628 emit_move_insn (mem, tmp2);
8631 /* Return TRUE if OP is a valid vector addressing mode. */
8632 bool
8633 aarch64_simd_mem_operand_p (rtx op)
8635 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8636 || REG_P (XEXP (op, 0)));
8639 /* Emit a register copy from operand to operand, taking care not to
8640 early-clobber source registers in the process.
8642 COUNT is the number of components into which the copy needs to be
8643 decomposed. */
8644 void
8645 aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
8646 unsigned int count)
8648 unsigned int i;
8649 int rdest = REGNO (operands[0]);
8650 int rsrc = REGNO (operands[1]);
8652 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8653 || rdest < rsrc)
8654 for (i = 0; i < count; i++)
8655 emit_move_insn (gen_rtx_REG (mode, rdest + i),
8656 gen_rtx_REG (mode, rsrc + i));
8657 else
8658 for (i = 0; i < count; i++)
8659 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
8660 gen_rtx_REG (mode, rsrc + count - i - 1));
8663 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8664 one of the VSTRUCT modes: OI, CI or XI. */
8665 int
8666 aarch64_simd_attr_length_move (rtx_insn *insn)
8668 machine_mode mode;
8670 extract_insn_cached (insn);
8672 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8674 mode = GET_MODE (recog_data.operand[0]);
8675 switch (mode)
8677 case OImode:
8678 return 8;
8679 case CImode:
8680 return 12;
8681 case XImode:
8682 return 16;
8683 default:
8684 gcc_unreachable ();
8687 return 4;
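/* aarch64_simd_emit_reg_reg_move above splits a register-to-register
   VSTRUCT move into one 4-byte move per vector register, so the lengths
   are 8, 12 and 16 for the 2, 3 and 4 registers of OImode, CImode and
   XImode.  Any other alternative (a load or store of the whole structure)
   is a single instruction, hence the default of 4.  */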
8690 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8691 alignment of a vector to 128 bits. */
8692 static HOST_WIDE_INT
8693 aarch64_simd_vector_alignment (const_tree type)
8695 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8696 return MIN (align, 128);
8699 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8700 static bool
8701 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8703 if (is_packed)
8704 return false;
8706 /* We guarantee alignment for vectors up to 128-bits. */
8707 if (tree_int_cst_compare (TYPE_SIZE (type),
8708 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8709 return false;
8711 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8712 return true;
8715 /* If VALS is a vector constant that can be loaded into a register
8716 using DUP, generate instructions to do so and return an RTX to
8717 assign to the register. Otherwise return NULL_RTX. */
8718 static rtx
8719 aarch64_simd_dup_constant (rtx vals)
8721 machine_mode mode = GET_MODE (vals);
8722 machine_mode inner_mode = GET_MODE_INNER (mode);
8723 int n_elts = GET_MODE_NUNITS (mode);
8724 bool all_same = true;
8725 rtx x;
8726 int i;
8728 if (GET_CODE (vals) != CONST_VECTOR)
8729 return NULL_RTX;
8731 for (i = 1; i < n_elts; ++i)
8733 x = CONST_VECTOR_ELT (vals, i);
8734 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8735 all_same = false;
8738 if (!all_same)
8739 return NULL_RTX;
8741 /* We can load this constant by using DUP and a constant in a
8742 single ARM register. This will be cheaper than a vector
8743 load. */
8744 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8745 return gen_rtx_VEC_DUPLICATE (mode, x);
8749 /* Generate code to load VALS, which is a PARALLEL containing only
8750 constants (for vec_init) or CONST_VECTOR, efficiently into a
8751 register. Returns an RTX to copy into the register, or NULL_RTX
8752 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8753 static rtx
8754 aarch64_simd_make_constant (rtx vals)
8756 machine_mode mode = GET_MODE (vals);
8757 rtx const_dup;
8758 rtx const_vec = NULL_RTX;
8759 int n_elts = GET_MODE_NUNITS (mode);
8760 int n_const = 0;
8761 int i;
8763 if (GET_CODE (vals) == CONST_VECTOR)
8764 const_vec = vals;
8765 else if (GET_CODE (vals) == PARALLEL)
8767 /* A CONST_VECTOR must contain only CONST_INTs and
8768 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8769 Only store valid constants in a CONST_VECTOR. */
8770 for (i = 0; i < n_elts; ++i)
8772 rtx x = XVECEXP (vals, 0, i);
8773 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8774 n_const++;
8776 if (n_const == n_elts)
8777 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8779 else
8780 gcc_unreachable ();
8782 if (const_vec != NULL_RTX
8783 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8784 /* Load using MOVI/MVNI. */
8785 return const_vec;
8786 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8787 /* Loaded using DUP. */
8788 return const_dup;
8789 else if (const_vec != NULL_RTX)
8790 /* Load from constant pool. We cannot take advantage of single-cycle
8791 LD1 because we need a PC-relative addressing mode. */
8792 return const_vec;
8793 else
8794 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8795 We cannot construct an initializer. */
8796 return NULL_RTX;
8799 void
8800 aarch64_expand_vector_init (rtx target, rtx vals)
8802 machine_mode mode = GET_MODE (target);
8803 machine_mode inner_mode = GET_MODE_INNER (mode);
8804 int n_elts = GET_MODE_NUNITS (mode);
8805 int n_var = 0, one_var = -1;
8806 bool all_same = true;
8807 rtx x, mem;
8808 int i;
8810 x = XVECEXP (vals, 0, 0);
8811 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8812 n_var = 1, one_var = 0;
8814 for (i = 1; i < n_elts; ++i)
8816 x = XVECEXP (vals, 0, i);
8817 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8818 ++n_var, one_var = i;
8820 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8821 all_same = false;
8824 if (n_var == 0)
8826 rtx constant = aarch64_simd_make_constant (vals);
8827 if (constant != NULL_RTX)
8829 emit_move_insn (target, constant);
8830 return;
8834 /* Splat a single non-constant element if we can. */
8835 if (all_same)
8837 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8838 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8839 return;
8842 /* One field is non-constant. Load constant then overwrite varying
8843 field. This is more efficient than using the stack. */
8844 if (n_var == 1)
8846 rtx copy = copy_rtx (vals);
8847 rtx index = GEN_INT (one_var);
8848 enum insn_code icode;
8850 /* Load constant part of vector, substitute neighboring value for
8851 varying element. */
8852 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8853 aarch64_expand_vector_init (target, copy);
8855 /* Insert variable. */
8856 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8857 icode = optab_handler (vec_set_optab, mode);
8858 gcc_assert (icode != CODE_FOR_nothing);
8859 emit_insn (GEN_FCN (icode) (target, x, index));
8860 return;
8863 /* Construct the vector in memory one field at a time
8864 and load the whole vector. */
8865 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8866 for (i = 0; i < n_elts; i++)
8867 emit_move_insn (adjust_address_nv (mem, inner_mode,
8868 i * GET_MODE_SIZE (inner_mode)),
8869 XVECEXP (vals, 0, i));
8870 emit_move_insn (target, mem);
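/* Worked example: initializing a V4SI vector from { x, 1, 2, 3 } where
   only x is variable takes the n_var == 1 path above: the constant
   vector { 1, 1, 2, 3 } is built first (lane 0 temporarily borrows its
   neighbour's value), then x is inserted into lane 0 via the vec_set
   pattern.  A fully variable initializer falls back to the stack
   temporary loop at the end.  */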
8874 static unsigned HOST_WIDE_INT
8875 aarch64_shift_truncation_mask (machine_mode mode)
8877 return
8878 (aarch64_vector_mode_supported_p (mode)
8879 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8882 #ifndef TLS_SECTION_ASM_FLAG
8883 #define TLS_SECTION_ASM_FLAG 'T'
8884 #endif
8886 void
8887 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8888 tree decl ATTRIBUTE_UNUSED)
8890 char flagchars[10], *f = flagchars;
8892 /* If we have already declared this section, we can use an
8893 abbreviated form to switch back to it -- unless this section is
8894 part of a COMDAT group, in which case GAS requires the full
8895 declaration every time. */
8896 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8897 && (flags & SECTION_DECLARED))
8899 fprintf (asm_out_file, "\t.section\t%s\n", name);
8900 return;
8903 if (!(flags & SECTION_DEBUG))
8904 *f++ = 'a';
8905 if (flags & SECTION_WRITE)
8906 *f++ = 'w';
8907 if (flags & SECTION_CODE)
8908 *f++ = 'x';
8909 if (flags & SECTION_SMALL)
8910 *f++ = 's';
8911 if (flags & SECTION_MERGE)
8912 *f++ = 'M';
8913 if (flags & SECTION_STRINGS)
8914 *f++ = 'S';
8915 if (flags & SECTION_TLS)
8916 *f++ = TLS_SECTION_ASM_FLAG;
8917 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8918 *f++ = 'G';
8919 *f = '\0';
8921 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8923 if (!(flags & SECTION_NOTYPE))
8925 const char *type;
8926 const char *format;
8928 if (flags & SECTION_BSS)
8929 type = "nobits";
8930 else
8931 type = "progbits";
8933 #ifdef TYPE_OPERAND_FMT
8934 format = "," TYPE_OPERAND_FMT;
8935 #else
8936 format = ",@%s";
8937 #endif
8939 fprintf (asm_out_file, format, type);
8941 if (flags & SECTION_ENTSIZE)
8942 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8943 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8945 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8946 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8947 else
8948 fprintf (asm_out_file, ",%s,comdat",
8949 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8953 putc ('\n', asm_out_file);
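/* For a hypothetical COMDAT text section this would emit roughly:
   .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat
   where "axG" comes from the SECTION_* flag bits handled above and the
   group name is taken from DECL_COMDAT_GROUP.  */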
8956 /* Select a format to encode pointers in exception handling data. */
8957 int
8958 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8960 int type;
8961 switch (aarch64_cmodel)
8963 case AARCH64_CMODEL_TINY:
8964 case AARCH64_CMODEL_TINY_PIC:
8965 case AARCH64_CMODEL_SMALL:
8966 case AARCH64_CMODEL_SMALL_PIC:
8967 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8968 for everything. */
8969 type = DW_EH_PE_sdata4;
8970 break;
8971 default:
8972 /* No assumptions here. 8-byte relocs required. */
8973 type = DW_EH_PE_sdata8;
8974 break;
8976 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8979 /* Emit load exclusive. */
8981 static void
8982 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
8983 rtx mem, rtx model_rtx)
8985 rtx (*gen) (rtx, rtx, rtx);
8987 switch (mode)
8989 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8990 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8991 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8992 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8993 default:
8994 gcc_unreachable ();
8997 emit_insn (gen (rval, mem, model_rtx));
9000 /* Emit store exclusive. */
9002 static void
9003 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
9004 rtx rval, rtx mem, rtx model_rtx)
9006 rtx (*gen) (rtx, rtx, rtx, rtx);
9008 switch (mode)
9010 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
9011 case HImode: gen = gen_aarch64_store_exclusivehi; break;
9012 case SImode: gen = gen_aarch64_store_exclusivesi; break;
9013 case DImode: gen = gen_aarch64_store_exclusivedi; break;
9014 default:
9015 gcc_unreachable ();
9018 emit_insn (gen (bval, rval, mem, model_rtx));
9021 /* Emit the jump pattern INSN and mark it as very unlikely to be taken. */
9023 static void
9024 aarch64_emit_unlikely_jump (rtx insn)
9026 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
9028 insn = emit_jump_insn (insn);
9029 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
9032 /* Expand a compare and swap pattern. */
9034 void
9035 aarch64_expand_compare_and_swap (rtx operands[])
9037 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
9038 machine_mode mode, cmp_mode;
9039 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
9041 bval = operands[0];
9042 rval = operands[1];
9043 mem = operands[2];
9044 oldval = operands[3];
9045 newval = operands[4];
9046 is_weak = operands[5];
9047 mod_s = operands[6];
9048 mod_f = operands[7];
9049 mode = GET_MODE (mem);
9050 cmp_mode = mode;
9052 /* Normally the succ memory model must be stronger than fail, but in the
9053 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9054 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9056 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
9057 && INTVAL (mod_s) == MEMMODEL_RELEASE)
9058 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
9060 switch (mode)
9062 case QImode:
9063 case HImode:
9064 /* For short modes, we're going to perform the comparison in SImode,
9065 so do the zero-extension now. */
9066 cmp_mode = SImode;
9067 rval = gen_reg_rtx (SImode);
9068 oldval = convert_modes (SImode, mode, oldval, true);
9069 /* Fall through. */
9071 case SImode:
9072 case DImode:
9073 /* Force the value into a register if needed. */
9074 if (!aarch64_plus_operand (oldval, mode))
9075 oldval = force_reg (cmp_mode, oldval);
9076 break;
9078 default:
9079 gcc_unreachable ();
9082 switch (mode)
9084 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
9085 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
9086 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
9087 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
9088 default:
9089 gcc_unreachable ();
9092 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
9094 if (mode == QImode || mode == HImode)
9095 emit_move_insn (operands[1], gen_lowpart (mode, rval));
9097 x = gen_rtx_REG (CCmode, CC_REGNUM);
9098 x = gen_rtx_EQ (SImode, x, const0_rtx);
9099 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
9102 /* Split a compare and swap pattern. */
9104 void
9105 aarch64_split_compare_and_swap (rtx operands[])
9107 rtx rval, mem, oldval, newval, scratch;
9108 machine_mode mode;
9109 bool is_weak;
9110 rtx_code_label *label1, *label2;
9111 rtx x, cond;
9113 rval = operands[0];
9114 mem = operands[1];
9115 oldval = operands[2];
9116 newval = operands[3];
9117 is_weak = (operands[4] != const0_rtx);
9118 scratch = operands[7];
9119 mode = GET_MODE (mem);
9121 label1 = NULL;
9122 if (!is_weak)
9124 label1 = gen_label_rtx ();
9125 emit_label (label1);
9127 label2 = gen_label_rtx ();
9129 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9131 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9132 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9133 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9134 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9135 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9137 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9139 if (!is_weak)
9141 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9142 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9143 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9144 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9146 else
9148 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9149 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9150 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9153 emit_label (label2);
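/* The split above corresponds, roughly, to the usual load/store-exclusive
   retry loop (strong variant shown; registers, labels and the exact
   acquire/release mnemonics depend on the operands and memory model):
       .Lretry:  ldxr  x0, [mem]
                 cmp   x0, oldval
                 b.ne  .Ldone
                 stxr  w1, newval, [mem]
                 cbnz  w1, .Lretry
       .Ldone:
   The weak form drops label1 and the backwards branch and instead leaves
   the store-exclusive status in the condition flags.  */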
9156 /* Split an atomic operation. */
9158 void
9159 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9160 rtx value, rtx model_rtx, rtx cond)
9162 machine_mode mode = GET_MODE (mem);
9163 machine_mode wmode = (mode == DImode ? DImode : SImode);
9164 rtx_code_label *label;
9165 rtx x;
9167 label = gen_label_rtx ();
9168 emit_label (label);
9170 if (new_out)
9171 new_out = gen_lowpart (wmode, new_out);
9172 if (old_out)
9173 old_out = gen_lowpart (wmode, old_out);
9174 else
9175 old_out = new_out;
9176 value = simplify_gen_subreg (wmode, value, mode, 0);
9178 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9180 switch (code)
9182 case SET:
9183 new_out = value;
9184 break;
9186 case NOT:
9187 x = gen_rtx_AND (wmode, old_out, value);
9188 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9189 x = gen_rtx_NOT (wmode, new_out);
9190 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9191 break;
9193 case MINUS:
9194 if (CONST_INT_P (value))
9196 value = GEN_INT (-INTVAL (value));
9197 code = PLUS;
9199 /* Fall through. */
9201 default:
9202 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9203 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9204 break;
9207 aarch64_emit_store_exclusive (mode, cond, mem,
9208 gen_lowpart (mode, new_out), model_rtx);
9210 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9211 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9212 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9213 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9216 static void
9217 aarch64_print_extension (void)
9219 const struct aarch64_option_extension *opt = NULL;
9221 for (opt = all_extensions; opt->name != NULL; opt++)
9222 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9223 asm_fprintf (asm_out_file, "+%s", opt->name);
9225 asm_fprintf (asm_out_file, "\n");
9228 static void
9229 aarch64_start_file (void)
9231 if (selected_arch)
9233 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9234 aarch64_print_extension ();
9236 else if (selected_cpu)
9238 const char *truncated_name
9239 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9240 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
9241 aarch64_print_extension ();
9243 default_file_start ();
9246 /* Target hook for c_mode_for_suffix. */
9247 static machine_mode
9248 aarch64_c_mode_for_suffix (char suffix)
9250 if (suffix == 'q')
9251 return TFmode;
9253 return VOIDmode;
9256 /* We can only represent floating point constants which will fit in
9257 "quarter-precision" values. These values are characterised by
9258 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
9261 (-1)^s * (n/16) * 2^r
9263 Where:
9264 's' is the sign bit.
9265 'n' is an integer in the range 16 <= n <= 31.
9266 'r' is an integer in the range -3 <= r <= 4. */
9268 /* Return true iff X can be represented by a quarter-precision
9269 floating point immediate operand. Note, we cannot represent 0.0. */
9270 bool
9271 aarch64_float_const_representable_p (rtx x)
9273 /* This represents our current view of how many bits
9274 make up the mantissa. */
9275 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9276 int exponent;
9277 unsigned HOST_WIDE_INT mantissa, mask;
9278 REAL_VALUE_TYPE r, m;
9279 bool fail;
9281 if (!CONST_DOUBLE_P (x))
9282 return false;
9284 if (GET_MODE (x) == VOIDmode)
9285 return false;
9287 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9289 /* We cannot represent infinities, NaNs or +/-zero. We won't
9290 know if we have +zero until we analyse the mantissa, but we
9291 can reject the other invalid values. */
9292 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9293 || REAL_VALUE_MINUS_ZERO (r))
9294 return false;
9296 /* Extract exponent. */
9297 r = real_value_abs (&r);
9298 exponent = REAL_EXP (&r);
9300 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9301 highest (sign) bit, with a fixed binary point at bit point_pos.
9302 w.elt (0) holds the low part of the mantissa, w.elt (1) the high part.
9303 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9304 bits for the mantissa, this can fail (low bits will be lost). */
9305 real_ldexp (&m, &r, point_pos - exponent);
9306 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
9308 /* If the low part of the mantissa has bits set we cannot represent
9309 the value. */
9310 if (w.elt (0) != 0)
9311 return false;
9312 /* We have rejected the lower HOST_WIDE_INT, so update our
9313 understanding of how many bits lie in the mantissa and
9314 look only at the high HOST_WIDE_INT. */
9315 mantissa = w.elt (1);
9316 point_pos -= HOST_BITS_PER_WIDE_INT;
9318 /* We can only represent values with a mantissa of the form 1.xxxx. */
9319 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9320 if ((mantissa & mask) != 0)
9321 return false;
9323 /* Having filtered unrepresentable values, we may now remove all
9324 but the highest 5 bits. */
9325 mantissa >>= point_pos - 5;
9327 /* We cannot represent the value 0.0, so reject it. This is handled
9328 elsewhere. */
9329 if (mantissa == 0)
9330 return false;
9332 /* Then, as bit 4 is always set, we can mask it off, leaving
9333 the mantissa in the range [0, 15]. */
9334 mantissa &= ~(1 << 4);
9335 gcc_assert (mantissa <= 15);
9337 /* GCC internally does not use IEEE754-like encoding (where normalized
9338 significands lie in the range [1, 2)); GCC uses [0.5, 1) (see real.c).
9339 Our mantissa values are shifted 4 places to the left relative to
9340 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9341 by 5 places to correct for GCC's representation. */
9342 exponent = 5 - exponent;
9344 return (exponent >= 0 && exponent <= 7);
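/* A minimal standalone sketch of the same predicate, assuming plain IEEE
   double arithmetic instead of GCC's REAL_VALUE_TYPE machinery: it simply
   enumerates the immediate space (-1)^s * (n/16) * 2^r described above.
   The function name is illustrative only.  */
#include <math.h>
#include <stdbool.h>

static bool
quarter_precision_representable (double x)
{
  if (x == 0.0 || !isfinite (x))
    return false;               /* 0.0, infinities and NaNs are rejected.  */
  for (int n = 16; n <= 31; n++)
    for (int r = -3; r <= 4; r++)
      if (fabs (x) == (n / 16.0) * ldexp (1.0, r))
        return true;            /* Sign is free, so compare magnitudes.  */
  return false;
}
/* E.g. 1.5 is representable (n = 24, r = 0); the representable magnitudes
   run from 0.125 (n = 16, r = -3) up to 31.0 (n = 31, r = 4), so 0.1 and
   32.0 are rejected.  */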
9347 char*
9348 aarch64_output_simd_mov_immediate (rtx const_vector,
9349 machine_mode mode,
9350 unsigned width)
9352 bool is_valid;
9353 static char templ[40];
9354 const char *mnemonic;
9355 const char *shift_op;
9356 unsigned int lane_count = 0;
9357 char element_char;
9359 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
9361 /* This will return true to show const_vector is legal for use as either
9362 an AdvSIMD MOVI immediate or, implicitly, an MVNI immediate. It will
9363 also update INFO to show how the immediate should be generated. */
9364 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
9365 gcc_assert (is_valid);
9367 element_char = sizetochar (info.element_width);
9368 lane_count = width / info.element_width;
9370 mode = GET_MODE_INNER (mode);
9371 if (mode == SFmode || mode == DFmode)
9373 gcc_assert (info.shift == 0 && ! info.mvn);
9374 if (aarch64_float_const_zero_rtx_p (info.value))
9375 info.value = GEN_INT (0);
9376 else
9378 #define buf_size 20
9379 REAL_VALUE_TYPE r;
9380 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9381 char float_buf[buf_size] = {'\0'};
9382 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9383 #undef buf_size
9385 if (lane_count == 1)
9386 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9387 else
9388 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
9389 lane_count, element_char, float_buf);
9390 return templ;
9394 mnemonic = info.mvn ? "mvni" : "movi";
9395 shift_op = info.msl ? "msl" : "lsl";
9397 if (lane_count == 1)
9398 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9399 mnemonic, UINTVAL (info.value));
9400 else if (info.shift)
9401 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9402 ", %s %d", mnemonic, lane_count, element_char,
9403 UINTVAL (info.value), shift_op, info.shift);
9404 else
9405 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
9406 mnemonic, lane_count, element_char, UINTVAL (info.value));
9407 return templ;
9410 char*
9411 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
9412 machine_mode mode)
9414 machine_mode vmode;
9416 gcc_assert (!VECTOR_MODE_P (mode));
9417 vmode = aarch64_simd_container_mode (mode, 64);
9418 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9419 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9422 /* Split operands into moves from op[1] + op[2] into op[0]. */
9424 void
9425 aarch64_split_combinev16qi (rtx operands[3])
9427 unsigned int dest = REGNO (operands[0]);
9428 unsigned int src1 = REGNO (operands[1]);
9429 unsigned int src2 = REGNO (operands[2]);
9430 machine_mode halfmode = GET_MODE (operands[1]);
9431 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9432 rtx destlo, desthi;
9434 gcc_assert (halfmode == V16QImode);
9436 if (src1 == dest && src2 == dest + halfregs)
9438 /* No-op move. Can't split to nothing; emit something. */
9439 emit_note (NOTE_INSN_DELETED);
9440 return;
9443 /* Preserve register attributes for variable tracking. */
9444 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9445 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9446 GET_MODE_SIZE (halfmode));
9448 /* Special case of reversed high/low parts. */
9449 if (reg_overlap_mentioned_p (operands[2], destlo)
9450 && reg_overlap_mentioned_p (operands[1], desthi))
9452 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9453 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9454 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9456 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9458 /* Try to avoid unnecessary moves if part of the result
9459 is in the right place already. */
9460 if (src1 != dest)
9461 emit_move_insn (destlo, operands[1]);
9462 if (src2 != dest + halfregs)
9463 emit_move_insn (desthi, operands[2]);
9465 else
9467 if (src2 != dest + halfregs)
9468 emit_move_insn (desthi, operands[2]);
9469 if (src1 != dest)
9470 emit_move_insn (destlo, operands[1]);
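/* The three EORs in the reversed case above are the classic XOR swap:
   they exchange the contents of the two source registers in place, which
   is exactly what is needed when each half of the destination overlaps
   the "wrong" source, without requiring a scratch register.  */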
9474 /* vec_perm support. */
9476 #define MAX_VECT_LEN 16
9478 struct expand_vec_perm_d
9480 rtx target, op0, op1;
9481 unsigned char perm[MAX_VECT_LEN];
9482 machine_mode vmode;
9483 unsigned char nelt;
9484 bool one_vector_p;
9485 bool testing_p;
9488 /* Generate a variable permutation. */
9490 static void
9491 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9493 machine_mode vmode = GET_MODE (target);
9494 bool one_vector_p = rtx_equal_p (op0, op1);
9496 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9497 gcc_checking_assert (GET_MODE (op0) == vmode);
9498 gcc_checking_assert (GET_MODE (op1) == vmode);
9499 gcc_checking_assert (GET_MODE (sel) == vmode);
9500 gcc_checking_assert (TARGET_SIMD);
9502 if (one_vector_p)
9504 if (vmode == V8QImode)
9506 /* Expand the argument to a V16QI mode by duplicating it. */
9507 rtx pair = gen_reg_rtx (V16QImode);
9508 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9509 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9511 else
9513 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9516 else
9518 rtx pair;
9520 if (vmode == V8QImode)
9522 pair = gen_reg_rtx (V16QImode);
9523 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9524 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9526 else
9528 pair = gen_reg_rtx (OImode);
9529 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9530 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9535 void
9536 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9538 machine_mode vmode = GET_MODE (target);
9539 unsigned int nelt = GET_MODE_NUNITS (vmode);
9540 bool one_vector_p = rtx_equal_p (op0, op1);
9541 rtx mask;
9543 /* The TBL instruction does not use a modulo index, so we must take care
9544 of that ourselves. */
9545 mask = aarch64_simd_gen_const_vector_dup (vmode,
9546 one_vector_p ? nelt - 1 : 2 * nelt - 1);
9547 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9549 /* For big-endian, we also need to reverse the index within the vector
9550 (but not which vector). */
9551 if (BYTES_BIG_ENDIAN)
9553 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9554 if (!one_vector_p)
9555 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9556 sel = expand_simple_binop (vmode, XOR, sel, mask,
9557 NULL, 0, OPTAB_LIB_WIDEN);
9559 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
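/* Worked example (V8QI, two distinct inputs): a selector element of 19 is
   first reduced to the 16-entry table by the AND (19 & 15 == 3); on
   big-endian the extra XOR with 7 then flips the index within its
   containing vector (3 ^ 7 == 4) while leaving the choice of vector
   unchanged.  */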
9562 /* Recognize patterns suitable for the TRN instructions. */
9563 static bool
9564 aarch64_evpc_trn (struct expand_vec_perm_d *d)
9566 unsigned int i, odd, mask, nelt = d->nelt;
9567 rtx out, in0, in1, x;
9568 rtx (*gen) (rtx, rtx, rtx);
9569 machine_mode vmode = d->vmode;
9571 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9572 return false;
9574 /* Note that these are little-endian tests.
9575 We correct for big-endian later. */
9576 if (d->perm[0] == 0)
9577 odd = 0;
9578 else if (d->perm[0] == 1)
9579 odd = 1;
9580 else
9581 return false;
9582 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9584 for (i = 0; i < nelt; i += 2)
9586 if (d->perm[i] != i + odd)
9587 return false;
9588 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9589 return false;
9592 /* Success! */
9593 if (d->testing_p)
9594 return true;
9596 in0 = d->op0;
9597 in1 = d->op1;
9598 if (BYTES_BIG_ENDIAN)
9600 x = in0, in0 = in1, in1 = x;
9601 odd = !odd;
9603 out = d->target;
9605 if (odd)
9607 switch (vmode)
9609 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9610 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9611 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9612 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9613 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9614 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9615 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9616 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9617 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9618 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9619 default:
9620 return false;
9623 else
9625 switch (vmode)
9627 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9628 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9629 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9630 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9631 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9632 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9633 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9634 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9635 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9636 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9637 default:
9638 return false;
9642 emit_insn (gen (out, in0, in1));
9643 return true;
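/* For example, on V4SI the selectors accepted above are { 0, 4, 2, 6 }
   (odd == 0, TRN1) and { 1, 5, 3, 7 } (odd == 1, TRN2): the even or odd
   numbered lanes of the two inputs, interleaved.  */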
9646 /* Recognize patterns suitable for the UZP instructions. */
9647 static bool
9648 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9650 unsigned int i, odd, mask, nelt = d->nelt;
9651 rtx out, in0, in1, x;
9652 rtx (*gen) (rtx, rtx, rtx);
9653 machine_mode vmode = d->vmode;
9655 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9656 return false;
9658 /* Note that these are little-endian tests.
9659 We correct for big-endian later. */
9660 if (d->perm[0] == 0)
9661 odd = 0;
9662 else if (d->perm[0] == 1)
9663 odd = 1;
9664 else
9665 return false;
9666 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9668 for (i = 0; i < nelt; i++)
9670 unsigned elt = (i * 2 + odd) & mask;
9671 if (d->perm[i] != elt)
9672 return false;
9675 /* Success! */
9676 if (d->testing_p)
9677 return true;
9679 in0 = d->op0;
9680 in1 = d->op1;
9681 if (BYTES_BIG_ENDIAN)
9683 x = in0, in0 = in1, in1 = x;
9684 odd = !odd;
9686 out = d->target;
9688 if (odd)
9690 switch (vmode)
9692 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9693 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9694 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9695 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9696 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9697 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9698 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9699 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9700 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9701 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9702 default:
9703 return false;
9706 else
9708 switch (vmode)
9710 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9711 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9712 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9713 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9714 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9715 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9716 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9717 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9718 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9719 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9720 default:
9721 return false;
9725 emit_insn (gen (out, in0, in1));
9726 return true;
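/* For example, on V4SI the selectors accepted above are { 0, 2, 4, 6 }
   (odd == 0, UZP1) and { 1, 3, 5, 7 } (odd == 1, UZP2): the even or odd
   numbered lanes of the concatenated inputs.  */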
9729 /* Recognize patterns suitable for the ZIP instructions. */
9730 static bool
9731 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9733 unsigned int i, high, mask, nelt = d->nelt;
9734 rtx out, in0, in1, x;
9735 rtx (*gen) (rtx, rtx, rtx);
9736 machine_mode vmode = d->vmode;
9738 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9739 return false;
9741 /* Note that these are little-endian tests.
9742 We correct for big-endian later. */
9743 high = nelt / 2;
9744 if (d->perm[0] == high)
9745 /* Do Nothing. */
9747 else if (d->perm[0] == 0)
9748 high = 0;
9749 else
9750 return false;
9751 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9753 for (i = 0; i < nelt / 2; i++)
9755 unsigned elt = (i + high) & mask;
9756 if (d->perm[i * 2] != elt)
9757 return false;
9758 elt = (elt + nelt) & mask;
9759 if (d->perm[i * 2 + 1] != elt)
9760 return false;
9763 /* Success! */
9764 if (d->testing_p)
9765 return true;
9767 in0 = d->op0;
9768 in1 = d->op1;
9769 if (BYTES_BIG_ENDIAN)
9771 x = in0, in0 = in1, in1 = x;
9772 high = !high;
9774 out = d->target;
9776 if (high)
9778 switch (vmode)
9780 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9781 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9782 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9783 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9784 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9785 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9786 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9787 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9788 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9789 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9790 default:
9791 return false;
9794 else
9796 switch (vmode)
9798 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9799 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9800 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9801 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9802 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9803 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9804 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9805 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9806 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9807 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9808 default:
9809 return false;
9813 emit_insn (gen (out, in0, in1));
9814 return true;
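/* For example, on V4SI the selectors accepted above are { 0, 4, 1, 5 }
   (high == 0, ZIP1) and { 2, 6, 3, 7 } (high != 0, ZIP2): the low or high
   halves of the two inputs, interleaved lane by lane.  */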
9817 /* Recognize patterns for the EXT insn. */
9819 static bool
9820 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9822 unsigned int i, nelt = d->nelt;
9823 rtx (*gen) (rtx, rtx, rtx, rtx);
9824 rtx offset;
9826 unsigned int location = d->perm[0]; /* Always < nelt. */
9828 /* Check if the extracted indices are increasing by one. */
9829 for (i = 1; i < nelt; i++)
9831 unsigned int required = location + i;
9832 if (d->one_vector_p)
9834 /* We'll pass the same vector in twice, so allow indices to wrap. */
9835 required &= (nelt - 1);
9837 if (d->perm[i] != required)
9838 return false;
9841 switch (d->vmode)
9843 case V16QImode: gen = gen_aarch64_extv16qi; break;
9844 case V8QImode: gen = gen_aarch64_extv8qi; break;
9845 case V4HImode: gen = gen_aarch64_extv4hi; break;
9846 case V8HImode: gen = gen_aarch64_extv8hi; break;
9847 case V2SImode: gen = gen_aarch64_extv2si; break;
9848 case V4SImode: gen = gen_aarch64_extv4si; break;
9849 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9850 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9851 case V2DImode: gen = gen_aarch64_extv2di; break;
9852 case V2DFmode: gen = gen_aarch64_extv2df; break;
9853 default:
9854 return false;
9857 /* Success! */
9858 if (d->testing_p)
9859 return true;
9861 /* The case where (location == 0) is a no-op for both big- and little-endian,
9862 and is removed by the mid-end at optimization levels -O1 and higher. */
9864 if (BYTES_BIG_ENDIAN && (location != 0))
9866 /* After setup, we want the high elements of the first vector (stored
9867 at the LSB end of the register), and the low elements of the second
9868 vector (stored at the MSB end of the register). So swap. */
9869 rtx temp = d->op0;
9870 d->op0 = d->op1;
9871 d->op1 = temp;
9872 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9873 location = nelt - location;
9876 offset = GEN_INT (location);
9877 emit_insn (gen (d->target, d->op0, d->op1, offset));
9878 return true;
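/* For example, with a single V4SI input the rotation { 1, 2, 3, 0 } is
   accepted with location == 1; the indices are allowed to wrap because
   the same vector is passed to EXT twice.  */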
9881 /* Recognize patterns for the REV insns. */
9883 static bool
9884 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9886 unsigned int i, j, diff, nelt = d->nelt;
9887 rtx (*gen) (rtx, rtx);
9889 if (!d->one_vector_p)
9890 return false;
9892 diff = d->perm[0];
9893 switch (diff)
9895 case 7:
9896 switch (d->vmode)
9898 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9899 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9900 default:
9901 return false;
9903 break;
9904 case 3:
9905 switch (d->vmode)
9907 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9908 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9909 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9910 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9911 default:
9912 return false;
9914 break;
9915 case 1:
9916 switch (d->vmode)
9918 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9919 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9920 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9921 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9922 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9923 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9924 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9925 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9926 default:
9927 return false;
9929 break;
9930 default:
9931 return false;
9934 for (i = 0; i < nelt ; i += diff + 1)
9935 for (j = 0; j <= diff; j += 1)
9937 /* This is guaranteed to hold: diff is one of 7, 3 or 1,
9938 so i + j always stays within the vector. A selector with
9939 any other value of diff would mean something went wrong
9940 before we got here. */
9942 gcc_assert (i + j < nelt);
9943 if (d->perm[i + j] != i + diff - j)
9944 return false;
9947 /* Success! */
9948 if (d->testing_p)
9949 return true;
9951 emit_insn (gen (d->target, d->op0));
9952 return true;
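/* For example, a V8QI selector of { 3, 2, 1, 0, 7, 6, 5, 4 } gives
   diff == 3 and is matched above as a byte-wise REV32, i.e. reversing
   the bytes within each 32-bit word.  */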
9955 static bool
9956 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9958 rtx (*gen) (rtx, rtx, rtx);
9959 rtx out = d->target;
9960 rtx in0;
9961 machine_mode vmode = d->vmode;
9962 unsigned int i, elt, nelt = d->nelt;
9963 rtx lane;
9965 elt = d->perm[0];
9966 for (i = 1; i < nelt; i++)
9968 if (elt != d->perm[i])
9969 return false;
9972 /* The generic preparation in aarch64_expand_vec_perm_const_1
9973 swaps the operand order and the permute indices if it finds
9974 d->perm[0] to be in the second operand. Thus, we can always
9975 use d->op0 and need not do any extra arithmetic to get the
9976 correct lane number. */
9977 in0 = d->op0;
9978 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9980 switch (vmode)
9982 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9983 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9984 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9985 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9986 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9987 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9988 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9989 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9990 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9991 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9992 default:
9993 return false;
9996 emit_insn (gen (out, in0, lane));
9997 return true;
10000 static bool
10001 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
10003 rtx rperm[MAX_VECT_LEN], sel;
10004 machine_mode vmode = d->vmode;
10005 unsigned int i, nelt = d->nelt;
10007 if (d->testing_p)
10008 return true;
10010 /* Generic code will try constant permutation twice. Once with the
10011 original mode and again with the elements lowered to QImode.
10012 So wait and don't do the selector expansion ourselves. */
10013 if (vmode != V8QImode && vmode != V16QImode)
10014 return false;
10016 for (i = 0; i < nelt; ++i)
10018 int nunits = GET_MODE_NUNITS (vmode);
10020 /* If big-endian and two vectors we end up with a weird mixed-endian
10021 mode on NEON. Reverse the index within each word but not the word
10022 itself. */
10023 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
10024 : d->perm[i]);
10026 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
10027 sel = force_reg (vmode, sel);
10029 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
10030 return true;
10033 static bool
10034 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
10036 /* The pattern matching functions above are written to look for a small
10037 number to begin the sequence (0, 1, N/2). If we begin with an index
10038 from the second operand, we can swap the operands. */
10039 if (d->perm[0] >= d->nelt)
10041 unsigned i, nelt = d->nelt;
10042 rtx x;
10044 gcc_assert (nelt == (nelt & -nelt));
10045 for (i = 0; i < nelt; ++i)
10046 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
10048 x = d->op0;
10049 d->op0 = d->op1;
10050 d->op1 = x;
10053 if (TARGET_SIMD)
10055 if (aarch64_evpc_rev (d))
10056 return true;
10057 else if (aarch64_evpc_ext (d))
10058 return true;
10059 else if (aarch64_evpc_dup (d))
10060 return true;
10061 else if (aarch64_evpc_zip (d))
10062 return true;
10063 else if (aarch64_evpc_uzp (d))
10064 return true;
10065 else if (aarch64_evpc_trn (d))
10066 return true;
10067 return aarch64_evpc_tbl (d);
10069 return false;
10072 /* Expand a vec_perm_const pattern. */
10074 bool
10075 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
10077 struct expand_vec_perm_d d;
10078 int i, nelt, which;
10080 d.target = target;
10081 d.op0 = op0;
10082 d.op1 = op1;
10084 d.vmode = GET_MODE (target);
10085 gcc_assert (VECTOR_MODE_P (d.vmode));
10086 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10087 d.testing_p = false;
10089 for (i = which = 0; i < nelt; ++i)
10091 rtx e = XVECEXP (sel, 0, i);
10092 int ei = INTVAL (e) & (2 * nelt - 1);
10093 which |= (ei < nelt ? 1 : 2);
10094 d.perm[i] = ei;
10097 switch (which)
10099 default:
10100 gcc_unreachable ();
10102 case 3:
10103 d.one_vector_p = false;
10104 if (!rtx_equal_p (op0, op1))
10105 break;
10107 /* The elements of PERM do not suggest that only the first operand
10108 is used, but both operands are identical. Allow easier matching
10109 of the permutation by folding the permutation into the single
10110 input vector. */
10111 /* Fall Through. */
10112 case 2:
10113 for (i = 0; i < nelt; ++i)
10114 d.perm[i] &= nelt - 1;
10115 d.op0 = op1;
10116 d.one_vector_p = true;
10117 break;
10119 case 1:
10120 d.op1 = op0;
10121 d.one_vector_p = true;
10122 break;
10125 return aarch64_expand_vec_perm_const_1 (&d);
10128 static bool
10129 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
10130 const unsigned char *sel)
10132 struct expand_vec_perm_d d;
10133 unsigned int i, nelt, which;
10134 bool ret;
10136 d.vmode = vmode;
10137 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10138 d.testing_p = true;
10139 memcpy (d.perm, sel, nelt);
10141 /* Calculate whether all elements are in one vector. */
10142 for (i = which = 0; i < nelt; ++i)
10144 unsigned char e = d.perm[i];
10145 gcc_assert (e < 2 * nelt);
10146 which |= (e < nelt ? 1 : 2);
10149 /* If all elements are from the second vector, reindex as if from the
10150 first vector. */
10151 if (which == 2)
10152 for (i = 0; i < nelt; ++i)
10153 d.perm[i] -= nelt;
10155 /* Check whether the mask can be applied to a single vector. */
10156 d.one_vector_p = (which != 3);
10158 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10159 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10160 if (!d.one_vector_p)
10161 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10163 start_sequence ();
10164 ret = aarch64_expand_vec_perm_const_1 (&d);
10165 end_sequence ();
10167 return ret;
10170 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
10171 bool
10172 aarch64_cannot_change_mode_class (machine_mode from,
10173 machine_mode to,
10174 enum reg_class rclass)
10176 /* Full-reg subregs are allowed on general regs or any class if they are
10177 the same size. */
10178 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
10179 || !reg_classes_intersect_p (FP_REGS, rclass))
10180 return false;
10182 /* Limited combinations of subregs are safe on FPREGs. Particularly,
10183 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
10184 2. Scalar to Scalar for integer modes or same size float modes.
10185 3. Vector to Vector modes.
10186 4. On little-endian only, Vector-Structure to Vector modes. */
10187 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
10189 if (aarch64_vector_mode_supported_p (from)
10190 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
10191 return false;
10193 if (GET_MODE_NUNITS (from) == 1
10194 && GET_MODE_NUNITS (to) == 1
10195 && (GET_MODE_CLASS (from) == MODE_INT
10196 || from == to))
10197 return false;
10199 if (aarch64_vector_mode_supported_p (from)
10200 && aarch64_vector_mode_supported_p (to))
10201 return false;
10203 /* Within a vector structure straddling multiple vector registers
10204 we are in a mixed-endian representation. As such, we can't
10205 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
10206 switch between vectors and vector structures cheaply. */
10207 if (!BYTES_BIG_ENDIAN)
10208 if ((aarch64_vector_mode_supported_p (from)
10209 && aarch64_vect_struct_mode_p (to))
10210 || (aarch64_vector_mode_supported_p (to)
10211 && aarch64_vect_struct_mode_p (from)))
10212 return false;
10215 return true;
10218 /* Implement MODES_TIEABLE_P. */
10220 bool
10221 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10223 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10224 return true;
10226 /* We specifically want to allow elements of "structure" modes to
10227 be tieable to the structure. This more general condition allows
10228 other rarer situations too. */
10229 if (TARGET_SIMD
10230 && aarch64_vector_mode_p (mode1)
10231 && aarch64_vector_mode_p (mode2))
10232 return true;
10234 return false;
10237 /* Return a new RTX holding the result of moving POINTER forward by
10238 AMOUNT bytes. */
10240 static rtx
10241 aarch64_move_pointer (rtx pointer, int amount)
10243 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10245 return adjust_automodify_address (pointer, GET_MODE (pointer),
10246 next, amount);
10249 /* Return a new RTX holding the result of moving POINTER forward by the
10250 size of the mode it points to. */
10252 static rtx
10253 aarch64_progress_pointer (rtx pointer)
10255 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10257 return aarch64_move_pointer (pointer, amount);
10260 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10261 MODE bytes. */
10263 static void
10264 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
10265 machine_mode mode)
10267 rtx reg = gen_reg_rtx (mode);
10269 /* "Cast" the pointers to the correct mode. */
10270 *src = adjust_address (*src, mode, 0);
10271 *dst = adjust_address (*dst, mode, 0);
10272 /* Emit the memcpy. */
10273 emit_move_insn (reg, *src);
10274 emit_move_insn (*dst, reg);
10275 /* Move the pointers forward. */
10276 *src = aarch64_progress_pointer (*src);
10277 *dst = aarch64_progress_pointer (*dst);
10280 /* Expand movmem, as if from a __builtin_memcpy. Return true if
10281 we succeed, otherwise return false. */
10283 bool
10284 aarch64_expand_movmem (rtx *operands)
10286 unsigned int n;
10287 rtx dst = operands[0];
10288 rtx src = operands[1];
10289 rtx base;
10290 bool speed_p = !optimize_function_for_size_p (cfun);
10292 /* When optimizing for size, give a better estimate of the length of a
10293 memcpy call, but use the default otherwise. */
10294 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10296 /* We can't do anything smart if the amount to copy is not constant. */
10297 if (!CONST_INT_P (operands[2]))
10298 return false;
10300 n = UINTVAL (operands[2]);
10302 /* Try to keep the number of instructions low. For cases below 16 bytes we
10303 need to make at most two moves. For cases above 16 bytes it will be one
10304 move for each 16 byte chunk, then at most two additional moves. */
10305 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10306 return false;
10308 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10309 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10311 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10312 src = adjust_automodify_address (src, VOIDmode, base, 0);
10314 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
10315 1-byte chunk. */
10316 if (n < 4)
10318 if (n >= 2)
10320 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10321 n -= 2;
10324 if (n == 1)
10325 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10327 return true;
10330 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10331 4-byte chunk, partially overlapping with the previously copied chunk. */
10332 if (n < 8)
10334 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10335 n -= 4;
10336 if (n > 0)
10338 int move = n - 4;
10340 src = aarch64_move_pointer (src, move);
10341 dst = aarch64_move_pointer (dst, move);
10342 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10344 return true;
10347 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
10348 them, then (if applicable) an 8-byte chunk. */
10349 while (n >= 8)
10351 if (n / 16)
10353 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10354 n -= 16;
10356 else
10358 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10359 n -= 8;
10363 /* Finish the final bytes of the copy. We can always do this in one
10364 instruction. We either copy the exact amount we need, or partially
10365 overlap with the previous chunk we copied and copy 8 bytes. */
10366 if (n == 0)
10367 return true;
10368 else if (n == 1)
10369 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10370 else if (n == 2)
10371 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10372 else if (n == 4)
10373 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10374 else
10376 if (n == 3)
10378 src = aarch64_move_pointer (src, -1);
10379 dst = aarch64_move_pointer (dst, -1);
10380 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10382 else
10384 int move = n - 8;
10386 src = aarch64_move_pointer (src, move);
10387 dst = aarch64_move_pointer (dst, move);
10388 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10392 return true;
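/* Worked example: for n == 27 the code above emits one 16-byte (TImode)
   copy and one 8-byte (DImode) copy, leaving n == 3; the tail then backs
   both pointers up by one byte and issues a single 4-byte (SImode) copy
   whose first byte overlaps data already copied, so the whole memcpy is
   three load/store pairs.  */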
10395 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
10397 static unsigned HOST_WIDE_INT
10398 aarch64_asan_shadow_offset (void)
10400 return (HOST_WIDE_INT_1 << 36);
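/* With the default AddressSanitizer shadow scale of 3, this places the
   shadow byte for an address A at (A >> 3) + (1 << 36).  */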
10403 static bool
10404 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
10405 unsigned int align,
10406 enum by_pieces_operation op,
10407 bool speed_p)
10409 /* STORE_BY_PIECES can be used when copying a constant string, but
10410 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10411 For now we always fail this and let the move_by_pieces code copy
10412 the string from read-only memory. */
10413 if (op == STORE_BY_PIECES)
10414 return false;
10416 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10419 static enum machine_mode
10420 aarch64_code_to_ccmode (enum rtx_code code)
10422 switch (code)
10424 case NE:
10425 return CC_DNEmode;
10427 case EQ:
10428 return CC_DEQmode;
10430 case LE:
10431 return CC_DLEmode;
10433 case LT:
10434 return CC_DLTmode;
10436 case GE:
10437 return CC_DGEmode;
10439 case GT:
10440 return CC_DGTmode;
10442 case LEU:
10443 return CC_DLEUmode;
10445 case LTU:
10446 return CC_DLTUmode;
10448 case GEU:
10449 return CC_DGEUmode;
10451 case GTU:
10452 return CC_DGTUmode;
10454 default:
10455 return CCmode;
10459 static rtx
10460 aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
10461 int code, tree treeop0, tree treeop1)
10463 enum machine_mode op_mode, cmp_mode, cc_mode;
10464 rtx op0, op1, cmp, target;
10465 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10466 enum insn_code icode;
10467 struct expand_operand ops[4];
10469 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
10470 if (cc_mode == CCmode)
10471 return NULL_RTX;
10473 start_sequence ();
10474 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10476 op_mode = GET_MODE (op0);
10477 if (op_mode == VOIDmode)
10478 op_mode = GET_MODE (op1);
10480 switch (op_mode)
10482 case QImode:
10483 case HImode:
10484 case SImode:
10485 cmp_mode = SImode;
10486 icode = CODE_FOR_cmpsi;
10487 break;
10489 case DImode:
10490 cmp_mode = DImode;
10491 icode = CODE_FOR_cmpdi;
10492 break;
10494 default:
10495 end_sequence ();
10496 return NULL_RTX;
10499 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10500 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10501 if (!op0 || !op1)
10503 end_sequence ();
10504 return NULL_RTX;
10506 *prep_seq = get_insns ();
10507 end_sequence ();
10509 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
10510 target = gen_rtx_REG (CCmode, CC_REGNUM);
10512 create_output_operand (&ops[0], target, CCmode);
10513 create_fixed_operand (&ops[1], cmp);
10514 create_fixed_operand (&ops[2], op0);
10515 create_fixed_operand (&ops[3], op1);
10517 start_sequence ();
10518 if (!maybe_expand_insn (icode, 4, ops))
10520 end_sequence ();
10521 return NULL_RTX;
10523 *gen_seq = get_insns ();
10524 end_sequence ();
10526 return gen_rtx_REG (cc_mode, CC_REGNUM);
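/* Implement TARGET_GEN_CCMP_NEXT.  Like aarch64_gen_ccmp_first, but emit a
   conditional compare that combines the previous CC result PREV with the
   comparison CMP_CODE of TREEOP0 and TREEOP1; BIT_CODE (AND or IOR) selects
   the ccmp pattern used.  Return the new CC register, or NULL_RTX on
   failure.  */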
10529 static rtx
10530 aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
10531 tree treeop0, tree treeop1, int bit_code)
10533 rtx op0, op1, cmp0, cmp1, target;
10534 enum machine_mode op_mode, cmp_mode, cc_mode;
10535 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10536 enum insn_code icode = CODE_FOR_ccmp_andsi;
10537 struct expand_operand ops[6];
10539 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
10540 if (cc_mode == CCmode)
10541 return NULL_RTX;
10543 push_to_sequence ((rtx_insn*) *prep_seq);
10544 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10546 op_mode = GET_MODE (op0);
10547 if (op_mode == VOIDmode)
10548 op_mode = GET_MODE (op1);
10550 switch (op_mode)
10552 case QImode:
10553 case HImode:
10554 case SImode:
10555 cmp_mode = SImode;
10556 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
10557 : CODE_FOR_ccmp_iorsi;
10558 break;
10560 case DImode:
10561 cmp_mode = DImode;
10562 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
10563 : CODE_FOR_ccmp_iordi;
10564 break;
10566 default:
10567 end_sequence ();
10568 return NULL_RTX;
10571 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10572 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10573 if (!op0 || !op1)
10575 end_sequence ();
10576 return NULL_RTX;
10578 *prep_seq = get_insns ();
10579 end_sequence ();
10581 target = gen_rtx_REG (cc_mode, CC_REGNUM);
10582 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
10583 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
10585 create_fixed_operand (&ops[0], prev);
10586 create_fixed_operand (&ops[1], target);
10587 create_fixed_operand (&ops[2], op0);
10588 create_fixed_operand (&ops[3], op1);
10589 create_fixed_operand (&ops[4], cmp0);
10590 create_fixed_operand (&ops[5], cmp1);
10592 push_to_sequence ((rtx_insn*) *gen_seq);
10593 if (!maybe_expand_insn (icode, 6, ops))
10595 end_sequence ();
10596 return NULL_RTX;
10599 *gen_seq = get_insns ();
10600 end_sequence ();
10602 return target;
10605 #undef TARGET_GEN_CCMP_FIRST
10606 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
10608 #undef TARGET_GEN_CCMP_NEXT
10609 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
10611 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if the target
10612 supports some form of instruction fusion. */
10614 static bool
10615 aarch64_macro_fusion_p (void)
10617 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10621 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10622 should be kept together during scheduling. */
10624 static bool
10625 aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10627 rtx set_dest;
10628 rtx prev_set = single_set (prev);
10629 rtx curr_set = single_set (curr);
10630 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10631 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10633 if (!aarch64_macro_fusion_p ())
10634 return false;
10636 if (simple_sets_p
10637 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10639 /* We are trying to match:
10640 prev (mov) == (set (reg r0) (const_int imm16))
10641 curr (movk) == (set (zero_extract (reg r0)
10642 (const_int 16)
10643 (const_int 16))
10644 (const_int imm16_1)) */
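      /* Roughly, in assembly this is a pair such as:
	   mov	x0, #imm16
	   movk	x0, #imm16_1, lsl #16  */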
10646 set_dest = SET_DEST (curr_set);
10648 if (GET_CODE (set_dest) == ZERO_EXTRACT
10649 && CONST_INT_P (SET_SRC (curr_set))
10650 && CONST_INT_P (SET_SRC (prev_set))
10651 && CONST_INT_P (XEXP (set_dest, 2))
10652 && INTVAL (XEXP (set_dest, 2)) == 16
10653 && REG_P (XEXP (set_dest, 0))
10654 && REG_P (SET_DEST (prev_set))
10655 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10657 return true;
10661 if (simple_sets_p
10662 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10665 /* We're trying to match:
10666 prev (adrp) == (set (reg r1)
10667 (high (symbol_ref ("SYM"))))
10668 curr (add) == (set (reg r0)
10669 (lo_sum (reg r1)
10670 (symbol_ref ("SYM"))))
10671 Note that r0 need not necessarily be the same as r1, especially
10672 during pre-regalloc scheduling. */
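      /* Roughly, in assembly this is a pair such as:
	   adrp	x1, SYM
	   add	x0, x1, :lo12:SYM  */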
10674 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10675 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10677 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10678 && REG_P (XEXP (SET_SRC (curr_set), 0))
10679 && REGNO (XEXP (SET_SRC (curr_set), 0))
10680 == REGNO (SET_DEST (prev_set))
10681 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10682 XEXP (SET_SRC (curr_set), 1)))
10683 return true;
10687 if (simple_sets_p
10688 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10691 /* We're trying to match:
10692 prev (movk) == (set (zero_extract (reg r0)
10693 (const_int 16)
10694 (const_int 32))
10695 (const_int imm16_1))
10696 curr (movk) == (set (zero_extract (reg r0)
10697 (const_int 16)
10698 (const_int 48))
10699 (const_int imm16_2)) */
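      /* Roughly, in assembly this is a pair such as:
	   movk	x0, #imm16_1, lsl #32
	   movk	x0, #imm16_2, lsl #48  */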
10701 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10702 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10703 && REG_P (XEXP (SET_DEST (prev_set), 0))
10704 && REG_P (XEXP (SET_DEST (curr_set), 0))
10705 && REGNO (XEXP (SET_DEST (prev_set), 0))
10706 == REGNO (XEXP (SET_DEST (curr_set), 0))
10707 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10708 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10709 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10710 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10711 && CONST_INT_P (SET_SRC (prev_set))
10712 && CONST_INT_P (SET_SRC (curr_set)))
10713 return true;
10716 if (simple_sets_p
10717 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10719 /* We're trying to match:
10720 prev (adrp) == (set (reg r0)
10721 (high (symbol_ref ("SYM"))))
10722 curr (ldr) == (set (reg r1)
10723 (mem (lo_sum (reg r0)
10724 (symbol_ref ("SYM")))))
10726 curr (ldr) == (set (reg r1)
10727 (zero_extend (mem
10728 (lo_sum (reg r0)
10729 (symbol_ref ("SYM")))))) */
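      /* Roughly, in assembly this is a pair such as:
	   adrp	x0, SYM
	   ldr	x1, [x0, #:lo12:SYM]  */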
10730 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10731 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10733 rtx curr_src = SET_SRC (curr_set);
10735 if (GET_CODE (curr_src) == ZERO_EXTEND)
10736 curr_src = XEXP (curr_src, 0);
10738 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10739 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10740 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10741 == REGNO (SET_DEST (prev_set))
10742 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10743 XEXP (SET_SRC (prev_set), 0)))
10744 return true;
10748 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10749 && any_condjump_p (curr))
10751 enum attr_type prev_type = get_attr_type (prev);
10753 /* FIXME: this misses some instructions that ThunderX considers simple
10754 arithmetic; in particular, simple shifts are missed here. */
10755 if (prev_type == TYPE_ALUS_SREG
10756 || prev_type == TYPE_ALUS_IMM
10757 || prev_type == TYPE_LOGICS_REG
10758 || prev_type == TYPE_LOGICS_IMM)
10759 return true;
10762 return false;
10765 /* If MEM is in the form of [base+offset], extract the two parts of the
10766 address and set BASE and OFFSET accordingly; otherwise return false
10767 after clearing BASE and OFFSET. */
10769 bool
10770 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10772 rtx addr;
10774 gcc_assert (MEM_P (mem));
10776 addr = XEXP (mem, 0);
10778 if (REG_P (addr))
10780 *base = addr;
10781 *offset = const0_rtx;
10782 return true;
10785 if (GET_CODE (addr) == PLUS
10786 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10788 *base = XEXP (addr, 0);
10789 *offset = XEXP (addr, 1);
10790 return true;
10793 *base = NULL_RTX;
10794 *offset = NULL_RTX;
10796 return false;
10799 /* Types for scheduling fusion. */
10800 enum sched_fusion_type
10802 SCHED_FUSION_NONE = 0,
10803 SCHED_FUSION_LD_SIGN_EXTEND,
10804 SCHED_FUSION_LD_ZERO_EXTEND,
10805 SCHED_FUSION_LD,
10806 SCHED_FUSION_ST,
10807 SCHED_FUSION_NUM
10810 /* If INSN is a load or store whose address is in the form [base+offset],
10811 extract the two parts into BASE and OFFSET. Return the scheduling
10812 fusion type of this INSN. */
10814 static enum sched_fusion_type
10815 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10817 rtx x, dest, src;
10818 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10820 gcc_assert (INSN_P (insn));
10821 x = PATTERN (insn);
10822 if (GET_CODE (x) != SET)
10823 return SCHED_FUSION_NONE;
10825 src = SET_SRC (x);
10826 dest = SET_DEST (x);
10828 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
10829 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
10830 return SCHED_FUSION_NONE;
10832 if (GET_CODE (src) == SIGN_EXTEND)
10834 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10835 src = XEXP (src, 0);
10836 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10837 return SCHED_FUSION_NONE;
10839 else if (GET_CODE (src) == ZERO_EXTEND)
10841 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10842 src = XEXP (src, 0);
10843 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10844 return SCHED_FUSION_NONE;
10847 if (GET_CODE (src) == MEM && REG_P (dest))
10848 extract_base_offset_in_addr (src, base, offset);
10849 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10851 fusion = SCHED_FUSION_ST;
10852 extract_base_offset_in_addr (dest, base, offset);
10854 else
10855 return SCHED_FUSION_NONE;
10857 if (*base == NULL_RTX || *offset == NULL_RTX)
10858 fusion = SCHED_FUSION_NONE;
10860 return fusion;
10863 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10865 Currently we only support fusing ldr and str instructions, so FUSION_PRI
10866 and PRI are only calculated for these instructions. For other instructions,
10867 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
10868 types of instruction fusion can be added by returning different priorities.
10870 It's important that irrelevant instructions get the largest FUSION_PRI. */
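/* For example, the loads ldr w1, [x3, #8] and ldr w2, [x3, #4] get the same
   FUSION_PRI (same fusion type and base register), while the load with the
   smaller offset gets the larger PRI and is scheduled first, making it
   possible to pair the two into an ldp later on.  */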
10872 static void
10873 aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10874 int *fusion_pri, int *pri)
10876 int tmp, off_val;
10877 rtx base, offset;
10878 enum sched_fusion_type fusion;
10880 gcc_assert (INSN_P (insn));
10882 tmp = max_pri - 1;
10883 fusion = fusion_load_store (insn, &base, &offset);
10884 if (fusion == SCHED_FUSION_NONE)
10886 *pri = tmp;
10887 *fusion_pri = tmp;
10888 return;
10891 /* Set FUSION_PRI according to fusion type and base register. */
10892 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10894 /* Calculate PRI. */
10895 tmp /= 2;
10897 /* INSN with smaller offset goes first. */
10898 off_val = (int)(INTVAL (offset));
10899 if (off_val >= 0)
10900 tmp -= (off_val & 0xfffff);
10901 else
10902 tmp += ((- off_val) & 0xfffff);
10904 *pri = tmp;
10905 return;
10908 /* Given OPERANDS of consecutive load/store, check if we can merge
10909 them into ldp/stp. LOAD is true if they are load instructions.
10910 MODE is the mode of memory operands. */
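/* For example, ldr w0, [x2] followed by ldr w1, [x2, #4] passes these checks
   (distinct destination registers of the same class, same base, consecutive
   SImode offsets, no volatile MEMs) and can be replaced by ldp w0, w1, [x2].  */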
10912 bool
10913 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10914 enum machine_mode mode)
10916 HOST_WIDE_INT offval_1, offval_2, msize;
10917 enum reg_class rclass_1, rclass_2;
10918 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10920 if (load)
10922 mem_1 = operands[1];
10923 mem_2 = operands[3];
10924 reg_1 = operands[0];
10925 reg_2 = operands[2];
10926 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10927 if (REGNO (reg_1) == REGNO (reg_2))
10928 return false;
10930 else
10932 mem_1 = operands[0];
10933 mem_2 = operands[2];
10934 reg_1 = operands[1];
10935 reg_2 = operands[3];
10938 /* The mems cannot be volatile. */
10939 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
10940 return false;
10942 /* Check if the addresses are in the form of [base+offset]. */
10943 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10944 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10945 return false;
10946 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10947 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10948 return false;
10950 /* Check if the bases are the same. */
10951 if (!rtx_equal_p (base_1, base_2))
10952 return false;
10954 offval_1 = INTVAL (offset_1);
10955 offval_2 = INTVAL (offset_2);
10956 msize = GET_MODE_SIZE (mode);
10957 /* Check if the offsets are consecutive. */
10958 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10959 return false;
10961 /* Check if the addresses are clobbered by load. */
10962 if (load)
10964 if (reg_mentioned_p (reg_1, mem_1))
10965 return false;
10967 /* In increasing order, the last load can clobber the address. */
10968 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10969 return false;
10972 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10973 rclass_1 = FP_REGS;
10974 else
10975 rclass_1 = GENERAL_REGS;
10977 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10978 rclass_2 = FP_REGS;
10979 else
10980 rclass_2 = GENERAL_REGS;
10982 /* Check if the registers are of the same class. */
10983 if (rclass_1 != rclass_2)
10984 return false;
10986 return true;
10989 /* Given OPERANDS of consecutive load/store, check if we can merge
10990 them into ldp/stp by adjusting the offset. LOAD is true if they
10991 are load instructions. MODE is the mode of memory operands.
10993 Given the following consecutive stores:
10995 str w1, [xb, 0x100]
10996 str w1, [xb, 0x104]
10997 str w1, [xb, 0x108]
10998 str w1, [xb, 0x10c]
11000 Though the offsets are out of the range supported by stp, we can
11001 still pair them after adjusting the offset, like:
11003 add scratch, xb, 0x100
11004 stp w1, w1, [scratch]
11005 stp w1, w1, [scratch, 0x8]
11007 The peephole patterns detecting this opportunity should guarantee
11008 the scratch register is available. */
11010 bool
11011 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
11012 enum machine_mode mode)
11014 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
11015 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
11016 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
11017 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
11019 if (load)
11021 reg_1 = operands[0];
11022 mem_1 = operands[1];
11023 reg_2 = operands[2];
11024 mem_2 = operands[3];
11025 reg_3 = operands[4];
11026 mem_3 = operands[5];
11027 reg_4 = operands[6];
11028 mem_4 = operands[7];
11029 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
11030 && REG_P (reg_3) && REG_P (reg_4));
11031 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
11032 return false;
11034 else
11036 mem_1 = operands[0];
11037 reg_1 = operands[1];
11038 mem_2 = operands[2];
11039 reg_2 = operands[3];
11040 mem_3 = operands[4];
11041 reg_3 = operands[5];
11042 mem_4 = operands[6];
11043 reg_4 = operands[7];
11045 /* Skip if the memory operand is by itself valid for ldp/stp. */
11046 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
11047 return false;
11049 /* The mems cannot be volatile. */
11050 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
11051 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
11052 return false;
11054 /* Check if the addresses are in the form of [base+offset]. */
11055 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
11056 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
11057 return false;
11058 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
11059 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
11060 return false;
11061 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
11062 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
11063 return false;
11064 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
11065 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
11066 return false;
11068 /* Check if the bases are the same. */
11069 if (!rtx_equal_p (base_1, base_2)
11070 || !rtx_equal_p (base_2, base_3)
11071 || !rtx_equal_p (base_3, base_4))
11072 return false;
11074 offval_1 = INTVAL (offset_1);
11075 offval_2 = INTVAL (offset_2);
11076 offval_3 = INTVAL (offset_3);
11077 offval_4 = INTVAL (offset_4);
11078 msize = GET_MODE_SIZE (mode);
11079 /* Check if the offsets are consecutive. */
11080 if ((offval_1 != (offval_2 + msize)
11081 || offval_1 != (offval_3 + msize * 2)
11082 || offval_1 != (offval_4 + msize * 3))
11083 && (offval_4 != (offval_3 + msize)
11084 || offval_4 != (offval_2 + msize * 2)
11085 || offval_4 != (offval_1 + msize * 3)))
11086 return false;
11088 /* Check if the addresses are clobbered by load. */
11089 if (load)
11091 if (reg_mentioned_p (reg_1, mem_1)
11092 || reg_mentioned_p (reg_2, mem_2)
11093 || reg_mentioned_p (reg_3, mem_3))
11094 return false;
11096 /* In increasing order, the last load can clobber the address. */
11097 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
11098 return false;
11101 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11102 rclass_1 = FP_REGS;
11103 else
11104 rclass_1 = GENERAL_REGS;
11106 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11107 rclass_2 = FP_REGS;
11108 else
11109 rclass_2 = GENERAL_REGS;
11111 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
11112 rclass_3 = FP_REGS;
11113 else
11114 rclass_3 = GENERAL_REGS;
11116 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
11117 rclass_4 = FP_REGS;
11118 else
11119 rclass_4 = GENERAL_REGS;
11121 /* Check if the registers are of the same class. */
11122 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
11123 return false;
11125 return true;
11128 /* Given OPERANDS of consecutive load/store, this function pairs them
11129 into ldp/stp after adjusting the offset. It depends on the fact
11130 that addresses of load/store instructions are in increasing order.
11131 MODE is the mode of memory operands. CODE is the rtl operator
11132 which should be applied to all memory operands, it's SIGN_EXTEND,
11133 ZERO_EXTEND or UNKNOWN. */
11135 bool
11136 aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
11137 enum machine_mode mode, RTX_CODE code)
11139 rtx base, offset, t1, t2;
11140 rtx mem_1, mem_2, mem_3, mem_4;
11141 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
11143 if (load)
11145 mem_1 = operands[1];
11146 mem_2 = operands[3];
11147 mem_3 = operands[5];
11148 mem_4 = operands[7];
11150 else
11152 mem_1 = operands[0];
11153 mem_2 = operands[2];
11154 mem_3 = operands[4];
11155 mem_4 = operands[6];
11156 gcc_assert (code == UNKNOWN);
11159 extract_base_offset_in_addr (mem_1, &base, &offset);
11160 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
11162 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
11163 msize = GET_MODE_SIZE (mode);
11164 stp_off_limit = msize * 0x40;
11165 off_val = INTVAL (offset);
11166 abs_off = (off_val < 0) ? -off_val : off_val;
11167 new_off = abs_off % stp_off_limit;
11168 adj_off = abs_off - new_off;
11170 /* Further adjust to make sure all offsets are OK. */
11171 if ((new_off + msize * 2) >= stp_off_limit)
11173 adj_off += stp_off_limit;
11174 new_off -= stp_off_limit;
11177 /* Make sure the adjustment can be done with ADD/SUB instructions. */
11178 if (adj_off >= 0x1000)
11179 return false;
11181 if (off_val < 0)
11183 adj_off = -adj_off;
11184 new_off = -new_off;
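  /* For example, with SImode accesses (msize == 4, so stp_off_limit == 256)
     and an original offset of 0x100, this computes adj_off == 0x100 and
     new_off == 0, matching the add/stp sequence shown in the comment
     above.  */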
11187 /* Create new memory references. */
11188 mem_1 = change_address (mem_1, VOIDmode,
11189 plus_constant (DImode, operands[8], new_off));
11191 /* Check if the adjusted address is OK for ldp/stp. */
11192 if (!aarch64_mem_pair_operand (mem_1, mode))
11193 return false;
11195 msize = GET_MODE_SIZE (mode);
11196 mem_2 = change_address (mem_2, VOIDmode,
11197 plus_constant (DImode,
11198 operands[8],
11199 new_off + msize));
11200 mem_3 = change_address (mem_3, VOIDmode,
11201 plus_constant (DImode,
11202 operands[8],
11203 new_off + msize * 2));
11204 mem_4 = change_address (mem_4, VOIDmode,
11205 plus_constant (DImode,
11206 operands[8],
11207 new_off + msize * 3));
11209 if (code == ZERO_EXTEND)
11211 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
11212 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
11213 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
11214 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
11216 else if (code == SIGN_EXTEND)
11218 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
11219 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
11220 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
11221 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
11224 if (load)
11226 operands[1] = mem_1;
11227 operands[3] = mem_2;
11228 operands[5] = mem_3;
11229 operands[7] = mem_4;
11231 else
11233 operands[0] = mem_1;
11234 operands[2] = mem_2;
11235 operands[4] = mem_3;
11236 operands[6] = mem_4;
11239 /* Emit adjusting instruction. */
11240 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
11241 plus_constant (DImode, base, adj_off)));
11242 /* Emit ldp/stp instructions. */
11243 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
11244 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
11245 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11246 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
11247 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
11248 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11249 return true;
11252 #undef TARGET_ADDRESS_COST
11253 #define TARGET_ADDRESS_COST aarch64_address_cost
11255 /* This hook determines whether unnamed bitfields affect the alignment
11256 of the containing structure. The hook returns true if the structure
11257 should inherit the alignment requirements of an unnamed bitfield's
11258 type. */
11259 #undef TARGET_ALIGN_ANON_BITFIELD
11260 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
11262 #undef TARGET_ASM_ALIGNED_DI_OP
11263 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11265 #undef TARGET_ASM_ALIGNED_HI_OP
11266 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11268 #undef TARGET_ASM_ALIGNED_SI_OP
11269 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11271 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11272 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11273 hook_bool_const_tree_hwi_hwi_const_tree_true
11275 #undef TARGET_ASM_FILE_START
11276 #define TARGET_ASM_FILE_START aarch64_start_file
11278 #undef TARGET_ASM_OUTPUT_MI_THUNK
11279 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11281 #undef TARGET_ASM_SELECT_RTX_SECTION
11282 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11284 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11285 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11287 #undef TARGET_BUILD_BUILTIN_VA_LIST
11288 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11290 #undef TARGET_CALLEE_COPIES
11291 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11293 #undef TARGET_CAN_ELIMINATE
11294 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11296 #undef TARGET_CANNOT_FORCE_CONST_MEM
11297 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11299 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11300 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11302 /* Only the least significant bit is used for initialization guard
11303 variables. */
11304 #undef TARGET_CXX_GUARD_MASK_BIT
11305 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
11307 #undef TARGET_C_MODE_FOR_SUFFIX
11308 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11310 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11311 #undef TARGET_DEFAULT_TARGET_FLAGS
11312 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11313 #endif
11315 #undef TARGET_CLASS_MAX_NREGS
11316 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11318 #undef TARGET_BUILTIN_DECL
11319 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
11321 #undef TARGET_EXPAND_BUILTIN
11322 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11324 #undef TARGET_EXPAND_BUILTIN_VA_START
11325 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11327 #undef TARGET_FOLD_BUILTIN
11328 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11330 #undef TARGET_FUNCTION_ARG
11331 #define TARGET_FUNCTION_ARG aarch64_function_arg
11333 #undef TARGET_FUNCTION_ARG_ADVANCE
11334 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11336 #undef TARGET_FUNCTION_ARG_BOUNDARY
11337 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11339 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11340 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11342 #undef TARGET_FUNCTION_VALUE
11343 #define TARGET_FUNCTION_VALUE aarch64_function_value
11345 #undef TARGET_FUNCTION_VALUE_REGNO_P
11346 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11348 #undef TARGET_FRAME_POINTER_REQUIRED
11349 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11351 #undef TARGET_GIMPLE_FOLD_BUILTIN
11352 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11354 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11355 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11357 #undef TARGET_INIT_BUILTINS
11358 #define TARGET_INIT_BUILTINS aarch64_init_builtins
11360 #undef TARGET_LEGITIMATE_ADDRESS_P
11361 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11363 #undef TARGET_LEGITIMATE_CONSTANT_P
11364 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11366 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11367 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11369 #undef TARGET_LRA_P
11370 #define TARGET_LRA_P hook_bool_void_true
11372 #undef TARGET_MANGLE_TYPE
11373 #define TARGET_MANGLE_TYPE aarch64_mangle_type
11375 #undef TARGET_MEMORY_MOVE_COST
11376 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11378 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11379 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11381 #undef TARGET_MUST_PASS_IN_STACK
11382 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11384 /* This target hook should return true if accesses to volatile bitfields
11385 should use the narrowest mode possible. It should return false if these
11386 accesses should use the bitfield container type. */
11387 #undef TARGET_NARROW_VOLATILE_BITFIELD
11388 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
11390 #undef TARGET_OPTION_OVERRIDE
11391 #define TARGET_OPTION_OVERRIDE aarch64_override_options
11393 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11394 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11395 aarch64_override_options_after_change
11397 #undef TARGET_PASS_BY_REFERENCE
11398 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11400 #undef TARGET_PREFERRED_RELOAD_CLASS
11401 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11403 #undef TARGET_SCHED_REASSOCIATION_WIDTH
11404 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11406 #undef TARGET_SECONDARY_RELOAD
11407 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11409 #undef TARGET_SHIFT_TRUNCATION_MASK
11410 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11412 #undef TARGET_SETUP_INCOMING_VARARGS
11413 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11415 #undef TARGET_STRUCT_VALUE_RTX
11416 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11418 #undef TARGET_REGISTER_MOVE_COST
11419 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11421 #undef TARGET_RETURN_IN_MEMORY
11422 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11424 #undef TARGET_RETURN_IN_MSB
11425 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11427 #undef TARGET_RTX_COSTS
11428 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
11430 #undef TARGET_SCHED_ISSUE_RATE
11431 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11433 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11434 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
11435 aarch64_sched_first_cycle_multipass_dfa_lookahead
11437 #undef TARGET_TRAMPOLINE_INIT
11438 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11440 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11441 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11443 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11444 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11446 #undef TARGET_ARRAY_MODE_SUPPORTED_P
11447 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11449 #undef TARGET_VECTORIZE_ADD_STMT_COST
11450 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11452 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11453 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11454 aarch64_builtin_vectorization_cost
11456 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11457 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11459 #undef TARGET_VECTORIZE_BUILTINS
11460 #define TARGET_VECTORIZE_BUILTINS
11462 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11463 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11464 aarch64_builtin_vectorized_function
11466 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11467 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11468 aarch64_autovectorize_vector_sizes
11470 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11471 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11472 aarch64_atomic_assign_expand_fenv
11474 /* Section anchor support. */
11476 #undef TARGET_MIN_ANCHOR_OFFSET
11477 #define TARGET_MIN_ANCHOR_OFFSET -256
11479 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11480 byte offset; we can do much more for larger data types, but have no way
11481 to determine the size of the access. We assume accesses are aligned. */
11482 #undef TARGET_MAX_ANCHOR_OFFSET
11483 #define TARGET_MAX_ANCHOR_OFFSET 4095
11485 #undef TARGET_VECTOR_ALIGNMENT
11486 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11488 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11489 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11490 aarch64_simd_vector_alignment_reachable
11492 /* vec_perm support. */
11494 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11495 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11496 aarch64_vectorize_vec_perm_const_ok
11499 #undef TARGET_FIXED_CONDITION_CODE_REGS
11500 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11502 #undef TARGET_FLAGS_REGNUM
11503 #define TARGET_FLAGS_REGNUM CC_REGNUM
11505 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11506 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11508 #undef TARGET_ASAN_SHADOW_OFFSET
11509 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11511 #undef TARGET_LEGITIMIZE_ADDRESS
11512 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11514 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11515 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11516 aarch64_use_by_pieces_infrastructure_p
11518 #undef TARGET_CAN_USE_DOLOOP_P
11519 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11521 #undef TARGET_SCHED_MACRO_FUSION_P
11522 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11524 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11525 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11527 #undef TARGET_SCHED_FUSION_PRIORITY
11528 #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11530 struct gcc_target targetm = TARGET_INITIALIZER;
11532 #include "gt-aarch64.h"