gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2015 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "hash-set.h"
  29 #include "machmode.h"
  30 #include "vec.h"
  31 #include "double-int.h"
  32 #include "input.h"
  33 #include "alias.h"
  34 #include "symtab.h"
  35 #include "wide-int.h"
  36 #include "inchash.h"
  37 #include "tree.h"
  38 #include "fold-const.h"
  39 #include "stringpool.h"
  40 #include "stor-layout.h"
  41 #include "calls.h"
  42 #include "varasm.h"
  43 #include "regs.h"
  44 #include "dominance.h"
  45 #include "cfg.h"
  46 #include "cfgrtl.h"
  47 #include "cfganal.h"
  48 #include "lcm.h"
  49 #include "cfgbuild.h"
  50 #include "cfgcleanup.h"
  51 #include "predict.h"
  52 #include "basic-block.h"
  53 #include "df.h"
  54 #include "hard-reg-set.h"
  55 #include "output.h"
  56 #include "hashtab.h"
  57 #include "function.h"
  58 #include "flags.h"
  59 #include "statistics.h"
  60 #include "real.h"
  61 #include "fixed-value.h"
  62 #include "insn-config.h"
  63 #include "expmed.h"
  64 #include "dojump.h"
  65 #include "explow.h"
  66 #include "emit-rtl.h"
  67 #include "stmt.h"
  68 #include "expr.h"
  69 #include "reload.h"
  70 #include "toplev.h"
  71 #include "target.h"
  72 #include "target-def.h"
  73 #include "targhooks.h"
  74 #include "ggc.h"
  75 #include "tm_p.h"
  76 #include "recog.h"
  77 #include "langhooks.h"
  78 #include "diagnostic-core.h"
  79 #include "hash-table.h"
  80 #include "tree-ssa-alias.h"
  81 #include "internal-fn.h"
  82 #include "gimple-fold.h"
  83 #include "tree-eh.h"
  84 #include "gimple-expr.h"
  85 #include "is-a.h"
  86 #include "gimple.h"
  87 #include "gimplify.h"
  88 #include "optabs.h"
  89 #include "dwarf2.h"
  90 #include "cfgloop.h"
  91 #include "tree-vectorizer.h"
  92 #include "aarch64-cost-tables.h"
  93 #include "dumpfile.h"
  94 #include "builtins.h"
  95 #include "rtl-iter.h"
  96 #include "tm-constrs.h"
  97
  98 /* Defined for convenience.  */
  99 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
 100
 101 /* Classifies an address.
 102
 103    ADDRESS_REG_IMM
 104        A simple base register plus immediate offset.
 105
 106    ADDRESS_REG_WB
 107        A base register indexed by immediate offset with writeback.
 108
 109    ADDRESS_REG_REG
 110        A base register indexed by (optionally scaled) register.
 111
 112    ADDRESS_REG_UXTW
 113        A base register indexed by (optionally scaled) zero-extended register.
 114
 115    ADDRESS_REG_SXTW
 116        A base register indexed by (optionally scaled) sign-extended register.
 117
 118    ADDRESS_LO_SUM
 119        A LO_SUM rtx with a base register and "LO12" symbol relocation.
 120
 121    ADDRESS_SYMBOLIC:
 122        A constant symbolic address, in pc-relative literal pool.  */
 123
 124 enum aarch64_address_type {
 125   ADDRESS_REG_IMM,
 126   ADDRESS_REG_WB,
 127   ADDRESS_REG_REG,
 128   ADDRESS_REG_UXTW,
 129   ADDRESS_REG_SXTW,
 130   ADDRESS_LO_SUM,
 131   ADDRESS_SYMBOLIC
 132 };
 133
 134 struct aarch64_address_info {
 135   enum aarch64_address_type type;
 136   rtx base;
 137   rtx offset;
 138   int shift;
 139   enum aarch64_symbol_type symbol_type;
 140 };
 141
 142 struct simd_immediate_info
 143 {
 144   rtx value;
 145   int shift;
 146   int element_width;
 147   bool mvn;
 148   bool msl;
 149 };
 150
 151 /* The current code model.  */
 152 enum aarch64_code_model aarch64_cmodel;
 153
 154 #ifdef HAVE_AS_TLS
 155 #undef TARGET_HAVE_TLS
 156 #define TARGET_HAVE_TLS 1
 157 #endif
 158
 159 static bool aarch64_composite_type_p (const_tree, machine_mode);
 160 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
 161                                                      const_tree,
 162                                                      machine_mode *, int *,
 163                                                      bool *);
 164 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 165 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 166 static void aarch64_override_options_after_change (void);
 167 static bool aarch64_vector_mode_supported_p (machine_mode);
 168 static unsigned bit_count (unsigned HOST_WIDE_INT);
 169 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
 170                                                  const unsigned char *sel);
 171 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
 172
 173 /* Major revision number of the ARM Architecture implemented by the target.  */
 174 unsigned aarch64_architecture_version;
 175
 176 /* The processor for which instructions should be scheduled.  */
 177 enum aarch64_processor aarch64_tune = cortexa53;
 178
 179 /* The current tuning set.  */
 180 const struct tune_params *aarch64_tune_params;
 181
 182 /* Mask to specify which instructions we are allowed to generate.  */
 183 unsigned long aarch64_isa_flags = 0;
 184
 185 /* Mask to specify which instruction scheduling options should be used.  */
 186 unsigned long aarch64_tune_flags = 0;
 187
 188 /* Tuning parameters.  */
 189
 190 #if HAVE_DESIGNATED_INITIALIZERS
 191 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
 192 #else
 193 #define NAMED_PARAM(NAME, VAL) (VAL)
 194 #endif
 195
 196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 197 __extension__
 198 #endif
 199
 200 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 201 __extension__
 202 #endif
 203 static const struct cpu_addrcost_table generic_addrcost_table =
 204 {
 205 #if HAVE_DESIGNATED_INITIALIZERS
 206   .addr_scale_costs =
 207 #endif
 208     {
 209       NAMED_PARAM (hi, 0),
 210       NAMED_PARAM (si, 0),
 211       NAMED_PARAM (di, 0),
 212       NAMED_PARAM (ti, 0),
 213     },
 214   NAMED_PARAM (pre_modify, 0),
 215   NAMED_PARAM (post_modify, 0),
 216   NAMED_PARAM (register_offset, 0),
 217   NAMED_PARAM (register_extend, 0),
 218   NAMED_PARAM (imm_offset, 0)
 219 };
 220
 221 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 222 __extension__
 223 #endif
 224 static const struct cpu_addrcost_table cortexa57_addrcost_table =
 225 {
 226 #if HAVE_DESIGNATED_INITIALIZERS
 227   .addr_scale_costs =
 228 #endif
 229     {
 230       NAMED_PARAM (hi, 1),
 231       NAMED_PARAM (si, 0),
 232       NAMED_PARAM (di, 0),
 233       NAMED_PARAM (ti, 1),
 234     },
 235   NAMED_PARAM (pre_modify, 0),
 236   NAMED_PARAM (post_modify, 0),
 237   NAMED_PARAM (register_offset, 0),
 238   NAMED_PARAM (register_extend, 0),
 239   NAMED_PARAM (imm_offset, 0),
 240 };
 241
 242 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 243 __extension__
 244 #endif
 245 static const struct cpu_addrcost_table xgene1_addrcost_table =
 246 {
 247 #if HAVE_DESIGNATED_INITIALIZERS
 248   .addr_scale_costs =
 249 #endif
 250     {
 251       NAMED_PARAM (hi, 1),
 252       NAMED_PARAM (si, 0),
 253       NAMED_PARAM (di, 0),
 254       NAMED_PARAM (ti, 1),
 255     },
 256   NAMED_PARAM (pre_modify, 1),
 257   NAMED_PARAM (post_modify, 0),
 258   NAMED_PARAM (register_offset, 0),
 259   NAMED_PARAM (register_extend, 1),
 260   NAMED_PARAM (imm_offset, 0),
 261 };
 262
 263 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 264 __extension__
 265 #endif
 266 static const struct cpu_regmove_cost generic_regmove_cost =
 267 {
 268   NAMED_PARAM (GP2GP, 1),
 269   /* Avoid the use of slow int<->fp moves for spilling by setting
 270      their cost higher than memmov_cost.  */
 271   NAMED_PARAM (GP2FP, 5),
 272   NAMED_PARAM (FP2GP, 5),
 273   NAMED_PARAM (FP2FP, 2)
 274 };
 275
 276 static const struct cpu_regmove_cost cortexa57_regmove_cost =
 277 {
 278   NAMED_PARAM (GP2GP, 1),
 279   /* Avoid the use of slow int<->fp moves for spilling by setting
 280      their cost higher than memmov_cost.  */
 281   NAMED_PARAM (GP2FP, 5),
 282   NAMED_PARAM (FP2GP, 5),
 283   NAMED_PARAM (FP2FP, 2)
 284 };
 285
 286 static const struct cpu_regmove_cost cortexa53_regmove_cost =
 287 {
 288   NAMED_PARAM (GP2GP, 1),
 289   /* Avoid the use of slow int<->fp moves for spilling by setting
 290      their cost higher than memmov_cost.  */
 291   NAMED_PARAM (GP2FP, 5),
 292   NAMED_PARAM (FP2GP, 5),
 293   NAMED_PARAM (FP2FP, 2)
 294 };
 295
 296 static const struct cpu_regmove_cost thunderx_regmove_cost =
 297 {
 298   NAMED_PARAM (GP2GP, 2),
 299   NAMED_PARAM (GP2FP, 2),
 300   NAMED_PARAM (FP2GP, 6),
 301   NAMED_PARAM (FP2FP, 4)
 302 };
 303
 304 static const struct cpu_regmove_cost xgene1_regmove_cost =
 305 {
 306   NAMED_PARAM (GP2GP, 1),
 307   /* Avoid the use of slow int<->fp moves for spilling by setting
 308      their cost higher than memmov_cost.  */
 309   NAMED_PARAM (GP2FP, 8),
 310   NAMED_PARAM (FP2GP, 8),
 311   NAMED_PARAM (FP2FP, 2)
 312 };
 313
 314 /* Generic costs for vector insn classes.  */
 315 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 316 __extension__
 317 #endif
 318 static const struct cpu_vector_cost generic_vector_cost =
 319 {
 320   NAMED_PARAM (scalar_stmt_cost, 1),
 321   NAMED_PARAM (scalar_load_cost, 1),
 322   NAMED_PARAM (scalar_store_cost, 1),
 323   NAMED_PARAM (vec_stmt_cost, 1),
 324   NAMED_PARAM (vec_to_scalar_cost, 1),
 325   NAMED_PARAM (scalar_to_vec_cost, 1),
 326   NAMED_PARAM (vec_align_load_cost, 1),
 327   NAMED_PARAM (vec_unalign_load_cost, 1),
 328   NAMED_PARAM (vec_unalign_store_cost, 1),
 329   NAMED_PARAM (vec_store_cost, 1),
 330   NAMED_PARAM (cond_taken_branch_cost, 3),
 331   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 332 };
 333
 334 /* Generic costs for vector insn classes.  */
 335 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 336 __extension__
 337 #endif
 338 static const struct cpu_vector_cost cortexa57_vector_cost =
 339 {
 340   NAMED_PARAM (scalar_stmt_cost, 1),
 341   NAMED_PARAM (scalar_load_cost, 4),
 342   NAMED_PARAM (scalar_store_cost, 1),
 343   NAMED_PARAM (vec_stmt_cost, 3),
 344   NAMED_PARAM (vec_to_scalar_cost, 8),
 345   NAMED_PARAM (scalar_to_vec_cost, 8),
 346   NAMED_PARAM (vec_align_load_cost, 5),
 347   NAMED_PARAM (vec_unalign_load_cost, 5),
 348   NAMED_PARAM (vec_unalign_store_cost, 1),
 349   NAMED_PARAM (vec_store_cost, 1),
 350   NAMED_PARAM (cond_taken_branch_cost, 1),
 351   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 352 };
 353
 354 /* Generic costs for vector insn classes.  */
 355 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 356 __extension__
 357 #endif
 358 static const struct cpu_vector_cost xgene1_vector_cost =
 359 {
 360   NAMED_PARAM (scalar_stmt_cost, 1),
 361   NAMED_PARAM (scalar_load_cost, 5),
 362   NAMED_PARAM (scalar_store_cost, 1),
 363   NAMED_PARAM (vec_stmt_cost, 2),
 364   NAMED_PARAM (vec_to_scalar_cost, 4),
 365   NAMED_PARAM (scalar_to_vec_cost, 4),
 366   NAMED_PARAM (vec_align_load_cost, 10),
 367   NAMED_PARAM (vec_unalign_load_cost, 10),
 368   NAMED_PARAM (vec_unalign_store_cost, 2),
 369   NAMED_PARAM (vec_store_cost, 2),
 370   NAMED_PARAM (cond_taken_branch_cost, 2),
 371   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 372 };
 373
 374 #define AARCH64_FUSE_NOTHING    (0)
 375 #define AARCH64_FUSE_MOV_MOVK   (1 << 0)
 376 #define AARCH64_FUSE_ADRP_ADD   (1 << 1)
 377 #define AARCH64_FUSE_MOVK_MOVK  (1 << 2)
 378 #define AARCH64_FUSE_ADRP_LDR   (1 << 3)
 379 #define AARCH64_FUSE_CMP_BRANCH (1 << 4)
 380
 381 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 382 __extension__
 383 #endif
 384 static const struct tune_params generic_tunings =
 385 {
 386   &cortexa57_extra_costs,
 387   &generic_addrcost_table,
 388   &generic_regmove_cost,
 389   &generic_vector_cost,
 390   NAMED_PARAM (memmov_cost, 4),
 391   NAMED_PARAM (issue_rate, 2),
 392   NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
 393   8,    /* function_align.  */
 394   8,    /* jump_align.  */
 395   4,    /* loop_align.  */
 396   2,    /* int_reassoc_width.  */
 397   4,    /* fp_reassoc_width.  */
 398   1     /* vec_reassoc_width.  */
 399 };
 400
 401 static const struct tune_params cortexa53_tunings =
 402 {
 403   &cortexa53_extra_costs,
 404   &generic_addrcost_table,
 405   &cortexa53_regmove_cost,
 406   &generic_vector_cost,
 407   NAMED_PARAM (memmov_cost, 4),
 408   NAMED_PARAM (issue_rate, 2),
 409   NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
 410                              | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
 411   8,    /* function_align.  */
 412   8,    /* jump_align.  */
 413   4,    /* loop_align.  */
 414   2,    /* int_reassoc_width.  */
 415   4,    /* fp_reassoc_width.  */
 416   1     /* vec_reassoc_width.  */
 417 };
 418
 419 static const struct tune_params cortexa57_tunings =
 420 {
 421   &cortexa57_extra_costs,
 422   &cortexa57_addrcost_table,
 423   &cortexa57_regmove_cost,
 424   &cortexa57_vector_cost,
 425   NAMED_PARAM (memmov_cost, 4),
 426   NAMED_PARAM (issue_rate, 3),
 427   NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK)),
 428   16,   /* function_align.  */
 429   8,    /* jump_align.  */
 430   4,    /* loop_align.  */
 431   2,    /* int_reassoc_width.  */
 432   4,    /* fp_reassoc_width.  */
 433   1     /* vec_reassoc_width.  */
 434 };
 435
 436 static const struct tune_params thunderx_tunings =
 437 {
 438   &thunderx_extra_costs,
 439   &generic_addrcost_table,
 440   &thunderx_regmove_cost,
 441   &generic_vector_cost,
 442   NAMED_PARAM (memmov_cost, 6),
 443   NAMED_PARAM (issue_rate, 2),
 444   NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
 445   8,    /* function_align.  */
 446   8,    /* jump_align.  */
 447   8,    /* loop_align.  */
 448   2,    /* int_reassoc_width.  */
 449   4,    /* fp_reassoc_width.  */
 450   1     /* vec_reassoc_width.  */
 451 };
 452
 453 static const struct tune_params xgene1_tunings =
 454 {
 455   &xgene1_extra_costs,
 456   &xgene1_addrcost_table,
 457   &xgene1_regmove_cost,
 458   &xgene1_vector_cost,
 459   NAMED_PARAM (memmov_cost, 6),
 460   NAMED_PARAM (issue_rate, 4),
 461   NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
 462   16,   /* function_align.  */
 463   8,    /* jump_align.  */
 464   16,   /* loop_align.  */
 465   2,    /* int_reassoc_width.  */
 466   4,    /* fp_reassoc_width.  */
 467   1     /* vec_reassoc_width.  */
 468 };
 469
 470 /* A processor implementing AArch64.  */
 471 struct processor
 472 {
 473   const char *const name;
 474   enum aarch64_processor core;
 475   const char *arch;
 476   unsigned architecture_version;
 477   const unsigned long flags;
 478   const struct tune_params *const tune;
 479 };
 480
 481 /* Processor cores implementing AArch64.  */
 482 static const struct processor all_cores[] =
 483 {
 484 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
 485   {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
 486 #include "aarch64-cores.def"
 487 #undef AARCH64_CORE
 488   {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
 489   {NULL, aarch64_none, NULL, 0, 0, NULL}
 490 };
 491
 492 /* Architectures implementing AArch64.  */
 493 static const struct processor all_architectures[] =
 494 {
 495 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 496   {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
 497 #include "aarch64-arches.def"
 498 #undef AARCH64_ARCH
 499   {NULL, aarch64_none, NULL, 0, 0, NULL}
 500 };
 501
 502 /* Target specification.  These are populated as commandline arguments
 503    are processed, or NULL if not specified.  */
 504 static const struct processor *selected_arch;
 505 static const struct processor *selected_cpu;
 506 static const struct processor *selected_tune;
 507
 508 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 509
 510 /* An ISA extension in the co-processor and main instruction set space.  */
 511 struct aarch64_option_extension
 512 {
 513   const char *const name;
 514   const unsigned long flags_on;
 515   const unsigned long flags_off;
 516 };
 517
 518 /* ISA extensions in AArch64.  */
 519 static const struct aarch64_option_extension all_extensions[] =
 520 {
 521 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 522   {NAME, FLAGS_ON, FLAGS_OFF},
 523 #include "aarch64-option-extensions.def"
 524 #undef AARCH64_OPT_EXTENSION
 525   {NULL, 0, 0}
 526 };
 527
 528 /* Used to track the size of an address when generating a pre/post
 529    increment address.  */
 530 static machine_mode aarch64_memory_reference_mode;
 531
 532 /* Used to force GTY into this file.  */
 533 static GTY(()) int gty_dummy;
 534
 535 /* A table of valid AArch64 "bitmask immediate" values for
 536    logical instructions.  */
 537
 538 #define AARCH64_NUM_BITMASKS  5334
 539 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 540
 541 typedef enum aarch64_cond_code
 542 {
 543   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 544   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 545   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 546 }
 547 aarch64_cc;
 548
 549 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 550
 551 /* The condition codes of the processor, and the inverse function.  */
 552 static const char * const aarch64_condition_codes[] =
 553 {
 554   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 555   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 556 };
 557
 558 static unsigned int
 559 aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
 560 {
 561   return 2;
 562 }
 563
 564 static int
 565 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
 566                              enum machine_mode mode)
 567 {
 568   if (VECTOR_MODE_P (mode))
 569     return aarch64_tune_params->vec_reassoc_width;
 570   if (INTEGRAL_MODE_P (mode))
 571     return aarch64_tune_params->int_reassoc_width;
 572   if (FLOAT_MODE_P (mode))
 573     return aarch64_tune_params->fp_reassoc_width;
 574   return 1;
 575 }
 576
 577 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 578 unsigned
 579 aarch64_dbx_register_number (unsigned regno)
 580 {
 581    if (GP_REGNUM_P (regno))
 582      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 583    else if (regno == SP_REGNUM)
 584      return AARCH64_DWARF_SP;
 585    else if (FP_REGNUM_P (regno))
 586      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 587
 588    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 589       equivalent DWARF register.  */
 590    return DWARF_FRAME_REGISTERS;
 591 }
 592
 593 /* Return TRUE if MODE is any of the large INT modes.  */
 594 static bool
 595 aarch64_vect_struct_mode_p (machine_mode mode)
 596 {
 597   return mode == OImode || mode == CImode || mode == XImode;
 598 }
 599
 600 /* Return TRUE if MODE is any of the vector modes.  */
 601 static bool
 602 aarch64_vector_mode_p (machine_mode mode)
 603 {
 604   return aarch64_vector_mode_supported_p (mode)
 605          || aarch64_vect_struct_mode_p (mode);
 606 }
 607
 608 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 609 static bool
 610 aarch64_array_mode_supported_p (machine_mode mode,
 611                                 unsigned HOST_WIDE_INT nelems)
 612 {
 613   if (TARGET_SIMD
 614       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 615       && (nelems >= 2 && nelems <= 4))
 616     return true;
 617
 618   return false;
 619 }
 620
 621 /* Implement HARD_REGNO_NREGS.  */
 622
 623 int
 624 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
 625 {
 626   switch (aarch64_regno_regclass (regno))
 627     {
 628     case FP_REGS:
 629     case FP_LO_REGS:
 630       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 631     default:
 632       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 633     }
 634   gcc_unreachable ();
 635 }
 636
 637 /* Implement HARD_REGNO_MODE_OK.  */
 638
 639 int
 640 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
 641 {
 642   if (GET_MODE_CLASS (mode) == MODE_CC)
 643     return regno == CC_REGNUM;
 644
 645   if (regno == SP_REGNUM)
 646     /* The purpose of comparing with ptr_mode is to support the
 647        global register variable associated with the stack pointer
 648        register via the syntax of asm ("wsp") in ILP32.  */
 649     return mode == Pmode || mode == ptr_mode;
 650
 651   if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
 652     return mode == Pmode;
 653
 654   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 655     return 1;
 656
 657   if (FP_REGNUM_P (regno))
 658     {
 659       if (aarch64_vect_struct_mode_p (mode))
 660         return
 661           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 662       else
 663         return 1;
 664     }
 665
 666   return 0;
 667 }
 668
 669 /* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
 670 machine_mode
 671 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
 672                                      machine_mode mode)
 673 {
 674   /* Handle modes that fit within single registers.  */
 675   if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
 676     {
 677       if (GET_MODE_SIZE (mode) >= 4)
 678         return mode;
 679       else
 680         return SImode;
 681     }
 682   /* Fall back to generic for multi-reg and very large modes.  */
 683   else
 684     return choose_hard_reg_mode (regno, nregs, false);
 685 }
 686
 687 /* Return true if calls to DECL should be treated as
 688    long-calls (ie called via a register).  */
 689 static bool
 690 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 691 {
 692   return false;
 693 }
 694
 695 /* Return true if calls to symbol-ref SYM should be treated as
 696    long-calls (ie called via a register).  */
 697 bool
 698 aarch64_is_long_call_p (rtx sym)
 699 {
 700   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 701 }
 702
 703 /* Return true if the offsets to a zero/sign-extract operation
 704    represent an expression that matches an extend operation.  The
 705    operands represent the paramters from
 706
 707    (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 708 bool
 709 aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
 710                                 rtx extract_imm)
 711 {
 712   HOST_WIDE_INT mult_val, extract_val;
 713
 714   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 715     return false;
 716
 717   mult_val = INTVAL (mult_imm);
 718   extract_val = INTVAL (extract_imm);
 719
 720   if (extract_val > 8
 721       && extract_val < GET_MODE_BITSIZE (mode)
 722       && exact_log2 (extract_val & ~7) > 0
 723       && (extract_val & 7) <= 4
 724       && mult_val == (1 << (extract_val & 7)))
 725     return true;
 726
 727   return false;
 728 }
 729
 730 /* Emit an insn that's a simple single-set.  Both the operands must be
 731    known to be valid.  */
 732 inline static rtx
 733 emit_set_insn (rtx x, rtx y)
 734 {
 735   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 736 }
 737
 738 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 739    return the rtx for register 0 in the proper mode.  */
 740 rtx
 741 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 742 {
 743   machine_mode mode = SELECT_CC_MODE (code, x, y);
 744   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 745
 746   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 747   return cc_reg;
 748 }
 749
 750 /* Build the SYMBOL_REF for __tls_get_addr.  */
 751
 752 static GTY(()) rtx tls_get_addr_libfunc;
 753
 754 rtx
 755 aarch64_tls_get_addr (void)
 756 {
 757   if (!tls_get_addr_libfunc)
 758     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 759   return tls_get_addr_libfunc;
 760 }
 761
 762 /* Return the TLS model to use for ADDR.  */
 763
 764 static enum tls_model
 765 tls_symbolic_operand_type (rtx addr)
 766 {
 767   enum tls_model tls_kind = TLS_MODEL_NONE;
 768   rtx sym, addend;
 769
 770   if (GET_CODE (addr) == CONST)
 771     {
 772       split_const (addr, &sym, &addend);
 773       if (GET_CODE (sym) == SYMBOL_REF)
 774         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 775     }
 776   else if (GET_CODE (addr) == SYMBOL_REF)
 777     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 778
 779   return tls_kind;
 780 }
 781
 782 /* We'll allow lo_sum's in addresses in our legitimate addresses
 783    so that combine would take care of combining addresses where
 784    necessary, but for generation purposes, we'll generate the address
 785    as :
 786    RTL                               Absolute
 787    tmp = hi (symbol_ref);            adrp  x1, foo
 788    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
 789                                      nop
 790
 791    PIC                               TLS
 792    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 793    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 794                                      bl   __tls_get_addr
 795                                      nop
 796
 797    Load TLS symbol, depending on TLS mechanism and TLS access model.
 798
 799    Global Dynamic - Traditional TLS:
 800    adrp tmp, :tlsgd:imm
 801    add  dest, tmp, #:tlsgd_lo12:imm
 802    bl   __tls_get_addr
 803
 804    Global Dynamic - TLS Descriptors:
 805    adrp dest, :tlsdesc:imm
 806    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 807    add  dest, dest, #:tlsdesc_lo12:imm
 808    blr  tmp
 809    mrs  tp, tpidr_el0
 810    add  dest, dest, tp
 811
 812    Initial Exec:
 813    mrs  tp, tpidr_el0
 814    adrp tmp, :gottprel:imm
 815    ldr  dest, [tmp, #:gottprel_lo12:imm]
 816    add  dest, dest, tp
 817
 818    Local Exec:
 819    mrs  tp, tpidr_el0
 820    add  t0, tp, #:tprel_hi12:imm
 821    add  t0, #:tprel_lo12_nc:imm
 822 */
 823
 824 static void
 825 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 826                                    enum aarch64_symbol_type type)
 827 {
 828   switch (type)
 829     {
 830     case SYMBOL_SMALL_ABSOLUTE:
 831       {
 832         /* In ILP32, the mode of dest can be either SImode or DImode.  */
 833         rtx tmp_reg = dest;
 834         machine_mode mode = GET_MODE (dest);
 835
 836         gcc_assert (mode == Pmode || mode == ptr_mode);
 837
 838         if (can_create_pseudo_p ())
 839           tmp_reg = gen_reg_rtx (mode);
 840
 841         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 842         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 843         return;
 844       }
 845
 846     case SYMBOL_TINY_ABSOLUTE:
 847       emit_insn (gen_rtx_SET (Pmode, dest, imm));
 848       return;
 849
 850     case SYMBOL_SMALL_GOT:
 851       {
 852         /* In ILP32, the mode of dest can be either SImode or DImode,
 853            while the got entry is always of SImode size.  The mode of
 854            dest depends on how dest is used: if dest is assigned to a
 855            pointer (e.g. in the memory), it has SImode; it may have
 856            DImode if dest is dereferenced to access the memeory.
 857            This is why we have to handle three different ldr_got_small
 858            patterns here (two patterns for ILP32).  */
 859         rtx tmp_reg = dest;
 860         machine_mode mode = GET_MODE (dest);
 861
 862         if (can_create_pseudo_p ())
 863           tmp_reg = gen_reg_rtx (mode);
 864
 865         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 866         if (mode == ptr_mode)
 867           {
 868             if (mode == DImode)
 869               emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
 870             else
 871               emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
 872           }
 873         else
 874           {
 875             gcc_assert (mode == Pmode);
 876             emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
 877           }
 878
 879         return;
 880       }
 881
 882     case SYMBOL_SMALL_TLSGD:
 883       {
 884         rtx_insn *insns;
 885         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 886
 887         start_sequence ();
 888         aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
 889         insns = get_insns ();
 890         end_sequence ();
 891
 892         RTL_CONST_CALL_P (insns) = 1;
 893         emit_libcall_block (insns, dest, result, imm);
 894         return;
 895       }
 896
 897     case SYMBOL_SMALL_TLSDESC:
 898       {
 899         machine_mode mode = GET_MODE (dest);
 900         rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
 901         rtx tp;
 902
 903         gcc_assert (mode == Pmode || mode == ptr_mode);
 904
 905         /* In ILP32, the got entry is always of SImode size.  Unlike
 906            small GOT, the dest is fixed at reg 0.  */
 907         if (TARGET_ILP32)
 908           emit_insn (gen_tlsdesc_small_si (imm));
 909         else
 910           emit_insn (gen_tlsdesc_small_di (imm));
 911         tp = aarch64_load_tp (NULL);
 912
 913         if (mode != Pmode)
 914           tp = gen_lowpart (mode, tp);
 915
 916         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
 917         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 918         return;
 919       }
 920
 921     case SYMBOL_SMALL_GOTTPREL:
 922       {
 923         /* In ILP32, the mode of dest can be either SImode or DImode,
 924            while the got entry is always of SImode size.  The mode of
 925            dest depends on how dest is used: if dest is assigned to a
 926            pointer (e.g. in the memory), it has SImode; it may have
 927            DImode if dest is dereferenced to access the memeory.
 928            This is why we have to handle three different tlsie_small
 929            patterns here (two patterns for ILP32).  */
 930         machine_mode mode = GET_MODE (dest);
 931         rtx tmp_reg = gen_reg_rtx (mode);
 932         rtx tp = aarch64_load_tp (NULL);
 933
 934         if (mode == ptr_mode)
 935           {
 936             if (mode == DImode)
 937               emit_insn (gen_tlsie_small_di (tmp_reg, imm));
 938             else
 939               {
 940                 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
 941                 tp = gen_lowpart (mode, tp);
 942               }
 943           }
 944         else
 945           {
 946             gcc_assert (mode == Pmode);
 947             emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
 948           }
 949
 950         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
 951         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 952         return;
 953       }
 954
 955     case SYMBOL_SMALL_TPREL:
 956       {
 957         rtx tp = aarch64_load_tp (NULL);
 958         emit_insn (gen_tlsle_small (dest, tp, imm));
 959         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 960         return;
 961       }
 962
 963     case SYMBOL_TINY_GOT:
 964       emit_insn (gen_ldr_got_tiny (dest, imm));
 965       return;
 966
 967     default:
 968       gcc_unreachable ();
 969     }
 970 }
 971
 972 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 973    handle all moves if !can_create_pseudo_p ().  The distinction is
 974    important because, unlike emit_move_insn, the move expanders know
 975    how to force Pmode objects into the constant pool even when the
 976    constant pool address is not itself legitimate.  */
 977 static rtx
 978 aarch64_emit_move (rtx dest, rtx src)
 979 {
 980   return (can_create_pseudo_p ()
 981           ? emit_move_insn (dest, src)
 982           : emit_move_insn_1 (dest, src));
 983 }
 984
 985 /* Split a 128-bit move operation into two 64-bit move operations,
 986    taking care to handle partial overlap of register to register
 987    copies.  Special cases are needed when moving between GP regs and
 988    FP regs.  SRC can be a register, constant or memory; DST a register
 989    or memory.  If either operand is memory it must not have any side
 990    effects.  */
 991 void
 992 aarch64_split_128bit_move (rtx dst, rtx src)
 993 {
 994   rtx dst_lo, dst_hi;
 995   rtx src_lo, src_hi;
 996
 997   machine_mode mode = GET_MODE (dst);
 998
 999   gcc_assert (mode == TImode || mode == TFmode);
1000   gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1001   gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
1002
1003   if (REG_P (dst) && REG_P (src))
1004     {
1005       int src_regno = REGNO (src);
1006       int dst_regno = REGNO (dst);
1007
1008       /* Handle FP <-> GP regs.  */
1009       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1010         {
1011           src_lo = gen_lowpart (word_mode, src);
1012           src_hi = gen_highpart (word_mode, src);
1013
1014           if (mode == TImode)
1015             {
1016               emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1017               emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1018             }
1019           else
1020             {
1021               emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1022               emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1023             }
1024           return;
1025         }
1026       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1027         {
1028           dst_lo = gen_lowpart (word_mode, dst);
1029           dst_hi = gen_highpart (word_mode, dst);
1030
1031           if (mode == TImode)
1032             {
1033               emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1034               emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1035             }
1036           else
1037             {
1038               emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1039               emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1040             }
1041           return;
1042         }
1043     }
1044
1045   dst_lo = gen_lowpart (word_mode, dst);
1046   dst_hi = gen_highpart (word_mode, dst);
1047   src_lo = gen_lowpart (word_mode, src);
1048   src_hi = gen_highpart_mode (word_mode, mode, src);
1049
1050   /* At most one pairing may overlap.  */
1051   if (reg_overlap_mentioned_p (dst_lo, src_hi))
1052     {
1053       aarch64_emit_move (dst_hi, src_hi);
1054       aarch64_emit_move (dst_lo, src_lo);
1055     }
1056   else
1057     {
1058       aarch64_emit_move (dst_lo, src_lo);
1059       aarch64_emit_move (dst_hi, src_hi);
1060     }
1061 }
1062
1063 bool
1064 aarch64_split_128bit_move_p (rtx dst, rtx src)
1065 {
1066   return (! REG_P (src)
1067           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1068 }
1069
1070 /* Split a complex SIMD combine.  */
1071
1072 void
1073 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1074 {
1075   machine_mode src_mode = GET_MODE (src1);
1076   machine_mode dst_mode = GET_MODE (dst);
1077
1078   gcc_assert (VECTOR_MODE_P (dst_mode));
1079
1080   if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1081     {
1082       rtx (*gen) (rtx, rtx, rtx);
1083
1084       switch (src_mode)
1085         {
1086         case V8QImode:
1087           gen = gen_aarch64_simd_combinev8qi;
1088           break;
1089         case V4HImode:
1090           gen = gen_aarch64_simd_combinev4hi;
1091           break;
1092         case V2SImode:
1093           gen = gen_aarch64_simd_combinev2si;
1094           break;
1095         case V2SFmode:
1096           gen = gen_aarch64_simd_combinev2sf;
1097           break;
1098         case DImode:
1099           gen = gen_aarch64_simd_combinedi;
1100           break;
1101         case DFmode:
1102           gen = gen_aarch64_simd_combinedf;
1103           break;
1104         default:
1105           gcc_unreachable ();
1106         }
1107
1108       emit_insn (gen (dst, src1, src2));
1109       return;
1110     }
1111 }
1112
1113 /* Split a complex SIMD move.  */
1114
1115 void
1116 aarch64_split_simd_move (rtx dst, rtx src)
1117 {
1118   machine_mode src_mode = GET_MODE (src);
1119   machine_mode dst_mode = GET_MODE (dst);
1120
1121   gcc_assert (VECTOR_MODE_P (dst_mode));
1122
1123   if (REG_P (dst) && REG_P (src))
1124     {
1125       rtx (*gen) (rtx, rtx);
1126
1127       gcc_assert (VECTOR_MODE_P (src_mode));
1128
1129       switch (src_mode)
1130         {
1131         case V16QImode:
1132           gen = gen_aarch64_split_simd_movv16qi;
1133           break;
1134         case V8HImode:
1135           gen = gen_aarch64_split_simd_movv8hi;
1136           break;
1137         case V4SImode:
1138           gen = gen_aarch64_split_simd_movv4si;
1139           break;
1140         case V2DImode:
1141           gen = gen_aarch64_split_simd_movv2di;
1142           break;
1143         case V4SFmode:
1144           gen = gen_aarch64_split_simd_movv4sf;
1145           break;
1146         case V2DFmode:
1147           gen = gen_aarch64_split_simd_movv2df;
1148           break;
1149         default:
1150           gcc_unreachable ();
1151         }
1152
1153       emit_insn (gen (dst, src));
1154       return;
1155     }
1156 }
1157
1158 static rtx
1159 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1160 {
1161   if (can_create_pseudo_p ())
1162     return force_reg (mode, value);
1163   else
1164     {
1165       x = aarch64_emit_move (x, value);
1166       return x;
1167     }
1168 }
1169
1170
1171 static rtx
1172 aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1173 {
1174   if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1175     {
1176       rtx high;
1177       /* Load the full offset into a register.  This
1178          might be improvable in the future.  */
1179       high = GEN_INT (offset);
1180       offset = 0;
1181       high = aarch64_force_temporary (mode, temp, high);
1182       reg = aarch64_force_temporary (mode, temp,
1183                                      gen_rtx_PLUS (mode, high, reg));
1184     }
1185   return plus_constant (mode, reg, offset);
1186 }
1187
1188 static int
1189 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1190                                 machine_mode mode)
1191 {
1192   unsigned HOST_WIDE_INT mask;
1193   int i;
1194   bool first;
1195   unsigned HOST_WIDE_INT val;
1196   bool subtargets;
1197   rtx subtarget;
1198   int one_match, zero_match, first_not_ffff_match;
1199   int num_insns = 0;
1200
1201   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1202     {
1203       if (generate)
1204         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1205       num_insns++;
1206       return num_insns;
1207     }
1208
1209   if (mode == SImode)
1210     {
1211       /* We know we can't do this in 1 insn, and we must be able to do it
1212          in two; so don't mess around looking for sequences that don't buy
1213          us anything.  */
1214       if (generate)
1215         {
1216           emit_insn (gen_rtx_SET (VOIDmode, dest,
1217                                   GEN_INT (INTVAL (imm) & 0xffff)));
1218           emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1219                                      GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1220         }
1221       num_insns += 2;
1222       return num_insns;
1223     }
1224
1225   /* Remaining cases are all for DImode.  */
1226
1227   val = INTVAL (imm);
1228   subtargets = optimize && can_create_pseudo_p ();
1229
1230   one_match = 0;
1231   zero_match = 0;
1232   mask = 0xffff;
1233   first_not_ffff_match = -1;
1234
1235   for (i = 0; i < 64; i += 16, mask <<= 16)
1236     {
1237       if ((val & mask) == mask)
1238         one_match++;
1239       else
1240         {
1241           if (first_not_ffff_match < 0)
1242             first_not_ffff_match = i;
1243           if ((val & mask) == 0)
1244             zero_match++;
1245         }
1246     }
1247
1248   if (one_match == 2)
1249     {
1250       /* Set one of the quarters and then insert back into result.  */
1251       mask = 0xffffll << first_not_ffff_match;
1252       if (generate)
1253         {
1254           emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1255           emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1256                                      GEN_INT ((val >> first_not_ffff_match)
1257                                               & 0xffff)));
1258         }
1259       num_insns += 2;
1260       return num_insns;
1261     }
1262
1263   if (zero_match == 2)
1264     goto simple_sequence;
1265
1266   mask = 0x0ffff0000UL;
1267   for (i = 16; i < 64; i += 16, mask <<= 16)
1268     {
1269       HOST_WIDE_INT comp = mask & ~(mask - 1);
1270
1271       if (aarch64_uimm12_shift (val - (val & mask)))
1272         {
1273           if (generate)
1274             {
1275               subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1276               emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1277                                       GEN_INT (val & mask)));
1278               emit_insn (gen_adddi3 (dest, subtarget,
1279                                      GEN_INT (val - (val & mask))));
1280             }
1281           num_insns += 2;
1282           return num_insns;
1283         }
1284       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1285         {
1286           if (generate)
1287             {
1288               subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1289               emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1290                                       GEN_INT ((val + comp) & mask)));
1291               emit_insn (gen_adddi3 (dest, subtarget,
1292                                      GEN_INT (val - ((val + comp) & mask))));
1293             }
1294           num_insns += 2;
1295           return num_insns;
1296         }
1297       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1298         {
1299           if (generate)
1300             {
1301               subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1302               emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1303                                       GEN_INT ((val - comp) | ~mask)));
1304               emit_insn (gen_adddi3 (dest, subtarget,
1305                                      GEN_INT (val - ((val - comp) | ~mask))));
1306             }
1307           num_insns += 2;
1308           return num_insns;
1309         }
1310       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1311         {
1312           if (generate)
1313             {
1314               subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1315               emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1316                                       GEN_INT (val | ~mask)));
1317               emit_insn (gen_adddi3 (dest, subtarget,
1318                                      GEN_INT (val - (val | ~mask))));
1319             }
1320           num_insns += 2;
1321           return num_insns;
1322         }
1323     }
1324
1325   /* See if we can do it by arithmetically combining two
1326      immediates.  */
1327   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1328     {
1329       int j;
1330       mask = 0xffff;
1331
1332       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1333           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1334         {
1335           if (generate)
1336             {
1337               subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1338               emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1339                                       GEN_INT (aarch64_bitmasks[i])));
1340               emit_insn (gen_adddi3 (dest, subtarget,
1341                                      GEN_INT (val - aarch64_bitmasks[i])));
1342             }
1343           num_insns += 2;
1344           return num_insns;
1345         }
1346
1347       for (j = 0; j < 64; j += 16, mask <<= 16)
1348         {
1349           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1350             {
1351               if (generate)
1352                 {
1353                   emit_insn (gen_rtx_SET (VOIDmode, dest,
1354                                           GEN_INT (aarch64_bitmasks[i])));
1355                   emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1356                                              GEN_INT ((val >> j) & 0xffff)));
1357                 }
1358               num_insns += 2;
1359               return num_insns;
1360             }
1361         }
1362     }
1363
1364   /* See if we can do it by logically combining two immediates.  */
1365   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1366     {
1367       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1368         {
1369           int j;
1370
1371           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1372             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1373               {
1374                 if (generate)
1375                   {
1376                     subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1377                     emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1378                                             GEN_INT (aarch64_bitmasks[i])));
1379                     emit_insn (gen_iordi3 (dest, subtarget,
1380                                            GEN_INT (aarch64_bitmasks[j])));
1381                   }
1382                 num_insns += 2;
1383                 return num_insns;
1384               }
1385         }
1386       else if ((val & aarch64_bitmasks[i]) == val)
1387         {
1388           int j;
1389
1390           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1391             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1392               {
1393                 if (generate)
1394                   {
1395                     subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1396                     emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1397                                             GEN_INT (aarch64_bitmasks[j])));
1398                     emit_insn (gen_anddi3 (dest, subtarget,
1399                                            GEN_INT (aarch64_bitmasks[i])));
1400                   }
1401                 num_insns += 2;
1402                 return num_insns;
1403               }
1404         }
1405     }
1406
1407   if (one_match > zero_match)
1408     {
1409       /* Set either first three quarters or all but the third.   */
1410       mask = 0xffffll << (16 - first_not_ffff_match);
1411       if (generate)
1412         emit_insn (gen_rtx_SET (VOIDmode, dest,
1413                                 GEN_INT (val | mask | 0xffffffff00000000ull)));
1414       num_insns ++;
1415
1416       /* Now insert other two quarters.  */
1417       for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1418            i < 64; i += 16, mask <<= 16)
1419         {
1420           if ((val & mask) != mask)
1421             {
1422               if (generate)
1423                 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1424                                            GEN_INT ((val >> i) & 0xffff)));
1425               num_insns ++;
1426             }
1427         }
1428       return num_insns;
1429     }
1430
1431  simple_sequence:
1432   first = true;
1433   mask = 0xffff;
1434   for (i = 0; i < 64; i += 16, mask <<= 16)
1435     {
1436       if ((val & mask) != 0)
1437         {
1438           if (first)
1439             {
1440               if (generate)
1441                 emit_insn (gen_rtx_SET (VOIDmode, dest,
1442                                         GEN_INT (val & mask)));
1443               num_insns ++;
1444               first = false;
1445             }
1446           else
1447             {
1448               if (generate)
1449                 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1450                                            GEN_INT ((val >> i) & 0xffff)));
1451               num_insns ++;
1452             }
1453         }
1454     }
1455
1456   return num_insns;
1457 }
1458
1459
1460 void
1461 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1462 {
1463   machine_mode mode = GET_MODE (dest);
1464
1465   gcc_assert (mode == SImode || mode == DImode);
1466
1467   /* Check on what type of symbol it is.  */
1468   if (GET_CODE (imm) == SYMBOL_REF
1469       || GET_CODE (imm) == LABEL_REF
1470       || GET_CODE (imm) == CONST)
1471     {
1472       rtx mem, base, offset;
1473       enum aarch64_symbol_type sty;
1474
1475       /* If we have (const (plus symbol offset)), separate out the offset
1476          before we start classifying the symbol.  */
1477       split_const (imm, &base, &offset);
1478
1479       sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
1480       switch (sty)
1481         {
1482         case SYMBOL_FORCE_TO_MEM:
1483           if (offset != const0_rtx
1484               && targetm.cannot_force_const_mem (mode, imm))
1485             {
1486               gcc_assert (can_create_pseudo_p ());
1487               base = aarch64_force_temporary (mode, dest, base);
1488               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1489               aarch64_emit_move (dest, base);
1490               return;
1491             }
1492           mem = force_const_mem (ptr_mode, imm);
1493           gcc_assert (mem);
1494           if (mode != ptr_mode)
1495             mem = gen_rtx_ZERO_EXTEND (mode, mem);
1496           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1497           return;
1498
1499         case SYMBOL_SMALL_TLSGD:
1500         case SYMBOL_SMALL_TLSDESC:
1501         case SYMBOL_SMALL_GOTTPREL:
1502         case SYMBOL_SMALL_GOT:
1503         case SYMBOL_TINY_GOT:
1504           if (offset != const0_rtx)
1505             {
1506               gcc_assert(can_create_pseudo_p ());
1507               base = aarch64_force_temporary (mode, dest, base);
1508               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1509               aarch64_emit_move (dest, base);
1510               return;
1511             }
1512           /* FALLTHRU */
1513
1514         case SYMBOL_SMALL_TPREL:
1515         case SYMBOL_SMALL_ABSOLUTE:
1516         case SYMBOL_TINY_ABSOLUTE:
1517           aarch64_load_symref_appropriately (dest, imm, sty);
1518           return;
1519
1520         default:
1521           gcc_unreachable ();
1522         }
1523     }
1524
1525   if (!CONST_INT_P (imm))
1526     {
1527       if (GET_CODE (imm) == HIGH)
1528         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1529       else
1530         {
1531           rtx mem = force_const_mem (mode, imm);
1532           gcc_assert (mem);
1533           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1534         }
1535
1536       return;
1537     }
1538
1539   aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
1540 }
1541
1542 static bool
1543 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1544                                  tree exp ATTRIBUTE_UNUSED)
1545 {
1546   /* Currently, always true.  */
1547   return true;
1548 }
1549
1550 /* Implement TARGET_PASS_BY_REFERENCE.  */
1551
1552 static bool
1553 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1554                            machine_mode mode,
1555                            const_tree type,
1556                            bool named ATTRIBUTE_UNUSED)
1557 {
1558   HOST_WIDE_INT size;
1559   machine_mode dummymode;
1560   int nregs;
1561
1562   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
1563   size = (mode == BLKmode && type)
1564     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1565
1566   /* Aggregates are passed by reference based on their size.  */
1567   if (type && AGGREGATE_TYPE_P (type))
1568     {
1569       size = int_size_in_bytes (type);
1570     }
1571
1572   /* Variable sized arguments are always returned by reference.  */
1573   if (size < 0)
1574     return true;
1575
1576   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1577   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1578                                                &dummymode, &nregs,
1579                                                NULL))
1580     return false;
1581
1582   /* Arguments which are variable sized or larger than 2 registers are
1583      passed by reference unless they are a homogenous floating point
1584      aggregate.  */
1585   return size > 2 * UNITS_PER_WORD;
1586 }
1587
1588 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1589 static bool
1590 aarch64_return_in_msb (const_tree valtype)
1591 {
1592   machine_mode dummy_mode;
1593   int dummy_int;
1594
1595   /* Never happens in little-endian mode.  */
1596   if (!BYTES_BIG_ENDIAN)
1597     return false;
1598
1599   /* Only composite types smaller than or equal to 16 bytes can
1600      be potentially returned in registers.  */
1601   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1602       || int_size_in_bytes (valtype) <= 0
1603       || int_size_in_bytes (valtype) > 16)
1604     return false;
1605
1606   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1607      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1608      is always passed/returned in the least significant bits of fp/simd
1609      register(s).  */
1610   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1611                                                &dummy_mode, &dummy_int, NULL))
1612     return false;
1613
1614   return true;
1615 }
1616
1617 /* Implement TARGET_FUNCTION_VALUE.
1618    Define how to find the value returned by a function.  */
1619
1620 static rtx
1621 aarch64_function_value (const_tree type, const_tree func,
1622                         bool outgoing ATTRIBUTE_UNUSED)
1623 {
1624   machine_mode mode;
1625   int unsignedp;
1626   int count;
1627   machine_mode ag_mode;
1628
1629   mode = TYPE_MODE (type);
1630   if (INTEGRAL_TYPE_P (type))
1631     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1632
1633   if (aarch64_return_in_msb (type))
1634     {
1635       HOST_WIDE_INT size = int_size_in_bytes (type);
1636
1637       if (size % UNITS_PER_WORD != 0)
1638         {
1639           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1640           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1641         }
1642     }
1643
1644   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1645                                                &ag_mode, &count, NULL))
1646     {
1647       if (!aarch64_composite_type_p (type, mode))
1648         {
1649           gcc_assert (count == 1 && mode == ag_mode);
1650           return gen_rtx_REG (mode, V0_REGNUM);
1651         }
1652       else
1653         {
1654           int i;
1655           rtx par;
1656
1657           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1658           for (i = 0; i < count; i++)
1659             {
1660               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1661               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1662                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1663               XVECEXP (par, 0, i) = tmp;
1664             }
1665           return par;
1666         }
1667     }
1668   else
1669     return gen_rtx_REG (mode, R0_REGNUM);
1670 }
1671
1672 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1673    Return true if REGNO is the number of a hard register in which the values
1674    of called function may come back.  */
1675
1676 static bool
1677 aarch64_function_value_regno_p (const unsigned int regno)
1678 {
1679   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1680      of 16-byte return values are: 128-bit integers and 16-byte small
1681      structures (excluding homogeneous floating-point aggregates).  */
1682   if (regno == R0_REGNUM || regno == R1_REGNUM)
1683     return true;
1684
1685   /* Up to four fp/simd registers can return a function value, e.g. a
1686      homogeneous floating-point aggregate having four members.  */
1687   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1688     return !TARGET_GENERAL_REGS_ONLY;
1689
1690   return false;
1691 }
1692
1693 /* Implement TARGET_RETURN_IN_MEMORY.
1694
1695    If the type T of the result of a function is such that
1696      void func (T arg)
1697    would require that arg be passed as a value in a register (or set of
1698    registers) according to the parameter passing rules, then the result
1699    is returned in the same registers as would be used for such an
1700    argument.  */
1701
1702 static bool
1703 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1704 {
1705   HOST_WIDE_INT size;
1706   machine_mode ag_mode;
1707   int count;
1708
1709   if (!AGGREGATE_TYPE_P (type)
1710       && TREE_CODE (type) != COMPLEX_TYPE
1711       && TREE_CODE (type) != VECTOR_TYPE)
1712     /* Simple scalar types always returned in registers.  */
1713     return false;
1714
1715   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1716                                                type,
1717                                                &ag_mode,
1718                                                &count,
1719                                                NULL))
1720     return false;
1721
1722   /* Types larger than 2 registers returned in memory.  */
1723   size = int_size_in_bytes (type);
1724   return (size < 0 || size > 2 * UNITS_PER_WORD);
1725 }
1726
1727 static bool
1728 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1729                                const_tree type, int *nregs)
1730 {
1731   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1732   return aarch64_vfp_is_call_or_return_candidate (mode,
1733                                                   type,
1734                                                   &pcum->aapcs_vfp_rmode,
1735                                                   nregs,
1736                                                   NULL);
1737 }
1738
1739 /* Given MODE and TYPE of a function argument, return the alignment in
1740    bits.  The idea is to suppress any stronger alignment requested by
1741    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1742    This is a helper function for local use only.  */
1743
1744 static unsigned int
1745 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1746 {
1747   unsigned int alignment;
1748
1749   if (type)
1750     {
1751       if (!integer_zerop (TYPE_SIZE (type)))
1752         {
1753           if (TYPE_MODE (type) == mode)
1754             alignment = TYPE_ALIGN (type);
1755           else
1756             alignment = GET_MODE_ALIGNMENT (mode);
1757         }
1758       else
1759         alignment = 0;
1760     }
1761   else
1762     alignment = GET_MODE_ALIGNMENT (mode);
1763
1764   return alignment;
1765 }
1766
1767 /* Layout a function argument according to the AAPCS64 rules.  The rule
1768    numbers refer to the rule numbers in the AAPCS64.  */
1769
1770 static void
1771 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1772                     const_tree type,
1773                     bool named ATTRIBUTE_UNUSED)
1774 {
1775   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1776   int ncrn, nvrn, nregs;
1777   bool allocate_ncrn, allocate_nvrn;
1778   HOST_WIDE_INT size;
1779
1780   /* We need to do this once per argument.  */
1781   if (pcum->aapcs_arg_processed)
1782     return;
1783
1784   pcum->aapcs_arg_processed = true;
1785
1786   /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
1787   size
1788     = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1789                         UNITS_PER_WORD);
1790
1791   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1792   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1793                                                  mode,
1794                                                  type,
1795                                                  &nregs);
1796
1797   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1798      The following code thus handles passing by SIMD/FP registers first.  */
1799
1800   nvrn = pcum->aapcs_nvrn;
1801
1802   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1803      and homogenous short-vector aggregates (HVA).  */
1804   if (allocate_nvrn)
1805     {
1806       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1807         {
1808           pcum->aapcs_nextnvrn = nvrn + nregs;
1809           if (!aarch64_composite_type_p (type, mode))
1810             {
1811               gcc_assert (nregs == 1);
1812               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1813             }
1814           else
1815             {
1816               rtx par;
1817               int i;
1818               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1819               for (i = 0; i < nregs; i++)
1820                 {
1821                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1822                                          V0_REGNUM + nvrn + i);
1823                   tmp = gen_rtx_EXPR_LIST
1824                     (VOIDmode, tmp,
1825                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1826                   XVECEXP (par, 0, i) = tmp;
1827                 }
1828               pcum->aapcs_reg = par;
1829             }
1830           return;
1831         }
1832       else
1833         {
1834           /* C.3 NSRN is set to 8.  */
1835           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1836           goto on_stack;
1837         }
1838     }
1839
1840   ncrn = pcum->aapcs_ncrn;
1841   nregs = size / UNITS_PER_WORD;
1842
1843   /* C6 - C9.  though the sign and zero extension semantics are
1844      handled elsewhere.  This is the case where the argument fits
1845      entirely general registers.  */
1846   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1847     {
1848       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1849
1850       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1851
1852       /* C.8 if the argument has an alignment of 16 then the NGRN is
1853          rounded up to the next even number.  */
1854       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1855         {
1856           ++ncrn;
1857           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1858         }
1859       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1860          A reg is still generated for it, but the caller should be smart
1861          enough not to use it.  */
1862       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1863         {
1864           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1865         }
1866       else
1867         {
1868           rtx par;
1869           int i;
1870
1871           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1872           for (i = 0; i < nregs; i++)
1873             {
1874               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1875               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1876                                        GEN_INT (i * UNITS_PER_WORD));
1877               XVECEXP (par, 0, i) = tmp;
1878             }
1879           pcum->aapcs_reg = par;
1880         }
1881
1882       pcum->aapcs_nextncrn = ncrn + nregs;
1883       return;
1884     }
1885
1886   /* C.11  */
1887   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1888
1889   /* The argument is passed on stack; record the needed number of words for
1890      this argument and align the total size if necessary.  */
1891 on_stack:
1892   pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1893   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1894     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1895                                                16 / UNITS_PER_WORD);
1896   return;
1897 }
1898
1899 /* Implement TARGET_FUNCTION_ARG.  */
1900
1901 static rtx
1902 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
1903                       const_tree type, bool named)
1904 {
1905   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1906   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1907
1908   if (mode == VOIDmode)
1909     return NULL_RTX;
1910
1911   aarch64_layout_arg (pcum_v, mode, type, named);
1912   return pcum->aapcs_reg;
1913 }
1914
1915 void
1916 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1917                            const_tree fntype ATTRIBUTE_UNUSED,
1918                            rtx libname ATTRIBUTE_UNUSED,
1919                            const_tree fndecl ATTRIBUTE_UNUSED,
1920                            unsigned n_named ATTRIBUTE_UNUSED)
1921 {
1922   pcum->aapcs_ncrn = 0;
1923   pcum->aapcs_nvrn = 0;
1924   pcum->aapcs_nextncrn = 0;
1925   pcum->aapcs_nextnvrn = 0;
1926   pcum->pcs_variant = ARM_PCS_AAPCS64;
1927   pcum->aapcs_reg = NULL_RTX;
1928   pcum->aapcs_arg_processed = false;
1929   pcum->aapcs_stack_words = 0;
1930   pcum->aapcs_stack_size = 0;
1931
1932   return;
1933 }
1934
1935 static void
1936 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1937                               machine_mode mode,
1938                               const_tree type,
1939                               bool named)
1940 {
1941   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1942   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1943     {
1944       aarch64_layout_arg (pcum_v, mode, type, named);
1945       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1946                   != (pcum->aapcs_stack_words != 0));
1947       pcum->aapcs_arg_processed = false;
1948       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1949       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1950       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1951       pcum->aapcs_stack_words = 0;
1952       pcum->aapcs_reg = NULL_RTX;
1953     }
1954 }
1955
1956 bool
1957 aarch64_function_arg_regno_p (unsigned regno)
1958 {
1959   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1960           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1961 }
1962
1963 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1964    PARM_BOUNDARY bits of alignment, but will be given anything up
1965    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1966    that both before and after the layout of each argument, the Next
1967    Stacked Argument Address (NSAA) will have a minimum alignment of
1968    8 bytes.  */
1969
1970 static unsigned int
1971 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
1972 {
1973   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1974
1975   if (alignment < PARM_BOUNDARY)
1976     alignment = PARM_BOUNDARY;
1977   if (alignment > STACK_BOUNDARY)
1978     alignment = STACK_BOUNDARY;
1979   return alignment;
1980 }
1981
1982 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1983
1984    Return true if an argument passed on the stack should be padded upwards,
1985    i.e. if the least-significant byte of the stack slot has useful data.
1986
1987    Small aggregate types are placed in the lowest memory address.
1988
1989    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1990
1991 bool
1992 aarch64_pad_arg_upward (machine_mode mode, const_tree type)
1993 {
1994   /* On little-endian targets, the least significant byte of every stack
1995      argument is passed at the lowest byte address of the stack slot.  */
1996   if (!BYTES_BIG_ENDIAN)
1997     return true;
1998
1999   /* Otherwise, integral, floating-point and pointer types are padded downward:
2000      the least significant byte of a stack argument is passed at the highest
2001      byte address of the stack slot.  */
2002   if (type
2003       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2004          || POINTER_TYPE_P (type))
2005       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2006     return false;
2007
2008   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
2009   return true;
2010 }
2011
2012 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2013
2014    It specifies padding for the last (may also be the only)
2015    element of a block move between registers and memory.  If
2016    assuming the block is in the memory, padding upward means that
2017    the last element is padded after its highest significant byte,
2018    while in downward padding, the last element is padded at the
2019    its least significant byte side.
2020
2021    Small aggregates and small complex types are always padded
2022    upwards.
2023
2024    We don't need to worry about homogeneous floating-point or
2025    short-vector aggregates; their move is not affected by the
2026    padding direction determined here.  Regardless of endianness,
2027    each element of such an aggregate is put in the least
2028    significant bits of a fp/simd register.
2029
2030    Return !BYTES_BIG_ENDIAN if the least significant byte of the
2031    register has useful data, and return the opposite if the most
2032    significant byte does.  */
2033
2034 bool
2035 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
2036                      bool first ATTRIBUTE_UNUSED)
2037 {
2038
2039   /* Small composite types are always padded upward.  */
2040   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2041     {
2042       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2043                             : GET_MODE_SIZE (mode));
2044       if (size < 2 * UNITS_PER_WORD)
2045         return true;
2046     }
2047
2048   /* Otherwise, use the default padding.  */
2049   return !BYTES_BIG_ENDIAN;
2050 }
2051
2052 static machine_mode
2053 aarch64_libgcc_cmp_return_mode (void)
2054 {
2055   return SImode;
2056 }
2057
2058 static bool
2059 aarch64_frame_pointer_required (void)
2060 {
2061   /* In aarch64_override_options_after_change
2062      flag_omit_leaf_frame_pointer turns off the frame pointer by
2063      default.  Turn it back on now if we've not got a leaf
2064      function.  */
2065   if (flag_omit_leaf_frame_pointer
2066       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2067     return true;
2068
2069   return false;
2070 }
2071
2072 /* Mark the registers that need to be saved by the callee and calculate
2073    the size of the callee-saved registers area and frame record (both FP
2074    and LR may be omitted).  */
2075 static void
2076 aarch64_layout_frame (void)
2077 {
2078   HOST_WIDE_INT offset = 0;
2079   int regno;
2080
2081   if (reload_completed && cfun->machine->frame.laid_out)
2082     return;
2083
2084 #define SLOT_NOT_REQUIRED (-2)
2085 #define SLOT_REQUIRED     (-1)
2086
2087   cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2088   cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2089
2090   /* First mark all the registers that really need to be saved...  */
2091   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2092     cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2093
2094   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2095     cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2096
2097   /* ... that includes the eh data registers (if needed)...  */
2098   if (crtl->calls_eh_return)
2099     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2100       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2101         = SLOT_REQUIRED;
2102
2103   /* ... and any callee saved register that dataflow says is live.  */
2104   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2105     if (df_regs_ever_live_p (regno)
2106         && (regno == R30_REGNUM
2107             || !call_used_regs[regno]))
2108       cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2109
2110   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2111     if (df_regs_ever_live_p (regno)
2112         && !call_used_regs[regno])
2113       cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2114
2115   if (frame_pointer_needed)
2116     {
2117       /* FP and LR are placed in the linkage record.  */
2118       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
2119       cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2120       cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
2121       cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2122       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2123       offset += 2 * UNITS_PER_WORD;
2124     }
2125
2126   /* Now assign stack slots for them.  */
2127   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2128     if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2129       {
2130         cfun->machine->frame.reg_offset[regno] = offset;
2131         if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2132           cfun->machine->frame.wb_candidate1 = regno;
2133         else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2134           cfun->machine->frame.wb_candidate2 = regno;
2135         offset += UNITS_PER_WORD;
2136       }
2137
2138   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2139     if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2140       {
2141         cfun->machine->frame.reg_offset[regno] = offset;
2142         if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2143           cfun->machine->frame.wb_candidate1 = regno;
2144         else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2145                  && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2146           cfun->machine->frame.wb_candidate2 = regno;
2147         offset += UNITS_PER_WORD;
2148       }
2149
2150   cfun->machine->frame.padding0 =
2151     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2152   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2153
2154   cfun->machine->frame.saved_regs_size = offset;
2155
2156   cfun->machine->frame.hard_fp_offset
2157     = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2158                         + get_frame_size ()
2159                         + cfun->machine->frame.saved_regs_size,
2160                         STACK_BOUNDARY / BITS_PER_UNIT);
2161
2162   cfun->machine->frame.frame_size
2163     = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2164                         + crtl->outgoing_args_size,
2165                         STACK_BOUNDARY / BITS_PER_UNIT);
2166
2167   cfun->machine->frame.laid_out = true;
2168 }
2169
2170 static bool
2171 aarch64_register_saved_on_entry (int regno)
2172 {
2173   return cfun->machine->frame.reg_offset[regno] >= 0;
2174 }
2175
2176 static unsigned
2177 aarch64_next_callee_save (unsigned regno, unsigned limit)
2178 {
2179   while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2180     regno ++;
2181   return regno;
2182 }
2183
2184 static void
2185 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
2186                            HOST_WIDE_INT adjustment)
2187  {
2188   rtx base_rtx = stack_pointer_rtx;
2189   rtx insn, reg, mem;
2190
2191   reg = gen_rtx_REG (mode, regno);
2192   mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2193                             plus_constant (Pmode, base_rtx, -adjustment));
2194   mem = gen_rtx_MEM (mode, mem);
2195
2196   insn = emit_move_insn (mem, reg);
2197   RTX_FRAME_RELATED_P (insn) = 1;
2198 }
2199
2200 static rtx
2201 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2202                           HOST_WIDE_INT adjustment)
2203 {
2204   switch (mode)
2205     {
2206     case DImode:
2207       return gen_storewb_pairdi_di (base, base, reg, reg2,
2208                                     GEN_INT (-adjustment),
2209                                     GEN_INT (UNITS_PER_WORD - adjustment));
2210     case DFmode:
2211       return gen_storewb_pairdf_di (base, base, reg, reg2,
2212                                     GEN_INT (-adjustment),
2213                                     GEN_INT (UNITS_PER_WORD - adjustment));
2214     default:
2215       gcc_unreachable ();
2216     }
2217 }
2218
2219 static void
2220 aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2221                          unsigned regno2, HOST_WIDE_INT adjustment)
2222 {
2223   rtx_insn *insn;
2224   rtx reg1 = gen_rtx_REG (mode, regno1);
2225   rtx reg2 = gen_rtx_REG (mode, regno2);
2226
2227   insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2228                                               reg2, adjustment));
2229   RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2230   RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2231   RTX_FRAME_RELATED_P (insn) = 1;
2232 }
2233
2234 static rtx
2235 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2236                          HOST_WIDE_INT adjustment)
2237 {
2238   switch (mode)
2239     {
2240     case DImode:
2241       return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2242                                    GEN_INT (UNITS_PER_WORD));
2243     case DFmode:
2244       return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2245                                    GEN_INT (UNITS_PER_WORD));
2246     default:
2247       gcc_unreachable ();
2248     }
2249 }
2250
2251 static rtx
2252 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2253                         rtx reg2)
2254 {
2255   switch (mode)
2256     {
2257     case DImode:
2258       return gen_store_pairdi (mem1, reg1, mem2, reg2);
2259
2260     case DFmode:
2261       return gen_store_pairdf (mem1, reg1, mem2, reg2);
2262
2263     default:
2264       gcc_unreachable ();
2265     }
2266 }
2267
2268 static rtx
2269 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2270                        rtx mem2)
2271 {
2272   switch (mode)
2273     {
2274     case DImode:
2275       return gen_load_pairdi (reg1, mem1, reg2, mem2);
2276
2277     case DFmode:
2278       return gen_load_pairdf (reg1, mem1, reg2, mem2);
2279
2280     default:
2281       gcc_unreachable ();
2282     }
2283 }
2284
2285
2286 static void
2287 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
2288                            unsigned start, unsigned limit, bool skip_wb)
2289 {
2290   rtx_insn *insn;
2291   rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2292                                                  ? gen_frame_mem : gen_rtx_MEM);
2293   unsigned regno;
2294   unsigned regno2;
2295
2296   for (regno = aarch64_next_callee_save (start, limit);
2297        regno <= limit;
2298        regno = aarch64_next_callee_save (regno + 1, limit))
2299     {
2300       rtx reg, mem;
2301       HOST_WIDE_INT offset;
2302
2303       if (skip_wb
2304           && (regno == cfun->machine->frame.wb_candidate1
2305               || regno == cfun->machine->frame.wb_candidate2))
2306         continue;
2307
2308       reg = gen_rtx_REG (mode, regno);
2309       offset = start_offset + cfun->machine->frame.reg_offset[regno];
2310       mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2311                                               offset));
2312
2313       regno2 = aarch64_next_callee_save (regno + 1, limit);
2314
2315       if (regno2 <= limit
2316           && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2317               == cfun->machine->frame.reg_offset[regno2]))
2318
2319         {
2320           rtx reg2 = gen_rtx_REG (mode, regno2);
2321           rtx mem2;
2322
2323           offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2324           mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2325                                                    offset));
2326           insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2327                                                     reg2));
2328
2329           /* The first part of a frame-related parallel insn is
2330              always assumed to be relevant to the frame
2331              calculations; subsequent parts, are only
2332              frame-related if explicitly marked.  */
2333           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2334           regno = regno2;
2335         }
2336       else
2337         insn = emit_move_insn (mem, reg);
2338
2339       RTX_FRAME_RELATED_P (insn) = 1;
2340     }
2341 }
2342
2343 static void
2344 aarch64_restore_callee_saves (machine_mode mode,
2345                               HOST_WIDE_INT start_offset, unsigned start,
2346                               unsigned limit, bool skip_wb, rtx *cfi_ops)
2347 {
2348   rtx base_rtx = stack_pointer_rtx;
2349   rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2350                                                  ? gen_frame_mem : gen_rtx_MEM);
2351   unsigned regno;
2352   unsigned regno2;
2353   HOST_WIDE_INT offset;
2354
2355   for (regno = aarch64_next_callee_save (start, limit);
2356        regno <= limit;
2357        regno = aarch64_next_callee_save (regno + 1, limit))
2358     {
2359       rtx reg, mem;
2360
2361       if (skip_wb
2362           && (regno == cfun->machine->frame.wb_candidate1
2363               || regno == cfun->machine->frame.wb_candidate2))
2364         continue;
2365
2366       reg = gen_rtx_REG (mode, regno);
2367       offset = start_offset + cfun->machine->frame.reg_offset[regno];
2368       mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2369
2370       regno2 = aarch64_next_callee_save (regno + 1, limit);
2371
2372       if (regno2 <= limit
2373           && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2374               == cfun->machine->frame.reg_offset[regno2]))
2375         {
2376           rtx reg2 = gen_rtx_REG (mode, regno2);
2377           rtx mem2;
2378
2379           offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2380           mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2381           emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2382
2383           *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2384           regno = regno2;
2385         }
2386       else
2387         emit_move_insn (reg, mem);
2388       *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2389     }
2390 }
2391
2392 /* AArch64 stack frames generated by this compiler look like:
2393
2394         +-------------------------------+
2395         |                               |
2396         |  incoming stack arguments     |
2397         |                               |
2398         +-------------------------------+
2399         |                               | <-- incoming stack pointer (aligned)
2400         |  callee-allocated save area   |
2401         |  for register varargs         |
2402         |                               |
2403         +-------------------------------+
2404         |  local variables              | <-- frame_pointer_rtx
2405         |                               |
2406         +-------------------------------+
2407         |  padding0                     | \
2408         +-------------------------------+  |
2409         |  callee-saved registers       |  | frame.saved_regs_size
2410         +-------------------------------+  |
2411         |  LR'                          |  |
2412         +-------------------------------+  |
2413         |  FP'                          | / <- hard_frame_pointer_rtx (aligned)
2414         +-------------------------------+
2415         |  dynamic allocation           |
2416         +-------------------------------+
2417         |  padding                      |
2418         +-------------------------------+
2419         |  outgoing stack arguments     | <-- arg_pointer
2420         |                               |
2421         +-------------------------------+
2422         |                               | <-- stack_pointer_rtx (aligned)
2423
2424    Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2425    but leave frame_pointer_rtx and hard_frame_pointer_rtx
2426    unchanged.  */
2427
2428 /* Generate the prologue instructions for entry into a function.
2429    Establish the stack frame by decreasing the stack pointer with a
2430    properly calculated size and, if necessary, create a frame record
2431    filled with the values of LR and previous frame pointer.  The
2432    current FP is also set up if it is in use.  */
2433
2434 void
2435 aarch64_expand_prologue (void)
2436 {
2437   /* sub sp, sp, #<frame_size>
2438      stp {fp, lr}, [sp, #<frame_size> - 16]
2439      add fp, sp, #<frame_size> - hardfp_offset
2440      stp {cs_reg}, [fp, #-16] etc.
2441
2442      sub sp, sp, <final_adjustment_if_any>
2443   */
2444   HOST_WIDE_INT frame_size, offset;
2445   HOST_WIDE_INT fp_offset;              /* Offset from hard FP to SP.  */
2446   HOST_WIDE_INT hard_fp_offset;
2447   rtx_insn *insn;
2448
2449   aarch64_layout_frame ();
2450
2451   offset = frame_size = cfun->machine->frame.frame_size;
2452   hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2453   fp_offset = frame_size - hard_fp_offset;
2454
2455   if (flag_stack_usage_info)
2456     current_function_static_stack_size = frame_size;
2457
2458   /* Store pairs and load pairs have a range only -512 to 504.  */
2459   if (offset >= 512)
2460     {
2461       /* When the frame has a large size, an initial decrease is done on
2462          the stack pointer to jump over the callee-allocated save area for
2463          register varargs, the local variable area and/or the callee-saved
2464          register area.  This will allow the pre-index write-back
2465          store pair instructions to be used for setting up the stack frame
2466          efficiently.  */
2467       offset = hard_fp_offset;
2468       if (offset >= 512)
2469         offset = cfun->machine->frame.saved_regs_size;
2470
2471       frame_size -= (offset + crtl->outgoing_args_size);
2472       fp_offset = 0;
2473
2474       if (frame_size >= 0x1000000)
2475         {
2476           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2477           emit_move_insn (op0, GEN_INT (-frame_size));
2478           insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2479
2480           add_reg_note (insn, REG_CFA_ADJUST_CFA,
2481                         gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2482                                      plus_constant (Pmode, stack_pointer_rtx,
2483                                                     -frame_size)));
2484           RTX_FRAME_RELATED_P (insn) = 1;
2485         }
2486       else if (frame_size > 0)
2487         {
2488           int hi_ofs = frame_size & 0xfff000;
2489           int lo_ofs = frame_size & 0x000fff;
2490
2491           if (hi_ofs)
2492             {
2493               insn = emit_insn (gen_add2_insn
2494                                 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2495               RTX_FRAME_RELATED_P (insn) = 1;
2496             }
2497           if (lo_ofs)
2498             {
2499               insn = emit_insn (gen_add2_insn
2500                                 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2501               RTX_FRAME_RELATED_P (insn) = 1;
2502             }
2503         }
2504     }
2505   else
2506     frame_size = -1;
2507
2508   if (offset > 0)
2509     {
2510       bool skip_wb = false;
2511
2512       if (frame_pointer_needed)
2513         {
2514           skip_wb = true;
2515
2516           if (fp_offset)
2517             {
2518               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2519                                                GEN_INT (-offset)));
2520               RTX_FRAME_RELATED_P (insn) = 1;
2521
2522               aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2523                                          R30_REGNUM, false);
2524             }
2525           else
2526             aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2527
2528           /* Set up frame pointer to point to the location of the
2529              previous frame pointer on the stack.  */
2530           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2531                                            stack_pointer_rtx,
2532                                            GEN_INT (fp_offset)));
2533           RTX_FRAME_RELATED_P (insn) = 1;
2534           emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2535         }
2536       else
2537         {
2538           unsigned reg1 = cfun->machine->frame.wb_candidate1;
2539           unsigned reg2 = cfun->machine->frame.wb_candidate2;
2540
2541           if (fp_offset
2542               || reg1 == FIRST_PSEUDO_REGISTER
2543               || (reg2 == FIRST_PSEUDO_REGISTER
2544                   && offset >= 256))
2545             {
2546               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2547                                                GEN_INT (-offset)));
2548               RTX_FRAME_RELATED_P (insn) = 1;
2549             }
2550           else
2551             {
2552               machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2553
2554               skip_wb = true;
2555
2556               if (reg2 == FIRST_PSEUDO_REGISTER)
2557                 aarch64_pushwb_single_reg (mode1, reg1, offset);
2558               else
2559                 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2560             }
2561         }
2562
2563       aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2564                                  skip_wb);
2565       aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2566                                  skip_wb);
2567     }
2568
2569   /* when offset >= 512,
2570      sub sp, sp, #<outgoing_args_size> */
2571   if (frame_size > -1)
2572     {
2573       if (crtl->outgoing_args_size > 0)
2574         {
2575           insn = emit_insn (gen_add2_insn
2576                             (stack_pointer_rtx,
2577                              GEN_INT (- crtl->outgoing_args_size)));
2578           RTX_FRAME_RELATED_P (insn) = 1;
2579         }
2580     }
2581 }
2582
2583 /* Return TRUE if we can use a simple_return insn.
2584
2585    This function checks whether the callee saved stack is empty, which
2586    means no restore actions are need. The pro_and_epilogue will use
2587    this to check whether shrink-wrapping opt is feasible.  */
2588
2589 bool
2590 aarch64_use_return_insn_p (void)
2591 {
2592   if (!reload_completed)
2593     return false;
2594
2595   if (crtl->profile)
2596     return false;
2597
2598   aarch64_layout_frame ();
2599
2600   return cfun->machine->frame.frame_size == 0;
2601 }
2602
2603 /* Generate the epilogue instructions for returning from a function.  */
2604 void
2605 aarch64_expand_epilogue (bool for_sibcall)
2606 {
2607   HOST_WIDE_INT frame_size, offset;
2608   HOST_WIDE_INT fp_offset;
2609   HOST_WIDE_INT hard_fp_offset;
2610   rtx_insn *insn;
2611   /* We need to add memory barrier to prevent read from deallocated stack.  */
2612   bool need_barrier_p = (get_frame_size () != 0
2613                          || cfun->machine->frame.saved_varargs_size);
2614
2615   aarch64_layout_frame ();
2616
2617   offset = frame_size = cfun->machine->frame.frame_size;
2618   hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2619   fp_offset = frame_size - hard_fp_offset;
2620
2621   /* Store pairs and load pairs have a range only -512 to 504.  */
2622   if (offset >= 512)
2623     {
2624       offset = hard_fp_offset;
2625       if (offset >= 512)
2626         offset = cfun->machine->frame.saved_regs_size;
2627
2628       frame_size -= (offset + crtl->outgoing_args_size);
2629       fp_offset = 0;
2630       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2631         {
2632           insn = emit_insn (gen_add2_insn
2633                             (stack_pointer_rtx,
2634                              GEN_INT (crtl->outgoing_args_size)));
2635           RTX_FRAME_RELATED_P (insn) = 1;
2636         }
2637     }
2638   else
2639     frame_size = -1;
2640
2641   /* If there were outgoing arguments or we've done dynamic stack
2642      allocation, then restore the stack pointer from the frame
2643      pointer.  This is at most one insn and more efficient than using
2644      GCC's internal mechanism.  */
2645   if (frame_pointer_needed
2646       && (crtl->outgoing_args_size || cfun->calls_alloca))
2647     {
2648       if (cfun->calls_alloca)
2649         emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2650
2651       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2652                                        hard_frame_pointer_rtx,
2653                                        GEN_INT (0)));
2654       offset = offset - fp_offset;
2655     }
2656
2657   if (offset > 0)
2658     {
2659       unsigned reg1 = cfun->machine->frame.wb_candidate1;
2660       unsigned reg2 = cfun->machine->frame.wb_candidate2;
2661       bool skip_wb = true;
2662       rtx cfi_ops = NULL;
2663
2664       if (frame_pointer_needed)
2665         fp_offset = 0;
2666       else if (fp_offset
2667                || reg1 == FIRST_PSEUDO_REGISTER
2668                || (reg2 == FIRST_PSEUDO_REGISTER
2669                    && offset >= 256))
2670         skip_wb = false;
2671
2672       aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2673                                     skip_wb, &cfi_ops);
2674       aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2675                                     skip_wb, &cfi_ops);
2676
2677       if (need_barrier_p)
2678         emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2679
2680       if (skip_wb)
2681         {
2682           machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2683           rtx rreg1 = gen_rtx_REG (mode1, reg1);
2684
2685           cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2686           if (reg2 == FIRST_PSEUDO_REGISTER)
2687             {
2688               rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2689               mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2690               mem = gen_rtx_MEM (mode1, mem);
2691               insn = emit_move_insn (rreg1, mem);
2692             }
2693           else
2694             {
2695               rtx rreg2 = gen_rtx_REG (mode1, reg2);
2696
2697               cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2698               insn = emit_insn (aarch64_gen_loadwb_pair
2699                                 (mode1, stack_pointer_rtx, rreg1,
2700                                  rreg2, offset));
2701             }
2702         }
2703       else
2704         {
2705           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2706                                            GEN_INT (offset)));
2707         }
2708
2709       /* Reset the CFA to be SP + FRAME_SIZE.  */
2710       rtx new_cfa = stack_pointer_rtx;
2711       if (frame_size > 0)
2712         new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2713       cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2714       REG_NOTES (insn) = cfi_ops;
2715       RTX_FRAME_RELATED_P (insn) = 1;
2716     }
2717
2718   if (frame_size > 0)
2719     {
2720       if (need_barrier_p)
2721         emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2722
2723       if (frame_size >= 0x1000000)
2724         {
2725           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2726           emit_move_insn (op0, GEN_INT (frame_size));
2727           insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2728         }
2729       else
2730         {
2731           int hi_ofs = frame_size & 0xfff000;
2732           int lo_ofs = frame_size & 0x000fff;
2733
2734           if (hi_ofs && lo_ofs)
2735             {
2736               insn = emit_insn (gen_add2_insn
2737                                 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2738               RTX_FRAME_RELATED_P (insn) = 1;
2739               frame_size = lo_ofs;
2740             }
2741           insn = emit_insn (gen_add2_insn
2742                             (stack_pointer_rtx, GEN_INT (frame_size)));
2743         }
2744
2745       /* Reset the CFA to be SP + 0.  */
2746       add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2747       RTX_FRAME_RELATED_P (insn) = 1;
2748     }
2749
2750   /* Stack adjustment for exception handler.  */
2751   if (crtl->calls_eh_return)
2752     {
2753       /* We need to unwind the stack by the offset computed by
2754          EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
2755          to be SP; letting the CFA move during this adjustment
2756          is just as correct as retaining the CFA from the body
2757          of the function.  Therefore, do nothing special.  */
2758       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2759     }
2760
2761   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2762   if (!for_sibcall)
2763     emit_jump_insn (ret_rtx);
2764 }
2765
2766 /* Return the place to copy the exception unwinding return address to.
2767    This will probably be a stack slot, but could (in theory be the
2768    return register).  */
2769 rtx
2770 aarch64_final_eh_return_addr (void)
2771 {
2772   HOST_WIDE_INT fp_offset;
2773
2774   aarch64_layout_frame ();
2775
2776   fp_offset = cfun->machine->frame.frame_size
2777               - cfun->machine->frame.hard_fp_offset;
2778
2779   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2780     return gen_rtx_REG (DImode, LR_REGNUM);
2781
2782   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2783      result in a store to save LR introduced by builtin_eh_return () being
2784      incorrectly deleted because the alias is not detected.
2785      So in the calculation of the address to copy the exception unwinding
2786      return address to, we note 2 cases.
2787      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2788      we return a SP-relative location since all the addresses are SP-relative
2789      in this case.  This prevents the store from being optimized away.
2790      If the fp_offset is not 0, then the addresses will be FP-relative and
2791      therefore we return a FP-relative location.  */
2792
2793   if (frame_pointer_needed)
2794     {
2795       if (fp_offset)
2796         return gen_frame_mem (DImode,
2797                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2798       else
2799         return gen_frame_mem (DImode,
2800                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2801     }
2802
2803   /* If FP is not needed, we calculate the location of LR, which would be
2804      at the top of the saved registers block.  */
2805
2806   return gen_frame_mem (DImode,
2807                         plus_constant (Pmode,
2808                                        stack_pointer_rtx,
2809                                        fp_offset
2810                                        + cfun->machine->frame.saved_regs_size
2811                                        - 2 * UNITS_PER_WORD));
2812 }
2813
2814 /* Possibly output code to build up a constant in a register.  For
2815    the benefit of the costs infrastructure, returns the number of
2816    instructions which would be emitted.  GENERATE inhibits or
2817    enables code generation.  */
2818
2819 static int
2820 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2821 {
2822   int insns = 0;
2823
2824   if (aarch64_bitmask_imm (val, DImode))
2825     {
2826       if (generate)
2827         emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2828       insns = 1;
2829     }
2830   else
2831     {
2832       int i;
2833       int ncount = 0;
2834       int zcount = 0;
2835       HOST_WIDE_INT valp = val >> 16;
2836       HOST_WIDE_INT valm;
2837       HOST_WIDE_INT tval;
2838
2839       for (i = 16; i < 64; i += 16)
2840         {
2841           valm = (valp & 0xffff);
2842
2843           if (valm != 0)
2844             ++ zcount;
2845
2846           if (valm != 0xffff)
2847             ++ ncount;
2848
2849           valp >>= 16;
2850         }
2851
2852       /* zcount contains the number of additional MOVK instructions
2853          required if the constant is built up with an initial MOVZ instruction,
2854          while ncount is the number of MOVK instructions required if starting
2855          with a MOVN instruction.  Choose the sequence that yields the fewest
2856          number of instructions, preferring MOVZ instructions when they are both
2857          the same.  */
2858       if (ncount < zcount)
2859         {
2860           if (generate)
2861             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2862                             GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2863           tval = 0xffff;
2864           insns++;
2865         }
2866       else
2867         {
2868           if (generate)
2869             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2870                             GEN_INT (val & 0xffff));
2871           tval = 0;
2872           insns++;
2873         }
2874
2875       val >>= 16;
2876
2877       for (i = 16; i < 64; i += 16)
2878         {
2879           if ((val & 0xffff) != tval)
2880             {
2881               if (generate)
2882                 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2883                                            GEN_INT (i),
2884                                            GEN_INT (val & 0xffff)));
2885               insns++;
2886             }
2887           val >>= 16;
2888         }
2889     }
2890   return insns;
2891 }
2892
2893 static void
2894 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2895 {
2896   HOST_WIDE_INT mdelta = delta;
2897   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2898   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2899
2900   if (mdelta < 0)
2901     mdelta = -mdelta;
2902
2903   if (mdelta >= 4096 * 4096)
2904     {
2905       (void) aarch64_build_constant (scratchreg, delta, true);
2906       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2907     }
2908   else if (mdelta > 0)
2909     {
2910       if (mdelta >= 4096)
2911         {
2912           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2913           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2914           if (delta < 0)
2915             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2916                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2917           else
2918             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2919                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2920         }
2921       if (mdelta % 4096 != 0)
2922         {
2923           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2924           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2925                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2926         }
2927     }
2928 }
2929
2930 /* Output code to add DELTA to the first argument, and then jump
2931    to FUNCTION.  Used for C++ multiple inheritance.  */
2932 static void
2933 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2934                          HOST_WIDE_INT delta,
2935                          HOST_WIDE_INT vcall_offset,
2936                          tree function)
2937 {
2938   /* The this pointer is always in x0.  Note that this differs from
2939      Arm where the this pointer maybe bumped to r1 if r0 is required
2940      to return a pointer to an aggregate.  On AArch64 a result value
2941      pointer will be in x8.  */
2942   int this_regno = R0_REGNUM;
2943   rtx this_rtx, temp0, temp1, addr, funexp;
2944   rtx_insn *insn;
2945
2946   reload_completed = 1;
2947   emit_note (NOTE_INSN_PROLOGUE_END);
2948
2949   if (vcall_offset == 0)
2950     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2951   else
2952     {
2953       gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2954
2955       this_rtx = gen_rtx_REG (Pmode, this_regno);
2956       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2957       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2958
2959       addr = this_rtx;
2960       if (delta != 0)
2961         {
2962           if (delta >= -256 && delta < 256)
2963             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2964                                        plus_constant (Pmode, this_rtx, delta));
2965           else
2966             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2967         }
2968
2969       if (Pmode == ptr_mode)
2970         aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2971       else
2972         aarch64_emit_move (temp0,
2973                            gen_rtx_ZERO_EXTEND (Pmode,
2974                                                 gen_rtx_MEM (ptr_mode, addr)));
2975
2976       if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2977           addr = plus_constant (Pmode, temp0, vcall_offset);
2978       else
2979         {
2980           (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2981           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2982         }
2983
2984       if (Pmode == ptr_mode)
2985         aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2986       else
2987         aarch64_emit_move (temp1,
2988                            gen_rtx_SIGN_EXTEND (Pmode,
2989                                                 gen_rtx_MEM (ptr_mode, addr)));
2990
2991       emit_insn (gen_add2_insn (this_rtx, temp1));
2992     }
2993
2994   /* Generate a tail call to the target function.  */
2995   if (!TREE_USED (function))
2996     {
2997       assemble_external (function);
2998       TREE_USED (function) = 1;
2999     }
3000   funexp = XEXP (DECL_RTL (function), 0);
3001   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3002   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3003   SIBLING_CALL_P (insn) = 1;
3004
3005   insn = get_insns ();
3006   shorten_branches (insn);
3007   final_start_function (insn, file, 1);
3008   final (insn, file, 1);
3009   final_end_function ();
3010
3011   /* Stop pretending to be a post-reload pass.  */
3012   reload_completed = 0;
3013 }
3014
3015 static bool
3016 aarch64_tls_referenced_p (rtx x)
3017 {
3018   if (!TARGET_HAVE_TLS)
3019     return false;
3020   subrtx_iterator::array_type array;
3021   FOR_EACH_SUBRTX (iter, array, x, ALL)
3022     {
3023       const_rtx x = *iter;
3024       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3025         return true;
3026       /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3027          TLS offsets, not real symbol references.  */
3028       if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3029         iter.skip_subrtxes ();
3030     }
3031   return false;
3032 }
3033
3034
3035 static int
3036 aarch64_bitmasks_cmp (const void *i1, const void *i2)
3037 {
3038   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3039   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3040
3041   if (*imm1 < *imm2)
3042     return -1;
3043   if (*imm1 > *imm2)
3044     return +1;
3045   return 0;
3046 }
3047
3048
3049 static void
3050 aarch64_build_bitmask_table (void)
3051 {
3052   unsigned HOST_WIDE_INT mask, imm;
3053   unsigned int log_e, e, s, r;
3054   unsigned int nimms = 0;
3055
3056   for (log_e = 1; log_e <= 6; log_e++)
3057     {
3058       e = 1 << log_e;
3059       if (e == 64)
3060         mask = ~(HOST_WIDE_INT) 0;
3061       else
3062         mask = ((HOST_WIDE_INT) 1 << e) - 1;
3063       for (s = 1; s < e; s++)
3064         {
3065           for (r = 0; r < e; r++)
3066             {
3067               /* set s consecutive bits to 1 (s < 64) */
3068               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3069               /* rotate right by r */
3070               if (r != 0)
3071                 imm = ((imm >> r) | (imm << (e - r))) & mask;
3072               /* replicate the constant depending on SIMD size */
3073               switch (log_e) {
3074               case 1: imm |= (imm <<  2);
3075               case 2: imm |= (imm <<  4);
3076               case 3: imm |= (imm <<  8);
3077               case 4: imm |= (imm << 16);
3078               case 5: imm |= (imm << 32);
3079               case 6:
3080                 break;
3081               default:
3082                 gcc_unreachable ();
3083               }
3084               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3085               aarch64_bitmasks[nimms++] = imm;
3086             }
3087         }
3088     }
3089
3090   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3091   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3092          aarch64_bitmasks_cmp);
3093 }
3094
3095
3096 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3097    a left shift of 0 or 12 bits.  */
3098 bool
3099 aarch64_uimm12_shift (HOST_WIDE_INT val)
3100 {
3101   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3102           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3103           );
3104 }
3105
3106
3107 /* Return true if val is an immediate that can be loaded into a
3108    register by a MOVZ instruction.  */
3109 static bool
3110 aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
3111 {
3112   if (GET_MODE_SIZE (mode) > 4)
3113     {
3114       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3115           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3116         return 1;
3117     }
3118   else
3119     {
3120       /* Ignore sign extension.  */
3121       val &= (HOST_WIDE_INT) 0xffffffff;
3122     }
3123   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3124           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3125 }
3126
3127
3128 /* Return true if val is a valid bitmask immediate.  */
3129 bool
3130 aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
3131 {
3132   if (GET_MODE_SIZE (mode) < 8)
3133     {
3134       /* Replicate bit pattern.  */
3135       val &= (HOST_WIDE_INT) 0xffffffff;
3136       val |= val << 32;
3137     }
3138   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3139                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3140 }
3141
3142
3143 /* Return true if val is an immediate that can be loaded into a
3144    register in a single instruction.  */
3145 bool
3146 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
3147 {
3148   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3149     return 1;
3150   return aarch64_bitmask_imm (val, mode);
3151 }
3152
3153 static bool
3154 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3155 {
3156   rtx base, offset;
3157
3158   if (GET_CODE (x) == HIGH)
3159     return true;
3160
3161   split_const (x, &base, &offset);
3162   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
3163     {
3164       if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
3165           != SYMBOL_FORCE_TO_MEM)
3166         return true;
3167       else
3168         /* Avoid generating a 64-bit relocation in ILP32; leave
3169            to aarch64_expand_mov_immediate to handle it properly.  */
3170         return mode != ptr_mode;
3171     }
3172
3173   return aarch64_tls_referenced_p (x);
3174 }
3175
3176 /* Return true if register REGNO is a valid index register.
3177    STRICT_P is true if REG_OK_STRICT is in effect.  */
3178
3179 bool
3180 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3181 {
3182   if (!HARD_REGISTER_NUM_P (regno))
3183     {
3184       if (!strict_p)
3185         return true;
3186
3187       if (!reg_renumber)
3188         return false;
3189
3190       regno = reg_renumber[regno];
3191     }
3192   return GP_REGNUM_P (regno);
3193 }
3194
3195 /* Return true if register REGNO is a valid base register for mode MODE.
3196    STRICT_P is true if REG_OK_STRICT is in effect.  */
3197
3198 bool
3199 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3200 {
3201   if (!HARD_REGISTER_NUM_P (regno))
3202     {
3203       if (!strict_p)
3204         return true;
3205
3206       if (!reg_renumber)
3207         return false;
3208
3209       regno = reg_renumber[regno];
3210     }
3211
3212   /* The fake registers will be eliminated to either the stack or
3213      hard frame pointer, both of which are usually valid base registers.
3214      Reload deals with the cases where the eliminated form isn't valid.  */
3215   return (GP_REGNUM_P (regno)
3216           || regno == SP_REGNUM
3217           || regno == FRAME_POINTER_REGNUM
3218           || regno == ARG_POINTER_REGNUM);
3219 }
3220
3221 /* Return true if X is a valid base register for mode MODE.
3222    STRICT_P is true if REG_OK_STRICT is in effect.  */
3223
3224 static bool
3225 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3226 {
3227   if (!strict_p && GET_CODE (x) == SUBREG)
3228     x = SUBREG_REG (x);
3229
3230   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3231 }
3232
3233 /* Return true if address offset is a valid index.  If it is, fill in INFO
3234    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
3235
3236 static bool
3237 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3238                         machine_mode mode, bool strict_p)
3239 {
3240   enum aarch64_address_type type;
3241   rtx index;
3242   int shift;
3243
3244   /* (reg:P) */
3245   if ((REG_P (x) || GET_CODE (x) == SUBREG)
3246       && GET_MODE (x) == Pmode)
3247     {
3248       type = ADDRESS_REG_REG;
3249       index = x;
3250       shift = 0;
3251     }
3252   /* (sign_extend:DI (reg:SI)) */
3253   else if ((GET_CODE (x) == SIGN_EXTEND
3254             || GET_CODE (x) == ZERO_EXTEND)
3255            && GET_MODE (x) == DImode
3256            && GET_MODE (XEXP (x, 0)) == SImode)
3257     {
3258       type = (GET_CODE (x) == SIGN_EXTEND)
3259         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3260       index = XEXP (x, 0);
3261       shift = 0;
3262     }
3263   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3264   else if (GET_CODE (x) == MULT
3265            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3266                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3267            && GET_MODE (XEXP (x, 0)) == DImode
3268            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3269            && CONST_INT_P (XEXP (x, 1)))
3270     {
3271       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3272         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3273       index = XEXP (XEXP (x, 0), 0);
3274       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3275     }
3276   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3277   else if (GET_CODE (x) == ASHIFT
3278            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3279                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3280            && GET_MODE (XEXP (x, 0)) == DImode
3281            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3282            && CONST_INT_P (XEXP (x, 1)))
3283     {
3284       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3285         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3286       index = XEXP (XEXP (x, 0), 0);
3287       shift = INTVAL (XEXP (x, 1));
3288     }
3289   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3290   else if ((GET_CODE (x) == SIGN_EXTRACT
3291             || GET_CODE (x) == ZERO_EXTRACT)
3292            && GET_MODE (x) == DImode
3293            && GET_CODE (XEXP (x, 0)) == MULT
3294            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3295            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3296     {
3297       type = (GET_CODE (x) == SIGN_EXTRACT)
3298         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3299       index = XEXP (XEXP (x, 0), 0);
3300       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3301       if (INTVAL (XEXP (x, 1)) != 32 + shift
3302           || INTVAL (XEXP (x, 2)) != 0)
3303         shift = -1;
3304     }
3305   /* (and:DI (mult:DI (reg:DI) (const_int scale))
3306      (const_int 0xffffffff<<shift)) */
3307   else if (GET_CODE (x) == AND
3308            && GET_MODE (x) == DImode
3309            && GET_CODE (XEXP (x, 0)) == MULT
3310            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3311            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3312            && CONST_INT_P (XEXP (x, 1)))
3313     {
3314       type = ADDRESS_REG_UXTW;
3315       index = XEXP (XEXP (x, 0), 0);
3316       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3317       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3318         shift = -1;
3319     }
3320   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3321   else if ((GET_CODE (x) == SIGN_EXTRACT
3322             || GET_CODE (x) == ZERO_EXTRACT)
3323            && GET_MODE (x) == DImode
3324            && GET_CODE (XEXP (x, 0)) == ASHIFT
3325            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3326            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3327     {
3328       type = (GET_CODE (x) == SIGN_EXTRACT)
3329         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3330       index = XEXP (XEXP (x, 0), 0);
3331       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3332       if (INTVAL (XEXP (x, 1)) != 32 + shift
3333           || INTVAL (XEXP (x, 2)) != 0)
3334         shift = -1;
3335     }
3336   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3337      (const_int 0xffffffff<<shift)) */
3338   else if (GET_CODE (x) == AND
3339            && GET_MODE (x) == DImode
3340            && GET_CODE (XEXP (x, 0)) == ASHIFT
3341            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3342            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3343            && CONST_INT_P (XEXP (x, 1)))
3344     {
3345       type = ADDRESS_REG_UXTW;
3346       index = XEXP (XEXP (x, 0), 0);
3347       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3348       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3349         shift = -1;
3350     }
3351   /* (mult:P (reg:P) (const_int scale)) */
3352   else if (GET_CODE (x) == MULT
3353            && GET_MODE (x) == Pmode
3354            && GET_MODE (XEXP (x, 0)) == Pmode
3355            && CONST_INT_P (XEXP (x, 1)))
3356     {
3357       type = ADDRESS_REG_REG;
3358       index = XEXP (x, 0);
3359       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3360     }
3361   /* (ashift:P (reg:P) (const_int shift)) */
3362   else if (GET_CODE (x) == ASHIFT
3363            && GET_MODE (x) == Pmode
3364            && GET_MODE (XEXP (x, 0)) == Pmode
3365            && CONST_INT_P (XEXP (x, 1)))
3366     {
3367       type = ADDRESS_REG_REG;
3368       index = XEXP (x, 0);
3369       shift = INTVAL (XEXP (x, 1));
3370     }
3371   else
3372     return false;
3373
3374   if (GET_CODE (index) == SUBREG)
3375     index = SUBREG_REG (index);
3376
3377   if ((shift == 0 ||
3378        (shift > 0 && shift <= 3
3379         && (1 << shift) == GET_MODE_SIZE (mode)))
3380       && REG_P (index)
3381       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3382     {
3383       info->type = type;
3384       info->offset = index;
3385       info->shift = shift;
3386       return true;
3387     }
3388
3389   return false;
3390 }
3391
3392 bool
3393 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3394 {
3395   return (offset >= -64 * GET_MODE_SIZE (mode)
3396           && offset < 64 * GET_MODE_SIZE (mode)
3397           && offset % GET_MODE_SIZE (mode) == 0);
3398 }
3399
3400 static inline bool
3401 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3402                                HOST_WIDE_INT offset)
3403 {
3404   return offset >= -256 && offset < 256;
3405 }
3406
3407 static inline bool
3408 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3409 {
3410   return (offset >= 0
3411           && offset < 4096 * GET_MODE_SIZE (mode)
3412           && offset % GET_MODE_SIZE (mode) == 0);
3413 }
3414
3415 /* Return true if X is a valid address for machine mode MODE.  If it is,
3416    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
3417    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
3418
3419 static bool
3420 aarch64_classify_address (struct aarch64_address_info *info,
3421                           rtx x, machine_mode mode,
3422                           RTX_CODE outer_code, bool strict_p)
3423 {
3424   enum rtx_code code = GET_CODE (x);
3425   rtx op0, op1;
3426   bool allow_reg_index_p =
3427     outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3428                                || aarch64_vector_mode_supported_p (mode));
3429   /* Don't support anything other than POST_INC or REG addressing for
3430      AdvSIMD.  */
3431   if (aarch64_vect_struct_mode_p (mode)
3432       && (code != POST_INC && code != REG))
3433     return false;
3434
3435   switch (code)
3436     {
3437     case REG:
3438     case SUBREG:
3439       info->type = ADDRESS_REG_IMM;
3440       info->base = x;
3441       info->offset = const0_rtx;
3442       return aarch64_base_register_rtx_p (x, strict_p);
3443
3444     case PLUS:
3445       op0 = XEXP (x, 0);
3446       op1 = XEXP (x, 1);
3447
3448       if (! strict_p
3449           && REG_P (op0)
3450           && (op0 == virtual_stack_vars_rtx
3451               || op0 == frame_pointer_rtx
3452               || op0 == arg_pointer_rtx)
3453           && CONST_INT_P (op1))
3454         {
3455           info->type = ADDRESS_REG_IMM;
3456           info->base = op0;
3457           info->offset = op1;
3458
3459           return true;
3460         }
3461
3462       if (GET_MODE_SIZE (mode) != 0
3463           && CONST_INT_P (op1)
3464           && aarch64_base_register_rtx_p (op0, strict_p))
3465         {
3466           HOST_WIDE_INT offset = INTVAL (op1);
3467
3468           info->type = ADDRESS_REG_IMM;
3469           info->base = op0;
3470           info->offset = op1;
3471
3472           /* TImode and TFmode values are allowed in both pairs of X
3473              registers and individual Q registers.  The available
3474              address modes are:
3475              X,X: 7-bit signed scaled offset
3476              Q:   9-bit signed offset
3477              We conservatively require an offset representable in either mode.
3478            */
3479           if (mode == TImode || mode == TFmode)
3480             return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3481                     && offset_9bit_signed_unscaled_p (mode, offset));
3482
3483           if (outer_code == PARALLEL)
3484             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3485                     && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3486           else
3487             return (offset_9bit_signed_unscaled_p (mode, offset)
3488                     || offset_12bit_unsigned_scaled_p (mode, offset));
3489         }
3490
3491       if (allow_reg_index_p)
3492         {
3493           /* Look for base + (scaled/extended) index register.  */
3494           if (aarch64_base_register_rtx_p (op0, strict_p)
3495               && aarch64_classify_index (info, op1, mode, strict_p))
3496             {
3497               info->base = op0;
3498               return true;
3499             }
3500           if (aarch64_base_register_rtx_p (op1, strict_p)
3501               && aarch64_classify_index (info, op0, mode, strict_p))
3502             {
3503               info->base = op1;
3504               return true;
3505             }
3506         }
3507
3508       return false;
3509
3510     case POST_INC:
3511     case POST_DEC:
3512     case PRE_INC:
3513     case PRE_DEC:
3514       info->type = ADDRESS_REG_WB;
3515       info->base = XEXP (x, 0);
3516       info->offset = NULL_RTX;
3517       return aarch64_base_register_rtx_p (info->base, strict_p);
3518
3519     case POST_MODIFY:
3520     case PRE_MODIFY:
3521       info->type = ADDRESS_REG_WB;
3522       info->base = XEXP (x, 0);
3523       if (GET_CODE (XEXP (x, 1)) == PLUS
3524           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3525           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3526           && aarch64_base_register_rtx_p (info->base, strict_p))
3527         {
3528           HOST_WIDE_INT offset;
3529           info->offset = XEXP (XEXP (x, 1), 1);
3530           offset = INTVAL (info->offset);
3531
3532           /* TImode and TFmode values are allowed in both pairs of X
3533              registers and individual Q registers.  The available
3534              address modes are:
3535              X,X: 7-bit signed scaled offset
3536              Q:   9-bit signed offset
3537              We conservatively require an offset representable in either mode.
3538            */
3539           if (mode == TImode || mode == TFmode)
3540             return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3541                     && offset_9bit_signed_unscaled_p (mode, offset));
3542
3543           if (outer_code == PARALLEL)
3544             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3545                     && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3546           else
3547             return offset_9bit_signed_unscaled_p (mode, offset);
3548         }
3549       return false;
3550
3551     case CONST:
3552     case SYMBOL_REF:
3553     case LABEL_REF:
3554       /* load literal: pc-relative constant pool entry.  Only supported
3555          for SI mode or larger.  */
3556       info->type = ADDRESS_SYMBOLIC;
3557       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3558         {
3559           rtx sym, addend;
3560
3561           split_const (x, &sym, &addend);
3562           return (GET_CODE (sym) == LABEL_REF
3563                   || (GET_CODE (sym) == SYMBOL_REF
3564                       && CONSTANT_POOL_ADDRESS_P (sym)));
3565         }
3566       return false;
3567
3568     case LO_SUM:
3569       info->type = ADDRESS_LO_SUM;
3570       info->base = XEXP (x, 0);
3571       info->offset = XEXP (x, 1);
3572       if (allow_reg_index_p
3573           && aarch64_base_register_rtx_p (info->base, strict_p))
3574         {
3575           rtx sym, offs;
3576           split_const (info->offset, &sym, &offs);
3577           if (GET_CODE (sym) == SYMBOL_REF
3578               && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3579                   == SYMBOL_SMALL_ABSOLUTE))
3580             {
3581               /* The symbol and offset must be aligned to the access size.  */
3582               unsigned int align;
3583               unsigned int ref_size;
3584
3585               if (CONSTANT_POOL_ADDRESS_P (sym))
3586                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3587               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3588                 {
3589                   tree exp = SYMBOL_REF_DECL (sym);
3590                   align = TYPE_ALIGN (TREE_TYPE (exp));
3591                   align = CONSTANT_ALIGNMENT (exp, align);
3592                 }
3593               else if (SYMBOL_REF_DECL (sym))
3594                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3595               else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3596                        && SYMBOL_REF_BLOCK (sym) != NULL)
3597                 align = SYMBOL_REF_BLOCK (sym)->alignment;
3598               else
3599                 align = BITS_PER_UNIT;
3600
3601               ref_size = GET_MODE_SIZE (mode);
3602               if (ref_size == 0)
3603                 ref_size = GET_MODE_SIZE (DImode);
3604
3605               return ((INTVAL (offs) & (ref_size - 1)) == 0
3606                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3607             }
3608         }
3609       return false;
3610
3611     default:
3612       return false;
3613     }
3614 }
3615
3616 bool
3617 aarch64_symbolic_address_p (rtx x)
3618 {
3619   rtx offset;
3620
3621   split_const (x, &x, &offset);
3622   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3623 }
3624
3625 /* Classify the base of symbolic expression X, given that X appears in
3626    context CONTEXT.  */
3627
3628 enum aarch64_symbol_type
3629 aarch64_classify_symbolic_expression (rtx x,
3630                                       enum aarch64_symbol_context context)
3631 {
3632   rtx offset;
3633
3634   split_const (x, &x, &offset);
3635   return aarch64_classify_symbol (x, offset, context);
3636 }
3637
3638
3639 /* Return TRUE if X is a legitimate address for accessing memory in
3640    mode MODE.  */
3641 static bool
3642 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3643 {
3644   struct aarch64_address_info addr;
3645
3646   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3647 }
3648
3649 /* Return TRUE if X is a legitimate address for accessing memory in
3650    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3651    pair operation.  */
3652 bool
3653 aarch64_legitimate_address_p (machine_mode mode, rtx x,
3654                               RTX_CODE outer_code, bool strict_p)
3655 {
3656   struct aarch64_address_info addr;
3657
3658   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3659 }
3660
3661 /* Return TRUE if rtx X is immediate constant 0.0 */
3662 bool
3663 aarch64_float_const_zero_rtx_p (rtx x)
3664 {
3665   REAL_VALUE_TYPE r;
3666
3667   if (GET_MODE (x) == VOIDmode)
3668     return false;
3669
3670   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3671   if (REAL_VALUE_MINUS_ZERO (r))
3672     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3673   return REAL_VALUES_EQUAL (r, dconst0);
3674 }
3675
3676 /* Return the fixed registers used for condition codes.  */
3677
3678 static bool
3679 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3680 {
3681   *p1 = CC_REGNUM;
3682   *p2 = INVALID_REGNUM;
3683   return true;
3684 }
3685
3686 /* Emit call insn with PAT and do aarch64-specific handling.  */
3687
3688 void
3689 aarch64_emit_call_insn (rtx pat)
3690 {
3691   rtx insn = emit_call_insn (pat);
3692
3693   rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3694   clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3695   clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3696 }
3697
3698 machine_mode
3699 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3700 {
3701   /* All floating point compares return CCFP if it is an equality
3702      comparison, and CCFPE otherwise.  */
3703   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3704     {
3705       switch (code)
3706         {
3707         case EQ:
3708         case NE:
3709         case UNORDERED:
3710         case ORDERED:
3711         case UNLT:
3712         case UNLE:
3713         case UNGT:
3714         case UNGE:
3715         case UNEQ:
3716         case LTGT:
3717           return CCFPmode;
3718
3719         case LT:
3720         case LE:
3721         case GT:
3722         case GE:
3723           return CCFPEmode;
3724
3725         default:
3726           gcc_unreachable ();
3727         }
3728     }
3729
3730   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3731       && y == const0_rtx
3732       && (code == EQ || code == NE || code == LT || code == GE)
3733       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3734           || GET_CODE (x) == NEG))
3735     return CC_NZmode;
3736
3737   /* A compare with a shifted operand.  Because of canonicalization,
3738      the comparison will have to be swapped when we emit the assembly
3739      code.  */
3740   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3741       && (REG_P (y) || GET_CODE (y) == SUBREG)
3742       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3743           || GET_CODE (x) == LSHIFTRT
3744           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3745     return CC_SWPmode;
3746
3747   /* Similarly for a negated operand, but we can only do this for
3748      equalities.  */
3749   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3750       && (REG_P (y) || GET_CODE (y) == SUBREG)
3751       && (code == EQ || code == NE)
3752       && GET_CODE (x) == NEG)
3753     return CC_Zmode;
3754
3755   /* A compare of a mode narrower than SI mode against zero can be done
3756      by extending the value in the comparison.  */
3757   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3758       && y == const0_rtx)
3759     /* Only use sign-extension if we really need it.  */
3760     return ((code == GT || code == GE || code == LE || code == LT)
3761             ? CC_SESWPmode : CC_ZESWPmode);
3762
3763   /* For everything else, return CCmode.  */
3764   return CCmode;
3765 }
3766
3767 static int
3768 aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3769
3770 int
3771 aarch64_get_condition_code (rtx x)
3772 {
3773   machine_mode mode = GET_MODE (XEXP (x, 0));
3774   enum rtx_code comp_code = GET_CODE (x);
3775
3776   if (GET_MODE_CLASS (mode) != MODE_CC)
3777     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3778   return aarch64_get_condition_code_1 (mode, comp_code);
3779 }
3780
3781 static int
3782 aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3783 {
3784   int ne = -1, eq = -1;
3785   switch (mode)
3786     {
3787     case CCFPmode:
3788     case CCFPEmode:
3789       switch (comp_code)
3790         {
3791         case GE: return AARCH64_GE;
3792         case GT: return AARCH64_GT;
3793         case LE: return AARCH64_LS;
3794         case LT: return AARCH64_MI;
3795         case NE: return AARCH64_NE;
3796         case EQ: return AARCH64_EQ;
3797         case ORDERED: return AARCH64_VC;
3798         case UNORDERED: return AARCH64_VS;
3799         case UNLT: return AARCH64_LT;
3800         case UNLE: return AARCH64_LE;
3801         case UNGT: return AARCH64_HI;
3802         case UNGE: return AARCH64_PL;
3803         default: return -1;
3804         }
3805       break;
3806
3807     case CC_DNEmode:
3808       ne = AARCH64_NE;
3809       eq = AARCH64_EQ;
3810       break;
3811
3812     case CC_DEQmode:
3813       ne = AARCH64_EQ;
3814       eq = AARCH64_NE;
3815       break;
3816
3817     case CC_DGEmode:
3818       ne = AARCH64_GE;
3819       eq = AARCH64_LT;
3820       break;
3821
3822     case CC_DLTmode:
3823       ne = AARCH64_LT;
3824       eq = AARCH64_GE;
3825       break;
3826
3827     case CC_DGTmode:
3828       ne = AARCH64_GT;
3829       eq = AARCH64_LE;
3830       break;
3831
3832     case CC_DLEmode:
3833       ne = AARCH64_LE;
3834       eq = AARCH64_GT;
3835       break;
3836
3837     case CC_DGEUmode:
3838       ne = AARCH64_CS;
3839       eq = AARCH64_CC;
3840       break;
3841
3842     case CC_DLTUmode:
3843       ne = AARCH64_CC;
3844       eq = AARCH64_CS;
3845       break;
3846
3847     case CC_DGTUmode:
3848       ne = AARCH64_HI;
3849       eq = AARCH64_LS;
3850       break;
3851
3852     case CC_DLEUmode:
3853       ne = AARCH64_LS;
3854       eq = AARCH64_HI;
3855       break;
3856
3857     case CCmode:
3858       switch (comp_code)
3859         {
3860         case NE: return AARCH64_NE;
3861         case EQ: return AARCH64_EQ;
3862         case GE: return AARCH64_GE;
3863         case GT: return AARCH64_GT;
3864         case LE: return AARCH64_LE;
3865         case LT: return AARCH64_LT;
3866         case GEU: return AARCH64_CS;
3867         case GTU: return AARCH64_HI;
3868         case LEU: return AARCH64_LS;
3869         case LTU: return AARCH64_CC;
3870         default: return -1;
3871         }
3872       break;
3873
3874     case CC_SWPmode:
3875     case CC_ZESWPmode:
3876     case CC_SESWPmode:
3877       switch (comp_code)
3878         {
3879         case NE: return AARCH64_NE;
3880         case EQ: return AARCH64_EQ;
3881         case GE: return AARCH64_LE;
3882         case GT: return AARCH64_LT;
3883         case LE: return AARCH64_GE;
3884         case LT: return AARCH64_GT;
3885         case GEU: return AARCH64_LS;
3886         case GTU: return AARCH64_CC;
3887         case LEU: return AARCH64_CS;
3888         case LTU: return AARCH64_HI;
3889         default: return -1;
3890         }
3891       break;
3892
3893     case CC_NZmode:
3894       switch (comp_code)
3895         {
3896         case NE: return AARCH64_NE;
3897         case EQ: return AARCH64_EQ;
3898         case GE: return AARCH64_PL;
3899         case LT: return AARCH64_MI;
3900         default: return -1;
3901         }
3902       break;
3903
3904     case CC_Zmode:
3905       switch (comp_code)
3906         {
3907         case NE: return AARCH64_NE;
3908         case EQ: return AARCH64_EQ;
3909         default: return -1;
3910         }
3911       break;
3912
3913     default:
3914       return -1;
3915       break;
3916     }
3917
3918   if (comp_code == NE)
3919     return ne;
3920
3921   if (comp_code == EQ)
3922     return eq;
3923
3924   return -1;
3925 }
3926
3927 bool
3928 aarch64_const_vec_all_same_in_range_p (rtx x,
3929                                   HOST_WIDE_INT minval,
3930                                   HOST_WIDE_INT maxval)
3931 {
3932   HOST_WIDE_INT firstval;
3933   int count, i;
3934
3935   if (GET_CODE (x) != CONST_VECTOR
3936       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3937     return false;
3938
3939   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3940   if (firstval < minval || firstval > maxval)
3941     return false;
3942
3943   count = CONST_VECTOR_NUNITS (x);
3944   for (i = 1; i < count; i++)
3945     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3946       return false;
3947
3948   return true;
3949 }
3950
3951 bool
3952 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3953 {
3954   return aarch64_const_vec_all_same_in_range_p (x, val, val);
3955 }
3956
3957 static unsigned
3958 bit_count (unsigned HOST_WIDE_INT value)
3959 {
3960   unsigned count = 0;
3961
3962   while (value)
3963     {
3964       count++;
3965       value &= value - 1;
3966     }
3967
3968   return count;
3969 }
3970
3971 /* N Z C V.  */
3972 #define AARCH64_CC_V 1
3973 #define AARCH64_CC_C (1 << 1)
3974 #define AARCH64_CC_Z (1 << 2)
3975 #define AARCH64_CC_N (1 << 3)
3976
3977 /* N Z C V flags for ccmp.  The first code is for AND op and the other
3978    is for IOR op.  Indexed by AARCH64_COND_CODE.  */
3979 static const int aarch64_nzcv_codes[][2] =
3980 {
3981   {AARCH64_CC_Z, 0}, /* EQ, Z == 1.  */
3982   {0, AARCH64_CC_Z}, /* NE, Z == 0.  */
3983   {AARCH64_CC_C, 0}, /* CS, C == 1.  */
3984   {0, AARCH64_CC_C}, /* CC, C == 0.  */
3985   {AARCH64_CC_N, 0}, /* MI, N == 1.  */
3986   {0, AARCH64_CC_N}, /* PL, N == 0.  */
3987   {AARCH64_CC_V, 0}, /* VS, V == 1.  */
3988   {0, AARCH64_CC_V}, /* VC, V == 0.  */
3989   {AARCH64_CC_C, 0}, /* HI, C ==1 && Z == 0.  */
3990   {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0).  */
3991   {0, AARCH64_CC_V}, /* GE, N == V.  */
3992   {AARCH64_CC_V, 0}, /* LT, N != V.  */
3993   {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V.  */
3994   {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V).  */
3995   {0, 0}, /* AL, Any.  */
3996   {0, 0}, /* NV, Any.  */
3997 };
3998
3999 int
4000 aarch64_ccmp_mode_to_code (enum machine_mode mode)
4001 {
4002   switch (mode)
4003     {
4004     case CC_DNEmode:
4005       return NE;
4006
4007     case CC_DEQmode:
4008       return EQ;
4009
4010     case CC_DLEmode:
4011       return LE;
4012
4013     case CC_DGTmode:
4014       return GT;
4015
4016     case CC_DLTmode:
4017       return LT;
4018
4019     case CC_DGEmode:
4020       return GE;
4021
4022     case CC_DLEUmode:
4023       return LEU;
4024
4025     case CC_DGTUmode:
4026       return GTU;
4027
4028     case CC_DLTUmode:
4029       return LTU;
4030
4031     case CC_DGEUmode:
4032       return GEU;
4033
4034     default:
4035       gcc_unreachable ();
4036     }
4037 }
4038
4039
4040 void
4041 aarch64_print_operand (FILE *f, rtx x, char code)
4042 {
4043   switch (code)
4044     {
4045     /* An integer or symbol address without a preceding # sign.  */
4046     case 'c':
4047       switch (GET_CODE (x))
4048         {
4049         case CONST_INT:
4050           fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4051           break;
4052
4053         case SYMBOL_REF:
4054           output_addr_const (f, x);
4055           break;
4056
4057         case CONST:
4058           if (GET_CODE (XEXP (x, 0)) == PLUS
4059               && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4060             {
4061               output_addr_const (f, x);
4062               break;
4063             }
4064           /* Fall through.  */
4065
4066         default:
4067           output_operand_lossage ("Unsupported operand for code '%c'", code);
4068         }
4069       break;
4070
4071     case 'e':
4072       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
4073       {
4074         int n;
4075
4076         if (!CONST_INT_P (x)
4077             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4078           {
4079             output_operand_lossage ("invalid operand for '%%%c'", code);
4080             return;
4081           }
4082
4083         switch (n)
4084           {
4085           case 3:
4086             fputc ('b', f);
4087             break;
4088           case 4:
4089             fputc ('h', f);
4090             break;
4091           case 5:
4092             fputc ('w', f);
4093             break;
4094           default:
4095             output_operand_lossage ("invalid operand for '%%%c'", code);
4096             return;
4097           }
4098       }
4099       break;
4100
4101     case 'p':
4102       {
4103         int n;
4104
4105         /* Print N such that 2^N == X.  */
4106         if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
4107           {
4108             output_operand_lossage ("invalid operand for '%%%c'", code);
4109             return;
4110           }
4111
4112         asm_fprintf (f, "%d", n);
4113       }
4114       break;
4115
4116     case 'P':
4117       /* Print the number of non-zero bits in X (a const_int).  */
4118       if (!CONST_INT_P (x))
4119         {
4120           output_operand_lossage ("invalid operand for '%%%c'", code);
4121           return;
4122         }
4123
4124       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4125       break;
4126
4127     case 'H':
4128       /* Print the higher numbered register of a pair (TImode) of regs.  */
4129       if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
4130         {
4131           output_operand_lossage ("invalid operand for '%%%c'", code);
4132           return;
4133         }
4134
4135       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
4136       break;
4137
4138     case 'm':
4139       {
4140         int cond_code;
4141         /* Print a condition (eq, ne, etc).  */
4142
4143         /* CONST_TRUE_RTX means always -- that's the default.  */
4144         if (x == const_true_rtx)
4145           return;
4146
4147         if (!COMPARISON_P (x))
4148           {
4149             output_operand_lossage ("invalid operand for '%%%c'", code);
4150             return;
4151           }
4152
4153         cond_code = aarch64_get_condition_code (x);
4154         gcc_assert (cond_code >= 0);
4155         fputs (aarch64_condition_codes[cond_code], f);
4156       }
4157       break;
4158
4159     case 'M':
4160       {
4161         int cond_code;
4162         /* Print the inverse of a condition (eq <-> ne, etc).  */
4163
4164         /* CONST_TRUE_RTX means never -- that's the default.  */
4165         if (x == const_true_rtx)
4166           {
4167             fputs ("nv", f);
4168             return;
4169           }
4170
4171         if (!COMPARISON_P (x))
4172           {
4173             output_operand_lossage ("invalid operand for '%%%c'", code);
4174             return;
4175           }
4176         cond_code = aarch64_get_condition_code (x);
4177         gcc_assert (cond_code >= 0);
4178         fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4179                                        (cond_code)], f);
4180       }
4181       break;
4182
4183     case 'b':
4184     case 'h':
4185     case 's':
4186     case 'd':
4187     case 'q':
4188       /* Print a scalar FP/SIMD register name.  */
4189       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4190         {
4191           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4192           return;
4193         }
4194       asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
4195       break;
4196
4197     case 'S':
4198     case 'T':
4199     case 'U':
4200     case 'V':
4201       /* Print the first FP/SIMD register name in a list.  */
4202       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4203         {
4204           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4205           return;
4206         }
4207       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
4208       break;
4209
4210     case 'X':
4211       /* Print bottom 16 bits of integer constant in hex.  */
4212       if (!CONST_INT_P (x))
4213         {
4214           output_operand_lossage ("invalid operand for '%%%c'", code);
4215           return;
4216         }
4217       asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
4218       break;
4219
4220     case 'w':
4221     case 'x':
4222       /* Print a general register name or the zero register (32-bit or
4223          64-bit).  */
4224       if (x == const0_rtx
4225           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
4226         {
4227           asm_fprintf (f, "%czr", code);
4228           break;
4229         }
4230
4231       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4232         {
4233           asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
4234           break;
4235         }
4236
4237       if (REG_P (x) && REGNO (x) == SP_REGNUM)
4238         {
4239           asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
4240           break;
4241         }
4242
4243       /* Fall through */
4244
4245     case 0:
4246       /* Print a normal operand, if it's a general register, then we
4247          assume DImode.  */
4248       if (x == NULL)
4249         {
4250           output_operand_lossage ("missing operand");
4251           return;
4252         }
4253
4254       switch (GET_CODE (x))
4255         {
4256         case REG:
4257           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
4258           break;
4259
4260         case MEM:
4261           aarch64_memory_reference_mode = GET_MODE (x);
4262           output_address (XEXP (x, 0));
4263           break;
4264
4265         case LABEL_REF:
4266         case SYMBOL_REF:
4267           output_addr_const (asm_out_file, x);
4268           break;
4269
4270         case CONST_INT:
4271           asm_fprintf (f, "%wd", INTVAL (x));
4272           break;
4273
4274         case CONST_VECTOR:
4275           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4276             {
4277               gcc_assert (
4278                   aarch64_const_vec_all_same_in_range_p (x,
4279                                                          HOST_WIDE_INT_MIN,
4280                                                          HOST_WIDE_INT_MAX));
4281               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4282             }
4283           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4284             {
4285               fputc ('0', f);
4286             }
4287           else
4288             gcc_unreachable ();
4289           break;
4290
4291         case CONST_DOUBLE:
4292           /* CONST_DOUBLE can represent a double-width integer.
4293              In this case, the mode of x is VOIDmode.  */
4294           if (GET_MODE (x) == VOIDmode)
4295             ; /* Do Nothing.  */
4296           else if (aarch64_float_const_zero_rtx_p (x))
4297             {
4298               fputc ('0', f);
4299               break;
4300             }
4301           else if (aarch64_float_const_representable_p (x))
4302             {
4303 #define buf_size 20
4304               char float_buf[buf_size] = {'\0'};
4305               REAL_VALUE_TYPE r;
4306               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4307               real_to_decimal_for_mode (float_buf, &r,
4308                                         buf_size, buf_size,
4309                                         1, GET_MODE (x));
4310               asm_fprintf (asm_out_file, "%s", float_buf);
4311               break;
4312 #undef buf_size
4313             }
4314           output_operand_lossage ("invalid constant");
4315           return;
4316         default:
4317           output_operand_lossage ("invalid operand");
4318           return;
4319         }
4320       break;
4321
4322     case 'A':
4323       if (GET_CODE (x) == HIGH)
4324         x = XEXP (x, 0);
4325
4326       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4327         {
4328         case SYMBOL_SMALL_GOT:
4329           asm_fprintf (asm_out_file, ":got:");
4330           break;
4331
4332         case SYMBOL_SMALL_TLSGD:
4333           asm_fprintf (asm_out_file, ":tlsgd:");
4334           break;
4335
4336         case SYMBOL_SMALL_TLSDESC:
4337           asm_fprintf (asm_out_file, ":tlsdesc:");
4338           break;
4339
4340         case SYMBOL_SMALL_GOTTPREL:
4341           asm_fprintf (asm_out_file, ":gottprel:");
4342           break;
4343
4344         case SYMBOL_SMALL_TPREL:
4345           asm_fprintf (asm_out_file, ":tprel:");
4346           break;
4347
4348         case SYMBOL_TINY_GOT:
4349           gcc_unreachable ();
4350           break;
4351
4352         default:
4353           break;
4354         }
4355       output_addr_const (asm_out_file, x);
4356       break;
4357
4358     case 'L':
4359       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4360         {
4361         case SYMBOL_SMALL_GOT:
4362           asm_fprintf (asm_out_file, ":lo12:");
4363           break;
4364
4365         case SYMBOL_SMALL_TLSGD:
4366           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4367           break;
4368
4369         case SYMBOL_SMALL_TLSDESC:
4370           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4371           break;
4372
4373         case SYMBOL_SMALL_GOTTPREL:
4374           asm_fprintf (asm_out_file, ":gottprel_lo12:");
4375           break;
4376
4377         case SYMBOL_SMALL_TPREL:
4378           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4379           break;
4380
4381         case SYMBOL_TINY_GOT:
4382           asm_fprintf (asm_out_file, ":got:");
4383           break;
4384
4385         default:
4386           break;
4387         }
4388       output_addr_const (asm_out_file, x);
4389       break;
4390
4391     case 'G':
4392
4393       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4394         {
4395         case SYMBOL_SMALL_TPREL:
4396           asm_fprintf (asm_out_file, ":tprel_hi12:");
4397           break;
4398         default:
4399           break;
4400         }
4401       output_addr_const (asm_out_file, x);
4402       break;
4403
4404     case 'K':
4405       {
4406         int cond_code;
4407         /* Print nzcv.  */
4408
4409         if (!COMPARISON_P (x))
4410           {
4411             output_operand_lossage ("invalid operand for '%%%c'", code);
4412             return;
4413           }
4414
4415         cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4416         gcc_assert (cond_code >= 0);
4417         asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4418       }
4419       break;
4420
4421     case 'k':
4422       {
4423         int cond_code;
4424         /* Print nzcv.  */
4425
4426         if (!COMPARISON_P (x))
4427           {
4428             output_operand_lossage ("invalid operand for '%%%c'", code);
4429             return;
4430           }
4431
4432         cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4433         gcc_assert (cond_code >= 0);
4434         asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4435       }
4436       break;
4437
4438     default:
4439       output_operand_lossage ("invalid operand prefix '%%%c'", code);
4440       return;
4441     }
4442 }
4443
4444 void
4445 aarch64_print_operand_address (FILE *f, rtx x)
4446 {
4447   struct aarch64_address_info addr;
4448
4449   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4450                              MEM, true))
4451     switch (addr.type)
4452       {
4453       case ADDRESS_REG_IMM:
4454         if (addr.offset == const0_rtx)
4455           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4456         else
4457           asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4458                        INTVAL (addr.offset));
4459         return;
4460
4461       case ADDRESS_REG_REG:
4462         if (addr.shift == 0)
4463           asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4464                        reg_names [REGNO (addr.offset)]);
4465         else
4466           asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4467                        reg_names [REGNO (addr.offset)], addr.shift);
4468         return;
4469
4470       case ADDRESS_REG_UXTW:
4471         if (addr.shift == 0)
4472           asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4473                        REGNO (addr.offset) - R0_REGNUM);
4474         else
4475           asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4476                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
4477         return;
4478
4479       case ADDRESS_REG_SXTW:
4480         if (addr.shift == 0)
4481           asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4482                        REGNO (addr.offset) - R0_REGNUM);
4483         else
4484           asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4485                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
4486         return;
4487
4488       case ADDRESS_REG_WB:
4489         switch (GET_CODE (x))
4490           {
4491           case PRE_INC:
4492             asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4493                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4494             return;
4495           case POST_INC:
4496             asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4497                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4498             return;
4499           case PRE_DEC:
4500             asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4501                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4502             return;
4503           case POST_DEC:
4504             asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4505                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4506             return;
4507           case PRE_MODIFY:
4508             asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4509                          INTVAL (addr.offset));
4510             return;
4511           case POST_MODIFY:
4512             asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4513                          INTVAL (addr.offset));
4514             return;
4515           default:
4516             break;
4517           }
4518         break;
4519
4520       case ADDRESS_LO_SUM:
4521         asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4522         output_addr_const (f, addr.offset);
4523         asm_fprintf (f, "]");
4524         return;
4525
4526       case ADDRESS_SYMBOLIC:
4527         break;
4528       }
4529
4530   output_addr_const (f, x);
4531 }
4532
4533 bool
4534 aarch64_label_mentioned_p (rtx x)
4535 {
4536   const char *fmt;
4537   int i;
4538
4539   if (GET_CODE (x) == LABEL_REF)
4540     return true;
4541
4542   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4543      referencing instruction, but they are constant offsets, not
4544      symbols.  */
4545   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4546     return false;
4547
4548   fmt = GET_RTX_FORMAT (GET_CODE (x));
4549   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4550     {
4551       if (fmt[i] == 'E')
4552         {
4553           int j;
4554
4555           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4556             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4557               return 1;
4558         }
4559       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4560         return 1;
4561     }
4562
4563   return 0;
4564 }
4565
4566 /* Implement REGNO_REG_CLASS.  */
4567
4568 enum reg_class
4569 aarch64_regno_regclass (unsigned regno)
4570 {
4571   if (GP_REGNUM_P (regno))
4572     return GENERAL_REGS;
4573
4574   if (regno == SP_REGNUM)
4575     return STACK_REG;
4576
4577   if (regno == FRAME_POINTER_REGNUM
4578       || regno == ARG_POINTER_REGNUM)
4579     return POINTER_REGS;
4580
4581   if (FP_REGNUM_P (regno))
4582     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
4583
4584   return NO_REGS;
4585 }
4586
4587 static rtx
4588 aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
4589 {
4590   /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4591      where mask is selected by alignment and size of the offset.
4592      We try to pick as large a range for the offset as possible to
4593      maximize the chance of a CSE.  However, for aligned addresses
4594      we limit the range to 4k so that structures with different sized
4595      elements are likely to use the same base.  */
4596
4597   if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4598     {
4599       HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4600       HOST_WIDE_INT base_offset;
4601
4602       /* Does it look like we'll need a load/store-pair operation?  */
4603       if (GET_MODE_SIZE (mode) > 16
4604           || mode == TImode)
4605         base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4606                        & ~((128 * GET_MODE_SIZE (mode)) - 1));
4607       /* For offsets aren't a multiple of the access size, the limit is
4608          -256...255.  */
4609       else if (offset & (GET_MODE_SIZE (mode) - 1))
4610         base_offset = (offset + 0x100) & ~0x1ff;
4611       else
4612         base_offset = offset & ~0xfff;
4613
4614       if (base_offset == 0)
4615         return x;
4616
4617       offset -= base_offset;
4618       rtx base_reg = gen_reg_rtx (Pmode);
4619       rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4620                            NULL_RTX);
4621       emit_move_insn (base_reg, val);
4622       x = plus_constant (Pmode, base_reg, offset);
4623     }
4624
4625   return x;
4626 }
4627
4628 /* Try a machine-dependent way of reloading an illegitimate address
4629    operand.  If we find one, push the reload and return the new rtx.  */
4630
4631 rtx
4632 aarch64_legitimize_reload_address (rtx *x_p,
4633                                    machine_mode mode,
4634                                    int opnum, int type,
4635                                    int ind_levels ATTRIBUTE_UNUSED)
4636 {
4637   rtx x = *x_p;
4638
4639   /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
4640   if (aarch64_vect_struct_mode_p (mode)
4641       && GET_CODE (x) == PLUS
4642       && REG_P (XEXP (x, 0))
4643       && CONST_INT_P (XEXP (x, 1)))
4644     {
4645       rtx orig_rtx = x;
4646       x = copy_rtx (x);
4647       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4648                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4649                    opnum, (enum reload_type) type);
4650       return x;
4651     }
4652
4653   /* We must recognize output that we have already generated ourselves.  */
4654   if (GET_CODE (x) == PLUS
4655       && GET_CODE (XEXP (x, 0)) == PLUS
4656       && REG_P (XEXP (XEXP (x, 0), 0))
4657       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4658       && CONST_INT_P (XEXP (x, 1)))
4659     {
4660       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4661                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4662                    opnum, (enum reload_type) type);
4663       return x;
4664     }
4665
4666   /* We wish to handle large displacements off a base register by splitting
4667      the addend across an add and the mem insn.  This can cut the number of
4668      extra insns needed from 3 to 1.  It is only useful for load/store of a
4669      single register with 12 bit offset field.  */
4670   if (GET_CODE (x) == PLUS
4671       && REG_P (XEXP (x, 0))
4672       && CONST_INT_P (XEXP (x, 1))
4673       && HARD_REGISTER_P (XEXP (x, 0))
4674       && mode != TImode
4675       && mode != TFmode
4676       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4677     {
4678       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4679       HOST_WIDE_INT low = val & 0xfff;
4680       HOST_WIDE_INT high = val - low;
4681       HOST_WIDE_INT offs;
4682       rtx cst;
4683       machine_mode xmode = GET_MODE (x);
4684
4685       /* In ILP32, xmode can be either DImode or SImode.  */
4686       gcc_assert (xmode == DImode || xmode == SImode);
4687
4688       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
4689          BLKmode alignment.  */
4690       if (GET_MODE_SIZE (mode) == 0)
4691         return NULL_RTX;
4692
4693       offs = low % GET_MODE_SIZE (mode);
4694
4695       /* Align misaligned offset by adjusting high part to compensate.  */
4696       if (offs != 0)
4697         {
4698           if (aarch64_uimm12_shift (high + offs))
4699             {
4700               /* Align down.  */
4701               low = low - offs;
4702               high = high + offs;
4703             }
4704           else
4705             {
4706               /* Align up.  */
4707               offs = GET_MODE_SIZE (mode) - offs;
4708               low = low + offs;
4709               high = high + (low & 0x1000) - offs;
4710               low &= 0xfff;
4711             }
4712         }
4713
4714       /* Check for overflow.  */
4715       if (high + low != val)
4716         return NULL_RTX;
4717
4718       cst = GEN_INT (high);
4719       if (!aarch64_uimm12_shift (high))
4720         cst = force_const_mem (xmode, cst);
4721
4722       /* Reload high part into base reg, leaving the low part
4723          in the mem instruction.
4724          Note that replacing this gen_rtx_PLUS with plus_constant is
4725          wrong in this case because we rely on the
4726          (plus (plus reg c1) c2) structure being preserved so that
4727          XEXP (*p, 0) in push_reload below uses the correct term.  */
4728       x = gen_rtx_PLUS (xmode,
4729                         gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4730                         GEN_INT (low));
4731
4732       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4733                    BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4734                    opnum, (enum reload_type) type);
4735       return x;
4736     }
4737
4738   return NULL_RTX;
4739 }
4740
4741
4742 static reg_class_t
4743 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4744                           reg_class_t rclass,
4745                           machine_mode mode,
4746                           secondary_reload_info *sri)
4747 {
4748   /* Without the TARGET_SIMD instructions we cannot move a Q register
4749      to a Q register directly.  We need a scratch.  */
4750   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4751       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4752       && reg_class_subset_p (rclass, FP_REGS))
4753     {
4754       if (mode == TFmode)
4755         sri->icode = CODE_FOR_aarch64_reload_movtf;
4756       else if (mode == TImode)
4757         sri->icode = CODE_FOR_aarch64_reload_movti;
4758       return NO_REGS;
4759     }
4760
4761   /* A TFmode or TImode memory access should be handled via an FP_REGS
4762      because AArch64 has richer addressing modes for LDR/STR instructions
4763      than LDP/STP instructions.  */
4764   if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4765       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4766     return FP_REGS;
4767
4768   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4769       return GENERAL_REGS;
4770
4771   return NO_REGS;
4772 }
4773
4774 static bool
4775 aarch64_can_eliminate (const int from, const int to)
4776 {
4777   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4778      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4779
4780   if (frame_pointer_needed)
4781     {
4782       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4783         return true;
4784       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4785         return false;
4786       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4787           && !cfun->calls_alloca)
4788         return true;
4789       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4790         return true;
4791
4792       return false;
4793     }
4794   else
4795     {
4796       /* If we decided that we didn't need a leaf frame pointer but then used
4797          LR in the function, then we'll want a frame pointer after all, so
4798          prevent this elimination to ensure a frame pointer is used.  */
4799       if (to == STACK_POINTER_REGNUM
4800           && flag_omit_leaf_frame_pointer
4801           && df_regs_ever_live_p (LR_REGNUM))
4802         return false;
4803     }
4804
4805   return true;
4806 }
4807
4808 HOST_WIDE_INT
4809 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4810 {
4811   aarch64_layout_frame ();
4812
4813   if (to == HARD_FRAME_POINTER_REGNUM)
4814     {
4815       if (from == ARG_POINTER_REGNUM)
4816         return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4817
4818       if (from == FRAME_POINTER_REGNUM)
4819         return (cfun->machine->frame.hard_fp_offset
4820                 - cfun->machine->frame.saved_varargs_size);
4821     }
4822
4823   if (to == STACK_POINTER_REGNUM)
4824     {
4825       if (from == FRAME_POINTER_REGNUM)
4826           return (cfun->machine->frame.frame_size
4827                   - cfun->machine->frame.saved_varargs_size);
4828     }
4829
4830   return cfun->machine->frame.frame_size;
4831 }
4832
4833 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4834    previous frame.  */
4835
4836 rtx
4837 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4838 {
4839   if (count != 0)
4840     return const0_rtx;
4841   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4842 }
4843
4844
4845 static void
4846 aarch64_asm_trampoline_template (FILE *f)
4847 {
4848   if (TARGET_ILP32)
4849     {
4850       asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4851       asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4852     }
4853   else
4854     {
4855       asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4856       asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4857     }
4858   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4859   assemble_aligned_integer (4, const0_rtx);
4860   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4861   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4862 }
4863
4864 static void
4865 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4866 {
4867   rtx fnaddr, mem, a_tramp;
4868   const int tramp_code_sz = 16;
4869
4870   /* Don't need to copy the trailing D-words, we fill those in below.  */
4871   emit_block_move (m_tramp, assemble_trampoline_template (),
4872                    GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4873   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4874   fnaddr = XEXP (DECL_RTL (fndecl), 0);
4875   if (GET_MODE (fnaddr) != ptr_mode)
4876     fnaddr = convert_memory_address (ptr_mode, fnaddr);
4877   emit_move_insn (mem, fnaddr);
4878
4879   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4880   emit_move_insn (mem, chain_value);
4881
4882   /* XXX We should really define a "clear_cache" pattern and use
4883      gen_clear_cache().  */
4884   a_tramp = XEXP (m_tramp, 0);
4885   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4886                      LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4887                      plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4888                      ptr_mode);
4889 }
4890
4891 static unsigned char
4892 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
4893 {
4894   switch (regclass)
4895     {
4896     case CALLER_SAVE_REGS:
4897     case POINTER_REGS:
4898     case GENERAL_REGS:
4899     case ALL_REGS:
4900     case FP_REGS:
4901     case FP_LO_REGS:
4902       return
4903         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4904                                        (GET_MODE_SIZE (mode) + 7) / 8;
4905     case STACK_REG:
4906       return 1;
4907
4908     case NO_REGS:
4909       return 0;
4910
4911     default:
4912       break;
4913     }
4914   gcc_unreachable ();
4915 }
4916
4917 static reg_class_t
4918 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4919 {
4920   if (regclass == POINTER_REGS)
4921     return GENERAL_REGS;
4922
4923   if (regclass == STACK_REG)
4924     {
4925       if (REG_P(x)
4926           && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4927           return regclass;
4928
4929       return NO_REGS;
4930     }
4931
4932   /* If it's an integer immediate that MOVI can't handle, then
4933      FP_REGS is not an option, so we return NO_REGS instead.  */
4934   if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4935       && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4936     return NO_REGS;
4937
4938   /* Register eliminiation can result in a request for
4939      SP+constant->FP_REGS.  We cannot support such operations which
4940      use SP as source and an FP_REG as destination, so reject out
4941      right now.  */
4942   if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4943     {
4944       rtx lhs = XEXP (x, 0);
4945
4946       /* Look through a possible SUBREG introduced by ILP32.  */
4947       if (GET_CODE (lhs) == SUBREG)
4948         lhs = SUBREG_REG (lhs);
4949
4950       gcc_assert (REG_P (lhs));
4951       gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4952                                       POINTER_REGS));
4953       return NO_REGS;
4954     }
4955
4956   return regclass;
4957 }
4958
4959 void
4960 aarch64_asm_output_labelref (FILE* f, const char *name)
4961 {
4962   asm_fprintf (f, "%U%s", name);
4963 }
4964
4965 static void
4966 aarch64_elf_asm_constructor (rtx symbol, int priority)
4967 {
4968   if (priority == DEFAULT_INIT_PRIORITY)
4969     default_ctor_section_asm_out_constructor (symbol, priority);
4970   else
4971     {
4972       section *s;
4973       char buf[18];
4974       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4975       s = get_section (buf, SECTION_WRITE, NULL);
4976       switch_to_section (s);
4977       assemble_align (POINTER_SIZE);
4978       assemble_aligned_integer (POINTER_BYTES, symbol);
4979     }
4980 }
4981
4982 static void
4983 aarch64_elf_asm_destructor (rtx symbol, int priority)
4984 {
4985   if (priority == DEFAULT_INIT_PRIORITY)
4986     default_dtor_section_asm_out_destructor (symbol, priority);
4987   else
4988     {
4989       section *s;
4990       char buf[18];
4991       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4992       s = get_section (buf, SECTION_WRITE, NULL);
4993       switch_to_section (s);
4994       assemble_align (POINTER_SIZE);
4995       assemble_aligned_integer (POINTER_BYTES, symbol);
4996     }
4997 }
4998
4999 const char*
5000 aarch64_output_casesi (rtx *operands)
5001 {
5002   char buf[100];
5003   char label[100];
5004   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
5005   int index;
5006   static const char *const patterns[4][2] =
5007   {
5008     {
5009       "ldrb\t%w3, [%0,%w1,uxtw]",
5010       "add\t%3, %4, %w3, sxtb #2"
5011     },
5012     {
5013       "ldrh\t%w3, [%0,%w1,uxtw #1]",
5014       "add\t%3, %4, %w3, sxth #2"
5015     },
5016     {
5017       "ldr\t%w3, [%0,%w1,uxtw #2]",
5018       "add\t%3, %4, %w3, sxtw #2"
5019     },
5020     /* We assume that DImode is only generated when not optimizing and
5021        that we don't really need 64-bit address offsets.  That would
5022        imply an object file with 8GB of code in a single function!  */
5023     {
5024       "ldr\t%w3, [%0,%w1,uxtw #2]",
5025       "add\t%3, %4, %w3, sxtw #2"
5026     }
5027   };
5028
5029   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5030
5031   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5032
5033   gcc_assert (index >= 0 && index <= 3);
5034
5035   /* Need to implement table size reduction, by chaning the code below.  */
5036   output_asm_insn (patterns[index][0], operands);
5037   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5038   snprintf (buf, sizeof (buf),
5039             "adr\t%%4, %s", targetm.strip_name_encoding (label));
5040   output_asm_insn (buf, operands);
5041   output_asm_insn (patterns[index][1], operands);
5042   output_asm_insn ("br\t%3", operands);
5043   assemble_label (asm_out_file, label);
5044   return "";
5045 }
5046
5047
5048 /* Return size in bits of an arithmetic operand which is shifted/scaled and
5049    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5050    operator.  */
5051
5052 int
5053 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5054 {
5055   if (shift >= 0 && shift <= 3)
5056     {
5057       int size;
5058       for (size = 8; size <= 32; size *= 2)
5059         {
5060           HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5061           if (mask == bits << shift)
5062             return size;
5063         }
5064     }
5065   return 0;
5066 }
5067
5068 static bool
5069 aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
5070                                    const_rtx x ATTRIBUTE_UNUSED)
5071 {
5072   /* We can't use blocks for constants when we're using a per-function
5073      constant pool.  */
5074   return false;
5075 }
5076
5077 static section *
5078 aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
5079                             rtx x ATTRIBUTE_UNUSED,
5080                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5081 {
5082   /* Force all constant pool entries into the current function section.  */
5083   return function_section (current_function_decl);
5084 }
5085
5086
5087 /* Costs.  */
5088
5089 /* Helper function for rtx cost calculation.  Strip a shift expression
5090    from X.  Returns the inner operand if successful, or the original
5091    expression on failure.  */
5092 static rtx
5093 aarch64_strip_shift (rtx x)
5094 {
5095   rtx op = x;
5096
5097   /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5098      we can convert both to ROR during final output.  */
5099   if ((GET_CODE (op) == ASHIFT
5100        || GET_CODE (op) == ASHIFTRT
5101        || GET_CODE (op) == LSHIFTRT
5102        || GET_CODE (op) == ROTATERT
5103        || GET_CODE (op) == ROTATE)
5104       && CONST_INT_P (XEXP (op, 1)))
5105     return XEXP (op, 0);
5106
5107   if (GET_CODE (op) == MULT
5108       && CONST_INT_P (XEXP (op, 1))
5109       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5110     return XEXP (op, 0);
5111
5112   return x;
5113 }
5114
5115 /* Helper function for rtx cost calculation.  Strip an extend
5116    expression from X.  Returns the inner operand if successful, or the
5117    original expression on failure.  We deal with a number of possible
5118    canonicalization variations here.  */
5119 static rtx
5120 aarch64_strip_extend (rtx x)
5121 {
5122   rtx op = x;
5123
5124   /* Zero and sign extraction of a widened value.  */
5125   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5126       && XEXP (op, 2) == const0_rtx
5127       && GET_CODE (XEXP (op, 0)) == MULT
5128       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5129                                          XEXP (op, 1)))
5130     return XEXP (XEXP (op, 0), 0);
5131
5132   /* It can also be represented (for zero-extend) as an AND with an
5133      immediate.  */
5134   if (GET_CODE (op) == AND
5135       && GET_CODE (XEXP (op, 0)) == MULT
5136       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5137       && CONST_INT_P (XEXP (op, 1))
5138       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5139                            INTVAL (XEXP (op, 1))) != 0)
5140     return XEXP (XEXP (op, 0), 0);
5141
5142   /* Now handle extended register, as this may also have an optional
5143      left shift by 1..4.  */
5144   if (GET_CODE (op) == ASHIFT
5145       && CONST_INT_P (XEXP (op, 1))
5146       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5147     op = XEXP (op, 0);
5148
5149   if (GET_CODE (op) == ZERO_EXTEND
5150       || GET_CODE (op) == SIGN_EXTEND)
5151     op = XEXP (op, 0);
5152
5153   if (op != x)
5154     return op;
5155
5156   return x;
5157 }
5158
5159 /* Helper function for rtx cost calculation.  Calculate the cost of
5160    a MULT, which may be part of a multiply-accumulate rtx.  Return
5161    the calculated cost of the expression, recursing manually in to
5162    operands where needed.  */
5163
5164 static int
5165 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5166 {
5167   rtx op0, op1;
5168   const struct cpu_cost_table *extra_cost
5169     = aarch64_tune_params->insn_extra_cost;
5170   int cost = 0;
5171   bool maybe_fma = (outer == PLUS || outer == MINUS);
5172   machine_mode mode = GET_MODE (x);
5173
5174   gcc_checking_assert (code == MULT);
5175
5176   op0 = XEXP (x, 0);
5177   op1 = XEXP (x, 1);
5178
5179   if (VECTOR_MODE_P (mode))
5180     mode = GET_MODE_INNER (mode);
5181
5182   /* Integer multiply/fma.  */
5183   if (GET_MODE_CLASS (mode) == MODE_INT)
5184     {
5185       /* The multiply will be canonicalized as a shift, cost it as such.  */
5186       if (CONST_INT_P (op1)
5187           && exact_log2 (INTVAL (op1)) > 0)
5188         {
5189           if (speed)
5190             {
5191               if (maybe_fma)
5192                 /* ADD (shifted register).  */
5193                 cost += extra_cost->alu.arith_shift;
5194               else
5195                 /* LSL (immediate).  */
5196                 cost += extra_cost->alu.shift;
5197             }
5198
5199           cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5200
5201           return cost;
5202         }
5203
5204       /* Integer multiplies or FMAs have zero/sign extending variants.  */
5205       if ((GET_CODE (op0) == ZERO_EXTEND
5206            && GET_CODE (op1) == ZERO_EXTEND)
5207           || (GET_CODE (op0) == SIGN_EXTEND
5208               && GET_CODE (op1) == SIGN_EXTEND))
5209         {
5210           cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5211                   + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5212
5213           if (speed)
5214             {
5215               if (maybe_fma)
5216                 /* MADD/SMADDL/UMADDL.  */
5217                 cost += extra_cost->mult[0].extend_add;
5218               else
5219                 /* MUL/SMULL/UMULL.  */
5220                 cost += extra_cost->mult[0].extend;
5221             }
5222
5223           return cost;
5224         }
5225
5226       /* This is either an integer multiply or an FMA.  In both cases
5227          we want to recurse and cost the operands.  */
5228       cost += rtx_cost (op0, MULT, 0, speed)
5229               + rtx_cost (op1, MULT, 1, speed);
5230
5231       if (speed)
5232         {
5233           if (maybe_fma)
5234             /* MADD.  */
5235             cost += extra_cost->mult[mode == DImode].add;
5236           else
5237             /* MUL.  */
5238             cost += extra_cost->mult[mode == DImode].simple;
5239         }
5240
5241       return cost;
5242     }
5243   else
5244     {
5245       if (speed)
5246         {
5247           /* Floating-point FMA/FMUL can also support negations of the
5248              operands.  */
5249           if (GET_CODE (op0) == NEG)
5250             op0 = XEXP (op0, 0);
5251           if (GET_CODE (op1) == NEG)
5252             op1 = XEXP (op1, 0);
5253
5254           if (maybe_fma)
5255             /* FMADD/FNMADD/FNMSUB/FMSUB.  */
5256             cost += extra_cost->fp[mode == DFmode].fma;
5257           else
5258             /* FMUL/FNMUL.  */
5259             cost += extra_cost->fp[mode == DFmode].mult;
5260         }
5261
5262       cost += rtx_cost (op0, MULT, 0, speed)
5263               + rtx_cost (op1, MULT, 1, speed);
5264       return cost;
5265     }
5266 }
5267
5268 static int
5269 aarch64_address_cost (rtx x,
5270                       machine_mode mode,
5271                       addr_space_t as ATTRIBUTE_UNUSED,
5272                       bool speed)
5273 {
5274   enum rtx_code c = GET_CODE (x);
5275   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5276   struct aarch64_address_info info;
5277   int cost = 0;
5278   info.shift = 0;
5279
5280   if (!aarch64_classify_address (&info, x, mode, c, false))
5281     {
5282       if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5283         {
5284           /* This is a CONST or SYMBOL ref which will be split
5285              in a different way depending on the code model in use.
5286              Cost it through the generic infrastructure.  */
5287           int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5288           /* Divide through by the cost of one instruction to
5289              bring it to the same units as the address costs.  */
5290           cost_symbol_ref /= COSTS_N_INSNS (1);
5291           /* The cost is then the cost of preparing the address,
5292              followed by an immediate (possibly 0) offset.  */
5293           return cost_symbol_ref + addr_cost->imm_offset;
5294         }
5295       else
5296         {
5297           /* This is most likely a jump table from a case
5298              statement.  */
5299           return addr_cost->register_offset;
5300         }
5301     }
5302
5303   switch (info.type)
5304     {
5305       case ADDRESS_LO_SUM:
5306       case ADDRESS_SYMBOLIC:
5307       case ADDRESS_REG_IMM:
5308         cost += addr_cost->imm_offset;
5309         break;
5310
5311       case ADDRESS_REG_WB:
5312         if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5313           cost += addr_cost->pre_modify;
5314         else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5315           cost += addr_cost->post_modify;
5316         else
5317           gcc_unreachable ();
5318
5319         break;
5320
5321       case ADDRESS_REG_REG:
5322         cost += addr_cost->register_offset;
5323         break;
5324
5325       case ADDRESS_REG_UXTW:
5326       case ADDRESS_REG_SXTW:
5327         cost += addr_cost->register_extend;
5328         break;
5329
5330       default:
5331         gcc_unreachable ();
5332     }
5333
5334
5335   if (info.shift > 0)
5336     {
5337       /* For the sake of calculating the cost of the shifted register
5338          component, we can treat same sized modes in the same way.  */
5339       switch (GET_MODE_BITSIZE (mode))
5340         {
5341           case 16:
5342             cost += addr_cost->addr_scale_costs.hi;
5343             break;
5344
5345           case 32:
5346             cost += addr_cost->addr_scale_costs.si;
5347             break;
5348
5349           case 64:
5350             cost += addr_cost->addr_scale_costs.di;
5351             break;
5352
5353           /* We can't tell, or this is a 128-bit vector.  */
5354           default:
5355             cost += addr_cost->addr_scale_costs.ti;
5356             break;
5357         }
5358     }
5359
5360   return cost;
5361 }
5362
5363 /* Return true if the RTX X in mode MODE is a zero or sign extract
5364    usable in an ADD or SUB (extended register) instruction.  */
5365 static bool
5366 aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
5367 {
5368   /* Catch add with a sign extract.
5369      This is add_<optab><mode>_multp2.  */
5370   if (GET_CODE (x) == SIGN_EXTRACT
5371       || GET_CODE (x) == ZERO_EXTRACT)
5372     {
5373       rtx op0 = XEXP (x, 0);
5374       rtx op1 = XEXP (x, 1);
5375       rtx op2 = XEXP (x, 2);
5376
5377       if (GET_CODE (op0) == MULT
5378           && CONST_INT_P (op1)
5379           && op2 == const0_rtx
5380           && CONST_INT_P (XEXP (op0, 1))
5381           && aarch64_is_extend_from_extract (mode,
5382                                              XEXP (op0, 1),
5383                                              op1))
5384         {
5385           return true;
5386         }
5387     }
5388
5389   return false;
5390 }
5391
5392 static bool
5393 aarch64_frint_unspec_p (unsigned int u)
5394 {
5395   switch (u)
5396     {
5397       case UNSPEC_FRINTZ:
5398       case UNSPEC_FRINTP:
5399       case UNSPEC_FRINTM:
5400       case UNSPEC_FRINTA:
5401       case UNSPEC_FRINTN:
5402       case UNSPEC_FRINTX:
5403       case UNSPEC_FRINTI:
5404         return true;
5405
5406       default:
5407         return false;
5408     }
5409 }
5410
5411 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5412    storing it in *COST.  Result is true if the total cost of the operation
5413    has now been calculated.  */
5414 static bool
5415 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5416 {
5417   rtx inner;
5418   rtx comparator;
5419   enum rtx_code cmpcode;
5420
5421   if (COMPARISON_P (op0))
5422     {
5423       inner = XEXP (op0, 0);
5424       comparator = XEXP (op0, 1);
5425       cmpcode = GET_CODE (op0);
5426     }
5427   else
5428     {
5429       inner = op0;
5430       comparator = const0_rtx;
5431       cmpcode = NE;
5432     }
5433
5434   if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5435     {
5436       /* Conditional branch.  */
5437       if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5438         return true;
5439       else
5440         {
5441           if (cmpcode == NE || cmpcode == EQ)
5442             {
5443               if (comparator == const0_rtx)
5444                 {
5445                   /* TBZ/TBNZ/CBZ/CBNZ.  */
5446                   if (GET_CODE (inner) == ZERO_EXTRACT)
5447                     /* TBZ/TBNZ.  */
5448                     *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5449                                        0, speed);
5450                 else
5451                   /* CBZ/CBNZ.  */
5452                   *cost += rtx_cost (inner, cmpcode, 0, speed);
5453
5454                 return true;
5455               }
5456             }
5457           else if (cmpcode == LT || cmpcode == GE)
5458             {
5459               /* TBZ/TBNZ.  */
5460               if (comparator == const0_rtx)
5461                 return true;
5462             }
5463         }
5464     }
5465   else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5466     {
5467       /* It's a conditional operation based on the status flags,
5468          so it must be some flavor of CSEL.  */
5469
5470       /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
5471       if (GET_CODE (op1) == NEG
5472           || GET_CODE (op1) == NOT
5473           || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5474         op1 = XEXP (op1, 0);
5475
5476       *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5477       *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5478       return true;
5479     }
5480
5481   /* We don't know what this is, cost all operands.  */
5482   return false;
5483 }
5484
5485 /* Calculate the cost of calculating X, storing it in *COST.  Result
5486    is true if the total cost of the operation has now been calculated.  */
5487 static bool
5488 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5489                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5490 {
5491   rtx op0, op1, op2;
5492   const struct cpu_cost_table *extra_cost
5493     = aarch64_tune_params->insn_extra_cost;
5494   machine_mode mode = GET_MODE (x);
5495
5496   /* By default, assume that everything has equivalent cost to the
5497      cheapest instruction.  Any additional costs are applied as a delta
5498      above this default.  */
5499   *cost = COSTS_N_INSNS (1);
5500
5501   /* TODO: The cost infrastructure currently does not handle
5502      vector operations.  Assume that all vector operations
5503      are equally expensive.  */
5504   if (VECTOR_MODE_P (mode))
5505     {
5506       if (speed)
5507         *cost += extra_cost->vect.alu;
5508       return true;
5509     }
5510
5511   switch (code)
5512     {
5513     case SET:
5514       /* The cost depends entirely on the operands to SET.  */
5515       *cost = 0;
5516       op0 = SET_DEST (x);
5517       op1 = SET_SRC (x);
5518
5519       switch (GET_CODE (op0))
5520         {
5521         case MEM:
5522           if (speed)
5523             {
5524               rtx address = XEXP (op0, 0);
5525               if (GET_MODE_CLASS (mode) == MODE_INT)
5526                 *cost += extra_cost->ldst.store;
5527               else if (mode == SFmode)
5528                 *cost += extra_cost->ldst.storef;
5529               else if (mode == DFmode)
5530                 *cost += extra_cost->ldst.stored;
5531
5532               *cost +=
5533                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5534                                                      0, speed));
5535             }
5536
5537           *cost += rtx_cost (op1, SET, 1, speed);
5538           return true;
5539
5540         case SUBREG:
5541           if (! REG_P (SUBREG_REG (op0)))
5542             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5543
5544           /* Fall through.  */
5545         case REG:
5546           /* const0_rtx is in general free, but we will use an
5547              instruction to set a register to 0.  */
5548           if (REG_P (op1) || op1 == const0_rtx)
5549             {
5550               /* The cost is 1 per register copied.  */
5551               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5552                               / UNITS_PER_WORD;
5553               *cost = COSTS_N_INSNS (n_minus_1 + 1);
5554             }
5555           else
5556             /* Cost is just the cost of the RHS of the set.  */
5557             *cost += rtx_cost (op1, SET, 1, speed);
5558           return true;
5559
5560         case ZERO_EXTRACT:
5561         case SIGN_EXTRACT:
5562           /* Bit-field insertion.  Strip any redundant widening of
5563              the RHS to meet the width of the target.  */
5564           if (GET_CODE (op1) == SUBREG)
5565             op1 = SUBREG_REG (op1);
5566           if ((GET_CODE (op1) == ZERO_EXTEND
5567                || GET_CODE (op1) == SIGN_EXTEND)
5568               && CONST_INT_P (XEXP (op0, 1))
5569               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5570                   >= INTVAL (XEXP (op0, 1))))
5571             op1 = XEXP (op1, 0);
5572
5573           if (CONST_INT_P (op1))
5574             {
5575               /* MOV immediate is assumed to always be cheap.  */
5576               *cost = COSTS_N_INSNS (1);
5577             }
5578           else
5579             {
5580               /* BFM.  */
5581               if (speed)
5582                 *cost += extra_cost->alu.bfi;
5583               *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5584             }
5585
5586           return true;
5587
5588         default:
5589           /* We can't make sense of this, assume default cost.  */
5590           *cost = COSTS_N_INSNS (1);
5591           return false;
5592         }
5593       return false;
5594
5595     case CONST_INT:
5596       /* If an instruction can incorporate a constant within the
5597          instruction, the instruction's expression avoids calling
5598          rtx_cost() on the constant.  If rtx_cost() is called on a
5599          constant, then it is usually because the constant must be
5600          moved into a register by one or more instructions.
5601
5602          The exception is constant 0, which can be expressed
5603          as XZR/WZR and is therefore free.  The exception to this is
5604          if we have (set (reg) (const0_rtx)) in which case we must cost
5605          the move.  However, we can catch that when we cost the SET, so
5606          we don't need to consider that here.  */
5607       if (x == const0_rtx)
5608         *cost = 0;
5609       else
5610         {
5611           /* To an approximation, building any other constant is
5612              proportionally expensive to the number of instructions
5613              required to build that constant.  This is true whether we
5614              are compiling for SPEED or otherwise.  */
5615           *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5616                                  (NULL_RTX, x, false, mode));
5617         }
5618       return true;
5619
5620     case CONST_DOUBLE:
5621       if (speed)
5622         {
5623           /* mov[df,sf]_aarch64.  */
5624           if (aarch64_float_const_representable_p (x))
5625             /* FMOV (scalar immediate).  */
5626             *cost += extra_cost->fp[mode == DFmode].fpconst;
5627           else if (!aarch64_float_const_zero_rtx_p (x))
5628             {
5629               /* This will be a load from memory.  */
5630               if (mode == DFmode)
5631                 *cost += extra_cost->ldst.loadd;
5632               else
5633                 *cost += extra_cost->ldst.loadf;
5634             }
5635           else
5636             /* Otherwise this is +0.0.  We get this using MOVI d0, #0
5637                or MOV v0.s[0], wzr - neither of which are modeled by the
5638                cost tables.  Just use the default cost.  */
5639             {
5640             }
5641         }
5642
5643       return true;
5644
5645     case MEM:
5646       if (speed)
5647         {
5648           /* For loads we want the base cost of a load, plus an
5649              approximation for the additional cost of the addressing
5650              mode.  */
5651           rtx address = XEXP (x, 0);
5652           if (GET_MODE_CLASS (mode) == MODE_INT)
5653             *cost += extra_cost->ldst.load;
5654           else if (mode == SFmode)
5655             *cost += extra_cost->ldst.loadf;
5656           else if (mode == DFmode)
5657             *cost += extra_cost->ldst.loadd;
5658
5659           *cost +=
5660                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5661                                                      0, speed));
5662         }
5663
5664       return true;
5665
5666     case NEG:
5667       op0 = XEXP (x, 0);
5668
5669       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5670        {
5671           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5672               || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5673             {
5674               /* CSETM.  */
5675               *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5676               return true;
5677             }
5678
5679           /* Cost this as SUB wzr, X.  */
5680           op0 = CONST0_RTX (GET_MODE (x));
5681           op1 = XEXP (x, 0);
5682           goto cost_minus;
5683         }
5684
5685       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5686         {
5687           /* Support (neg(fma...)) as a single instruction only if
5688              sign of zeros is unimportant.  This matches the decision
5689              making in aarch64.md.  */
5690           if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5691             {
5692               /* FNMADD.  */
5693               *cost = rtx_cost (op0, NEG, 0, speed);
5694               return true;
5695             }
5696           if (speed)
5697             /* FNEG.  */
5698             *cost += extra_cost->fp[mode == DFmode].neg;
5699           return false;
5700         }
5701
5702       return false;
5703
5704     case CLRSB:
5705     case CLZ:
5706       if (speed)
5707         *cost += extra_cost->alu.clz;
5708
5709       return false;
5710
5711     case COMPARE:
5712       op0 = XEXP (x, 0);
5713       op1 = XEXP (x, 1);
5714
5715       if (op1 == const0_rtx
5716           && GET_CODE (op0) == AND)
5717         {
5718           x = op0;
5719           goto cost_logic;
5720         }
5721
5722       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5723         {
5724           /* TODO: A write to the CC flags possibly costs extra, this
5725              needs encoding in the cost tables.  */
5726
5727           /* CC_ZESWPmode supports zero extend for free.  */
5728           if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5729             op0 = XEXP (op0, 0);
5730
5731           /* ANDS.  */
5732           if (GET_CODE (op0) == AND)
5733             {
5734               x = op0;
5735               goto cost_logic;
5736             }
5737
5738           if (GET_CODE (op0) == PLUS)
5739             {
5740               /* ADDS (and CMN alias).  */
5741               x = op0;
5742               goto cost_plus;
5743             }
5744
5745           if (GET_CODE (op0) == MINUS)
5746             {
5747               /* SUBS.  */
5748               x = op0;
5749               goto cost_minus;
5750             }
5751
5752           if (GET_CODE (op1) == NEG)
5753             {
5754               /* CMN.  */
5755               if (speed)
5756                 *cost += extra_cost->alu.arith;
5757
5758               *cost += rtx_cost (op0, COMPARE, 0, speed);
5759               *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5760               return true;
5761             }
5762
5763           /* CMP.
5764
5765              Compare can freely swap the order of operands, and
5766              canonicalization puts the more complex operation first.
5767              But the integer MINUS logic expects the shift/extend
5768              operation in op1.  */
5769           if (! (REG_P (op0)
5770                  || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5771           {
5772             op0 = XEXP (x, 1);
5773             op1 = XEXP (x, 0);
5774           }
5775           goto cost_minus;
5776         }
5777
5778       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5779         {
5780           /* FCMP.  */
5781           if (speed)
5782             *cost += extra_cost->fp[mode == DFmode].compare;
5783
5784           if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5785             {
5786               /* FCMP supports constant 0.0 for no extra cost. */
5787               return true;
5788             }
5789           return false;
5790         }
5791
5792       return false;
5793
5794     case MINUS:
5795       {
5796         op0 = XEXP (x, 0);
5797         op1 = XEXP (x, 1);
5798
5799 cost_minus:
5800         /* Detect valid immediates.  */
5801         if ((GET_MODE_CLASS (mode) == MODE_INT
5802              || (GET_MODE_CLASS (mode) == MODE_CC
5803                  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5804             && CONST_INT_P (op1)
5805             && aarch64_uimm12_shift (INTVAL (op1)))
5806           {
5807             *cost += rtx_cost (op0, MINUS, 0, speed);
5808
5809             if (speed)
5810               /* SUB(S) (immediate).  */
5811               *cost += extra_cost->alu.arith;
5812             return true;
5813
5814           }
5815
5816         /* Look for SUB (extended register).  */
5817         if (aarch64_rtx_arith_op_extract_p (op1, mode))
5818           {
5819             if (speed)
5820               *cost += extra_cost->alu.arith_shift;
5821
5822             *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5823                                (enum rtx_code) GET_CODE (op1),
5824                                0, speed);
5825             return true;
5826           }
5827
5828         rtx new_op1 = aarch64_strip_extend (op1);
5829
5830         /* Cost this as an FMA-alike operation.  */
5831         if ((GET_CODE (new_op1) == MULT
5832              || GET_CODE (new_op1) == ASHIFT)
5833             && code != COMPARE)
5834           {
5835             *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5836                                             (enum rtx_code) code,
5837                                             speed);
5838             *cost += rtx_cost (op0, MINUS, 0, speed);
5839             return true;
5840           }
5841
5842         *cost += rtx_cost (new_op1, MINUS, 1, speed);
5843
5844         if (speed)
5845           {
5846             if (GET_MODE_CLASS (mode) == MODE_INT)
5847               /* SUB(S).  */
5848               *cost += extra_cost->alu.arith;
5849             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5850               /* FSUB.  */
5851               *cost += extra_cost->fp[mode == DFmode].addsub;
5852           }
5853         return true;
5854       }
5855
5856     case PLUS:
5857       {
5858         rtx new_op0;
5859
5860         op0 = XEXP (x, 0);
5861         op1 = XEXP (x, 1);
5862
5863 cost_plus:
5864         if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5865             || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5866           {
5867             /* CSINC.  */
5868             *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5869             *cost += rtx_cost (op1, PLUS, 1, speed);
5870             return true;
5871           }
5872
5873         if (GET_MODE_CLASS (mode) == MODE_INT
5874             && CONST_INT_P (op1)
5875             && aarch64_uimm12_shift (INTVAL (op1)))
5876           {
5877             *cost += rtx_cost (op0, PLUS, 0, speed);
5878
5879             if (speed)
5880               /* ADD (immediate).  */
5881               *cost += extra_cost->alu.arith;
5882             return true;
5883           }
5884
5885         /* Look for ADD (extended register).  */
5886         if (aarch64_rtx_arith_op_extract_p (op0, mode))
5887           {
5888             if (speed)
5889               *cost += extra_cost->alu.arith_shift;
5890
5891             *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5892                                (enum rtx_code) GET_CODE (op0),
5893                                0, speed);
5894             return true;
5895           }
5896
5897         /* Strip any extend, leave shifts behind as we will
5898            cost them through mult_cost.  */
5899         new_op0 = aarch64_strip_extend (op0);
5900
5901         if (GET_CODE (new_op0) == MULT
5902             || GET_CODE (new_op0) == ASHIFT)
5903           {
5904             *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5905                                             speed);
5906             *cost += rtx_cost (op1, PLUS, 1, speed);
5907             return true;
5908           }
5909
5910         *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5911                   + rtx_cost (op1, PLUS, 1, speed));
5912
5913         if (speed)
5914           {
5915             if (GET_MODE_CLASS (mode) == MODE_INT)
5916               /* ADD.  */
5917               *cost += extra_cost->alu.arith;
5918             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5919               /* FADD.  */
5920               *cost += extra_cost->fp[mode == DFmode].addsub;
5921           }
5922         return true;
5923       }
5924
5925     case BSWAP:
5926       *cost = COSTS_N_INSNS (1);
5927
5928       if (speed)
5929         *cost += extra_cost->alu.rev;
5930
5931       return false;
5932
5933     case IOR:
5934       if (aarch_rev16_p (x))
5935         {
5936           *cost = COSTS_N_INSNS (1);
5937
5938           if (speed)
5939             *cost += extra_cost->alu.rev;
5940
5941           return true;
5942         }
5943     /* Fall through.  */
5944     case XOR:
5945     case AND:
5946     cost_logic:
5947       op0 = XEXP (x, 0);
5948       op1 = XEXP (x, 1);
5949
5950       if (code == AND
5951           && GET_CODE (op0) == MULT
5952           && CONST_INT_P (XEXP (op0, 1))
5953           && CONST_INT_P (op1)
5954           && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5955                                INTVAL (op1)) != 0)
5956         {
5957           /* This is a UBFM/SBFM.  */
5958           *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5959           if (speed)
5960             *cost += extra_cost->alu.bfx;
5961           return true;
5962         }
5963
5964       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5965         {
5966           /* We possibly get the immediate for free, this is not
5967              modelled.  */
5968           if (CONST_INT_P (op1)
5969               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5970             {
5971               *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5972
5973               if (speed)
5974                 *cost += extra_cost->alu.logical;
5975
5976               return true;
5977             }
5978           else
5979             {
5980               rtx new_op0 = op0;
5981
5982               /* Handle ORN, EON, or BIC.  */
5983               if (GET_CODE (op0) == NOT)
5984                 op0 = XEXP (op0, 0);
5985
5986               new_op0 = aarch64_strip_shift (op0);
5987
5988               /* If we had a shift on op0 then this is a logical-shift-
5989                  by-register/immediate operation.  Otherwise, this is just
5990                  a logical operation.  */
5991               if (speed)
5992                 {
5993                   if (new_op0 != op0)
5994                     {
5995                       /* Shift by immediate.  */
5996                       if (CONST_INT_P (XEXP (op0, 1)))
5997                         *cost += extra_cost->alu.log_shift;
5998                       else
5999                         *cost += extra_cost->alu.log_shift_reg;
6000                     }
6001                   else
6002                     *cost += extra_cost->alu.logical;
6003                 }
6004
6005               /* In both cases we want to cost both operands.  */
6006               *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
6007                        + rtx_cost (op1, (enum rtx_code) code, 1, speed);
6008
6009               return true;
6010             }
6011         }
6012       return false;
6013
6014     case NOT:
6015       /* MVN.  */
6016       if (speed)
6017         *cost += extra_cost->alu.logical;
6018
6019       /* The logical instruction could have the shifted register form,
6020          but the cost is the same if the shift is processed as a separate
6021          instruction, so we don't bother with it here.  */
6022       return false;
6023
6024     case ZERO_EXTEND:
6025
6026       op0 = XEXP (x, 0);
6027       /* If a value is written in SI mode, then zero extended to DI
6028          mode, the operation will in general be free as a write to
6029          a 'w' register implicitly zeroes the upper bits of an 'x'
6030          register.  However, if this is
6031
6032            (set (reg) (zero_extend (reg)))
6033
6034          we must cost the explicit register move.  */
6035       if (mode == DImode
6036           && GET_MODE (op0) == SImode
6037           && outer == SET)
6038         {
6039           int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
6040
6041           if (!op_cost && speed)
6042             /* MOV.  */
6043             *cost += extra_cost->alu.extend;
6044           else
6045             /* Free, the cost is that of the SI mode operation.  */
6046             *cost = op_cost;
6047
6048           return true;
6049         }
6050       else if (MEM_P (XEXP (x, 0)))
6051         {
6052           /* All loads can zero extend to any size for free.  */
6053           *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
6054           return true;
6055         }
6056
6057       /* UXTB/UXTH.  */
6058       if (speed)
6059         *cost += extra_cost->alu.extend;
6060
6061       return false;
6062
6063     case SIGN_EXTEND:
6064       if (MEM_P (XEXP (x, 0)))
6065         {
6066           /* LDRSH.  */
6067           if (speed)
6068             {
6069               rtx address = XEXP (XEXP (x, 0), 0);
6070               *cost += extra_cost->ldst.load_sign_extend;
6071
6072               *cost +=
6073                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6074                                                      0, speed));
6075             }
6076           return true;
6077         }
6078
6079       if (speed)
6080         *cost += extra_cost->alu.extend;
6081       return false;
6082
6083     case ASHIFT:
6084       op0 = XEXP (x, 0);
6085       op1 = XEXP (x, 1);
6086
6087       if (CONST_INT_P (op1))
6088         {
6089           /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
6090              aliases.  */
6091           if (speed)
6092             *cost += extra_cost->alu.shift;
6093
6094           /* We can incorporate zero/sign extend for free.  */
6095           if (GET_CODE (op0) == ZERO_EXTEND
6096               || GET_CODE (op0) == SIGN_EXTEND)
6097             op0 = XEXP (op0, 0);
6098
6099           *cost += rtx_cost (op0, ASHIFT, 0, speed);
6100           return true;
6101         }
6102       else
6103         {
6104           /* LSLV.  */
6105           if (speed)
6106             *cost += extra_cost->alu.shift_reg;
6107
6108           return false;  /* All arguments need to be in registers.  */
6109         }
6110
6111     case ROTATE:
6112     case ROTATERT:
6113     case LSHIFTRT:
6114     case ASHIFTRT:
6115       op0 = XEXP (x, 0);
6116       op1 = XEXP (x, 1);
6117
6118       if (CONST_INT_P (op1))
6119         {
6120           /* ASR (immediate) and friends.  */
6121           if (speed)
6122             *cost += extra_cost->alu.shift;
6123
6124           *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6125           return true;
6126         }
6127       else
6128         {
6129
6130           /* ASR (register) and friends.  */
6131           if (speed)
6132             *cost += extra_cost->alu.shift_reg;
6133
6134           return false;  /* All arguments need to be in registers.  */
6135         }
6136
6137     case SYMBOL_REF:
6138
6139       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6140         {
6141           /* LDR.  */
6142           if (speed)
6143             *cost += extra_cost->ldst.load;
6144         }
6145       else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6146                || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6147         {
6148           /* ADRP, followed by ADD.  */
6149           *cost += COSTS_N_INSNS (1);
6150           if (speed)
6151             *cost += 2 * extra_cost->alu.arith;
6152         }
6153       else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6154                || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6155         {
6156           /* ADR.  */
6157           if (speed)
6158             *cost += extra_cost->alu.arith;
6159         }
6160
6161       if (flag_pic)
6162         {
6163           /* One extra load instruction, after accessing the GOT.  */
6164           *cost += COSTS_N_INSNS (1);
6165           if (speed)
6166             *cost += extra_cost->ldst.load;
6167         }
6168       return true;
6169
6170     case HIGH:
6171     case LO_SUM:
6172       /* ADRP/ADD (immediate).  */
6173       if (speed)
6174         *cost += extra_cost->alu.arith;
6175       return true;
6176
6177     case ZERO_EXTRACT:
6178     case SIGN_EXTRACT:
6179       /* UBFX/SBFX.  */
6180       if (speed)
6181         *cost += extra_cost->alu.bfx;
6182
6183       /* We can trust that the immediates used will be correct (there
6184          are no by-register forms), so we need only cost op0.  */
6185       *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
6186       return true;
6187
6188     case MULT:
6189       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6190       /* aarch64_rtx_mult_cost always handles recursion to its
6191          operands.  */
6192       return true;
6193
6194     case MOD:
6195     case UMOD:
6196       if (speed)
6197         {
6198           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6199             *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6200                       + extra_cost->mult[GET_MODE (x) == DImode].idiv);
6201           else if (GET_MODE (x) == DFmode)
6202             *cost += (extra_cost->fp[1].mult
6203                       + extra_cost->fp[1].div);
6204           else if (GET_MODE (x) == SFmode)
6205             *cost += (extra_cost->fp[0].mult
6206                       + extra_cost->fp[0].div);
6207         }
6208       return false;  /* All arguments need to be in registers.  */
6209
6210     case DIV:
6211     case UDIV:
6212     case SQRT:
6213       if (speed)
6214         {
6215           if (GET_MODE_CLASS (mode) == MODE_INT)
6216             /* There is no integer SQRT, so only DIV and UDIV can get
6217                here.  */
6218             *cost += extra_cost->mult[mode == DImode].idiv;
6219           else
6220             *cost += extra_cost->fp[mode == DFmode].div;
6221         }
6222       return false;  /* All arguments need to be in registers.  */
6223
6224     case IF_THEN_ELSE:
6225       return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6226                                          XEXP (x, 2), cost, speed);
6227
6228     case EQ:
6229     case NE:
6230     case GT:
6231     case GTU:
6232     case LT:
6233     case LTU:
6234     case GE:
6235     case GEU:
6236     case LE:
6237     case LEU:
6238
6239       return false; /* All arguments must be in registers.  */
6240
6241     case FMA:
6242       op0 = XEXP (x, 0);
6243       op1 = XEXP (x, 1);
6244       op2 = XEXP (x, 2);
6245
6246       if (speed)
6247         *cost += extra_cost->fp[mode == DFmode].fma;
6248
6249       /* FMSUB, FNMADD, and FNMSUB are free.  */
6250       if (GET_CODE (op0) == NEG)
6251         op0 = XEXP (op0, 0);
6252
6253       if (GET_CODE (op2) == NEG)
6254         op2 = XEXP (op2, 0);
6255
6256       /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6257          and the by-element operand as operand 0.  */
6258       if (GET_CODE (op1) == NEG)
6259         op1 = XEXP (op1, 0);
6260
6261       /* Catch vector-by-element operations.  The by-element operand can
6262          either be (vec_duplicate (vec_select (x))) or just
6263          (vec_select (x)), depending on whether we are multiplying by
6264          a vector or a scalar.
6265
6266          Canonicalization is not very good in these cases, FMA4 will put the
6267          by-element operand as operand 0, FNMA4 will have it as operand 1.  */
6268       if (GET_CODE (op0) == VEC_DUPLICATE)
6269         op0 = XEXP (op0, 0);
6270       else if (GET_CODE (op1) == VEC_DUPLICATE)
6271         op1 = XEXP (op1, 0);
6272
6273       if (GET_CODE (op0) == VEC_SELECT)
6274         op0 = XEXP (op0, 0);
6275       else if (GET_CODE (op1) == VEC_SELECT)
6276         op1 = XEXP (op1, 0);
6277
6278       /* If the remaining parameters are not registers,
6279          get the cost to put them into registers.  */
6280       *cost += rtx_cost (op0, FMA, 0, speed);
6281       *cost += rtx_cost (op1, FMA, 1, speed);
6282       *cost += rtx_cost (op2, FMA, 2, speed);
6283       return true;
6284
6285     case FLOAT_EXTEND:
6286       if (speed)
6287         *cost += extra_cost->fp[mode == DFmode].widen;
6288       return false;
6289
6290     case FLOAT_TRUNCATE:
6291       if (speed)
6292         *cost += extra_cost->fp[mode == DFmode].narrow;
6293       return false;
6294
6295     case FIX:
6296     case UNSIGNED_FIX:
6297       x = XEXP (x, 0);
6298       /* Strip the rounding part.  They will all be implemented
6299          by the fcvt* family of instructions anyway.  */
6300       if (GET_CODE (x) == UNSPEC)
6301         {
6302           unsigned int uns_code = XINT (x, 1);
6303
6304           if (uns_code == UNSPEC_FRINTA
6305               || uns_code == UNSPEC_FRINTM
6306               || uns_code == UNSPEC_FRINTN
6307               || uns_code == UNSPEC_FRINTP
6308               || uns_code == UNSPEC_FRINTZ)
6309             x = XVECEXP (x, 0, 0);
6310         }
6311
6312       if (speed)
6313         *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6314
6315       *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6316       return true;
6317
6318     case ABS:
6319       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6320         {
6321           /* FABS and FNEG are analogous.  */
6322           if (speed)
6323             *cost += extra_cost->fp[mode == DFmode].neg;
6324         }
6325       else
6326         {
6327           /* Integer ABS will either be split to
6328              two arithmetic instructions, or will be an ABS
6329              (scalar), which we don't model.  */
6330           *cost = COSTS_N_INSNS (2);
6331           if (speed)
6332             *cost += 2 * extra_cost->alu.arith;
6333         }
6334       return false;
6335
6336     case SMAX:
6337     case SMIN:
6338       if (speed)
6339         {
6340           /* FMAXNM/FMINNM/FMAX/FMIN.
6341              TODO: This may not be accurate for all implementations, but
6342              we do not model this in the cost tables.  */
6343           *cost += extra_cost->fp[mode == DFmode].addsub;
6344         }
6345       return false;
6346
6347     case UNSPEC:
6348       /* The floating point round to integer frint* instructions.  */
6349       if (aarch64_frint_unspec_p (XINT (x, 1)))
6350         {
6351           if (speed)
6352             *cost += extra_cost->fp[mode == DFmode].roundint;
6353
6354           return false;
6355         }
6356
6357       if (XINT (x, 1) == UNSPEC_RBIT)
6358         {
6359           if (speed)
6360             *cost += extra_cost->alu.rev;
6361
6362           return false;
6363         }
6364       break;
6365
6366     case TRUNCATE:
6367
6368       /* Decompose <su>muldi3_highpart.  */
6369       if (/* (truncate:DI  */
6370           mode == DImode
6371           /*   (lshiftrt:TI  */
6372           && GET_MODE (XEXP (x, 0)) == TImode
6373           && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6374           /*      (mult:TI  */
6375           && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6376           /*        (ANY_EXTEND:TI (reg:DI))
6377                     (ANY_EXTEND:TI (reg:DI)))  */
6378           && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6379                && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6380               || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6381                   && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6382           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6383           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6384           /*     (const_int 64)  */
6385           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6386           && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6387         {
6388           /* UMULH/SMULH.  */
6389           if (speed)
6390             *cost += extra_cost->mult[mode == DImode].extend;
6391           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6392                              MULT, 0, speed);
6393           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6394                              MULT, 1, speed);
6395           return true;
6396         }
6397
6398       /* Fall through.  */
6399     default:
6400       break;
6401     }
6402
6403   if (dump_file && (dump_flags & TDF_DETAILS))
6404     fprintf (dump_file,
6405       "\nFailed to cost RTX.  Assuming default cost.\n");
6406
6407   return true;
6408 }
6409
6410 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6411    calculated for X.  This cost is stored in *COST.  Returns true
6412    if the total cost of X was calculated.  */
6413 static bool
6414 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6415                    int param, int *cost, bool speed)
6416 {
6417   bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6418
6419   if (dump_file && (dump_flags & TDF_DETAILS))
6420     {
6421       print_rtl_single (dump_file, x);
6422       fprintf (dump_file, "\n%s cost: %d (%s)\n",
6423                speed ? "Hot" : "Cold",
6424                *cost, result ? "final" : "partial");
6425     }
6426
6427   return result;
6428 }
6429
6430 static int
6431 aarch64_register_move_cost (machine_mode mode,
6432                             reg_class_t from_i, reg_class_t to_i)
6433 {
6434   enum reg_class from = (enum reg_class) from_i;
6435   enum reg_class to = (enum reg_class) to_i;
6436   const struct cpu_regmove_cost *regmove_cost
6437     = aarch64_tune_params->regmove_cost;
6438
6439   /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
6440   if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6441     to = GENERAL_REGS;
6442
6443   if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6444     from = GENERAL_REGS;
6445
6446   /* Moving between GPR and stack cost is the same as GP2GP.  */
6447   if ((from == GENERAL_REGS && to == STACK_REG)
6448       || (to == GENERAL_REGS && from == STACK_REG))
6449     return regmove_cost->GP2GP;
6450
6451   /* To/From the stack register, we move via the gprs.  */
6452   if (to == STACK_REG || from == STACK_REG)
6453     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6454             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6455
6456   if (GET_MODE_SIZE (mode) == 16)
6457     {
6458       /* 128-bit operations on general registers require 2 instructions.  */
6459       if (from == GENERAL_REGS && to == GENERAL_REGS)
6460         return regmove_cost->GP2GP * 2;
6461       else if (from == GENERAL_REGS)
6462         return regmove_cost->GP2FP * 2;
6463       else if (to == GENERAL_REGS)
6464         return regmove_cost->FP2GP * 2;
6465
6466       /* When AdvSIMD instructions are disabled it is not possible to move
6467          a 128-bit value directly between Q registers.  This is handled in
6468          secondary reload.  A general register is used as a scratch to move
6469          the upper DI value and the lower DI value is moved directly,
6470          hence the cost is the sum of three moves. */
6471       if (! TARGET_SIMD)
6472         return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6473
6474       return regmove_cost->FP2FP;
6475     }
6476
6477   if (from == GENERAL_REGS && to == GENERAL_REGS)
6478     return regmove_cost->GP2GP;
6479   else if (from == GENERAL_REGS)
6480     return regmove_cost->GP2FP;
6481   else if (to == GENERAL_REGS)
6482     return regmove_cost->FP2GP;
6483
6484   return regmove_cost->FP2FP;
6485 }
6486
6487 static int
6488 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
6489                           reg_class_t rclass ATTRIBUTE_UNUSED,
6490                           bool in ATTRIBUTE_UNUSED)
6491 {
6492   return aarch64_tune_params->memmov_cost;
6493 }
6494
6495 /* Return the number of instructions that can be issued per cycle.  */
6496 static int
6497 aarch64_sched_issue_rate (void)
6498 {
6499   return aarch64_tune_params->issue_rate;
6500 }
6501
6502 /* Vectorizer cost model target hooks.  */
6503
6504 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
6505 static int
6506 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6507                                     tree vectype,
6508                                     int misalign ATTRIBUTE_UNUSED)
6509 {
6510   unsigned elements;
6511
6512   switch (type_of_cost)
6513     {
6514       case scalar_stmt:
6515         return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6516
6517       case scalar_load:
6518         return aarch64_tune_params->vec_costs->scalar_load_cost;
6519
6520       case scalar_store:
6521         return aarch64_tune_params->vec_costs->scalar_store_cost;
6522
6523       case vector_stmt:
6524         return aarch64_tune_params->vec_costs->vec_stmt_cost;
6525
6526       case vector_load:
6527         return aarch64_tune_params->vec_costs->vec_align_load_cost;
6528
6529       case vector_store:
6530         return aarch64_tune_params->vec_costs->vec_store_cost;
6531
6532       case vec_to_scalar:
6533         return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6534
6535       case scalar_to_vec:
6536         return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6537
6538       case unaligned_load:
6539         return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6540
6541       case unaligned_store:
6542         return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6543
6544       case cond_branch_taken:
6545         return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6546
6547       case cond_branch_not_taken:
6548         return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6549
6550       case vec_perm:
6551       case vec_promote_demote:
6552         return aarch64_tune_params->vec_costs->vec_stmt_cost;
6553
6554       case vec_construct:
6555         elements = TYPE_VECTOR_SUBPARTS (vectype);
6556         return elements / 2 + 1;
6557
6558       default:
6559         gcc_unreachable ();
6560     }
6561 }
6562
6563 /* Implement targetm.vectorize.add_stmt_cost.  */
6564 static unsigned
6565 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6566                        struct _stmt_vec_info *stmt_info, int misalign,
6567                        enum vect_cost_model_location where)
6568 {
6569   unsigned *cost = (unsigned *) data;
6570   unsigned retval = 0;
6571
6572   if (flag_vect_cost_model)
6573     {
6574       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6575       int stmt_cost =
6576             aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6577
6578       /* Statements in an inner loop relative to the loop being
6579          vectorized are weighted more heavily.  The value here is
6580          a function (linear for now) of the loop nest level.  */
6581       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6582         {
6583           loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6584           struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
6585           unsigned nest_level = loop_depth (loop);
6586
6587           count *= nest_level;
6588         }
6589
6590       retval = (unsigned) (count * stmt_cost);
6591       cost[where] += retval;
6592     }
6593
6594   return retval;
6595 }
6596
6597 static void initialize_aarch64_code_model (void);
6598
6599 /* Parse the architecture extension string.  */
6600
6601 static void
6602 aarch64_parse_extension (char *str)
6603 {
6604   /* The extension string is parsed left to right.  */
6605   const struct aarch64_option_extension *opt = NULL;
6606
6607   /* Flag to say whether we are adding or removing an extension.  */
6608   int adding_ext = -1;
6609
6610   while (str != NULL && *str != 0)
6611     {
6612       char *ext;
6613       size_t len;
6614
6615       str++;
6616       ext = strchr (str, '+');
6617
6618       if (ext != NULL)
6619         len = ext - str;
6620       else
6621         len = strlen (str);
6622
6623       if (len >= 2 && strncmp (str, "no", 2) == 0)
6624         {
6625           adding_ext = 0;
6626           len -= 2;
6627           str += 2;
6628         }
6629       else if (len > 0)
6630         adding_ext = 1;
6631
6632       if (len == 0)
6633         {
6634           error ("missing feature modifier after %qs", adding_ext ? "+"
6635                                                                   : "+no");
6636           return;
6637         }
6638
6639       /* Scan over the extensions table trying to find an exact match.  */
6640       for (opt = all_extensions; opt->name != NULL; opt++)
6641         {
6642           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6643             {
6644               /* Add or remove the extension.  */
6645               if (adding_ext)
6646                 aarch64_isa_flags |= opt->flags_on;
6647               else
6648                 aarch64_isa_flags &= ~(opt->flags_off);
6649               break;
6650             }
6651         }
6652
6653       if (opt->name == NULL)
6654         {
6655           /* Extension not found in list.  */
6656           error ("unknown feature modifier %qs", str);
6657           return;
6658         }
6659
6660       str = ext;
6661     };
6662
6663   return;
6664 }
6665
6666 /* Parse the ARCH string.  */
6667
6668 static void
6669 aarch64_parse_arch (void)
6670 {
6671   char *ext;
6672   const struct processor *arch;
6673   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6674   size_t len;
6675
6676   strcpy (str, aarch64_arch_string);
6677
6678   ext = strchr (str, '+');
6679
6680   if (ext != NULL)
6681     len = ext - str;
6682   else
6683     len = strlen (str);
6684
6685   if (len == 0)
6686     {
6687       error ("missing arch name in -march=%qs", str);
6688       return;
6689     }
6690
6691   /* Loop through the list of supported ARCHs to find a match.  */
6692   for (arch = all_architectures; arch->name != NULL; arch++)
6693     {
6694       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6695         {
6696           selected_arch = arch;
6697           aarch64_isa_flags = selected_arch->flags;
6698
6699           if (!selected_cpu)
6700             selected_cpu = &all_cores[selected_arch->core];
6701
6702           if (ext != NULL)
6703             {
6704               /* ARCH string contains at least one extension.  */
6705               aarch64_parse_extension (ext);
6706             }
6707
6708           if (strcmp (selected_arch->arch, selected_cpu->arch))
6709             {
6710               warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6711                        selected_cpu->name, selected_arch->name);
6712             }
6713
6714           return;
6715         }
6716     }
6717
6718   /* ARCH name not found in list.  */
6719   error ("unknown value %qs for -march", str);
6720   return;
6721 }
6722
6723 /* Parse the CPU string.  */
6724
6725 static void
6726 aarch64_parse_cpu (void)
6727 {
6728   char *ext;
6729   const struct processor *cpu;
6730   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6731   size_t len;
6732
6733   strcpy (str, aarch64_cpu_string);
6734
6735   ext = strchr (str, '+');
6736
6737   if (ext != NULL)
6738     len = ext - str;
6739   else
6740     len = strlen (str);
6741
6742   if (len == 0)
6743     {
6744       error ("missing cpu name in -mcpu=%qs", str);
6745       return;
6746     }
6747
6748   /* Loop through the list of supported CPUs to find a match.  */
6749   for (cpu = all_cores; cpu->name != NULL; cpu++)
6750     {
6751       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6752         {
6753           selected_cpu = cpu;
6754           aarch64_isa_flags = selected_cpu->flags;
6755
6756           if (ext != NULL)
6757             {
6758               /* CPU string contains at least one extension.  */
6759               aarch64_parse_extension (ext);
6760             }
6761
6762           return;
6763         }
6764     }
6765
6766   /* CPU name not found in list.  */
6767   error ("unknown value %qs for -mcpu", str);
6768   return;
6769 }
6770
6771 /* Parse the TUNE string.  */
6772
6773 static void
6774 aarch64_parse_tune (void)
6775 {
6776   const struct processor *cpu;
6777   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6778   strcpy (str, aarch64_tune_string);
6779
6780   /* Loop through the list of supported CPUs to find a match.  */
6781   for (cpu = all_cores; cpu->name != NULL; cpu++)
6782     {
6783       if (strcmp (cpu->name, str) == 0)
6784         {
6785           selected_tune = cpu;
6786           return;
6787         }
6788     }
6789
6790   /* CPU name not found in list.  */
6791   error ("unknown value %qs for -mtune", str);
6792   return;
6793 }
6794
6795
6796 /* Implement TARGET_OPTION_OVERRIDE.  */
6797
6798 static void
6799 aarch64_override_options (void)
6800 {
6801   /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6802      If either of -march or -mtune is given, they override their
6803      respective component of -mcpu.
6804
6805      So, first parse AARCH64_CPU_STRING, then the others, be careful
6806      with -march as, if -mcpu is not present on the command line, march
6807      must set a sensible default CPU.  */
6808   if (aarch64_cpu_string)
6809     {
6810       aarch64_parse_cpu ();
6811     }
6812
6813   if (aarch64_arch_string)
6814     {
6815       aarch64_parse_arch ();
6816     }
6817
6818   if (aarch64_tune_string)
6819     {
6820       aarch64_parse_tune ();
6821     }
6822
6823 #ifndef HAVE_AS_MABI_OPTION
6824   /* The compiler may have been configured with 2.23.* binutils, which does
6825      not have support for ILP32.  */
6826   if (TARGET_ILP32)
6827     error ("Assembler does not support -mabi=ilp32");
6828 #endif
6829
6830   initialize_aarch64_code_model ();
6831
6832   aarch64_build_bitmask_table ();
6833
6834   /* This target defaults to strict volatile bitfields.  */
6835   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6836     flag_strict_volatile_bitfields = 1;
6837
6838   /* If the user did not specify a processor, choose the default
6839      one for them.  This will be the CPU set during configuration using
6840      --with-cpu, otherwise it is "generic".  */
6841   if (!selected_cpu)
6842     {
6843       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6844       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6845     }
6846
6847   gcc_assert (selected_cpu);
6848
6849   if (!selected_tune)
6850     selected_tune = selected_cpu;
6851
6852   aarch64_tune_flags = selected_tune->flags;
6853   aarch64_tune = selected_tune->core;
6854   aarch64_tune_params = selected_tune->tune;
6855   aarch64_architecture_version = selected_cpu->architecture_version;
6856
6857   if (aarch64_fix_a53_err835769 == 2)
6858     {
6859 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6860       aarch64_fix_a53_err835769 = 1;
6861 #else
6862       aarch64_fix_a53_err835769 = 0;
6863 #endif
6864     }
6865
6866   /* If not opzimizing for size, set the default
6867      alignment to what the target wants */
6868   if (!optimize_size)
6869     {
6870       if (align_loops <= 0)
6871         align_loops = aarch64_tune_params->loop_align;
6872       if (align_jumps <= 0)
6873         align_jumps = aarch64_tune_params->jump_align;
6874       if (align_functions <= 0)
6875         align_functions = aarch64_tune_params->function_align;
6876     }
6877
6878   aarch64_override_options_after_change ();
6879 }
6880
6881 /* Implement targetm.override_options_after_change.  */
6882
6883 static void
6884 aarch64_override_options_after_change (void)
6885 {
6886   if (flag_omit_frame_pointer)
6887     flag_omit_leaf_frame_pointer = false;
6888   else if (flag_omit_leaf_frame_pointer)
6889     flag_omit_frame_pointer = true;
6890 }
6891
6892 static struct machine_function *
6893 aarch64_init_machine_status (void)
6894 {
6895   struct machine_function *machine;
6896   machine = ggc_cleared_alloc<machine_function> ();
6897   return machine;
6898 }
6899
6900 void
6901 aarch64_init_expanders (void)
6902 {
6903   init_machine_status = aarch64_init_machine_status;
6904 }
6905
6906 /* A checking mechanism for the implementation of the various code models.  */
6907 static void
6908 initialize_aarch64_code_model (void)
6909 {
6910    if (flag_pic)
6911      {
6912        switch (aarch64_cmodel_var)
6913          {
6914          case AARCH64_CMODEL_TINY:
6915            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6916            break;
6917          case AARCH64_CMODEL_SMALL:
6918            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6919            break;
6920          case AARCH64_CMODEL_LARGE:
6921            sorry ("code model %qs with -f%s", "large",
6922                   flag_pic > 1 ? "PIC" : "pic");
6923          default:
6924            gcc_unreachable ();
6925          }
6926      }
6927    else
6928      aarch64_cmodel = aarch64_cmodel_var;
6929 }
6930
6931 /* Return true if SYMBOL_REF X binds locally.  */
6932
6933 static bool
6934 aarch64_symbol_binds_local_p (const_rtx x)
6935 {
6936   return (SYMBOL_REF_DECL (x)
6937           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6938           : SYMBOL_REF_LOCAL_P (x));
6939 }
6940
6941 /* Return true if SYMBOL_REF X is thread local */
6942 static bool
6943 aarch64_tls_symbol_p (rtx x)
6944 {
6945   if (! TARGET_HAVE_TLS)
6946     return false;
6947
6948   if (GET_CODE (x) != SYMBOL_REF)
6949     return false;
6950
6951   return SYMBOL_REF_TLS_MODEL (x) != 0;
6952 }
6953
6954 /* Classify a TLS symbol into one of the TLS kinds.  */
6955 enum aarch64_symbol_type
6956 aarch64_classify_tls_symbol (rtx x)
6957 {
6958   enum tls_model tls_kind = tls_symbolic_operand_type (x);
6959
6960   switch (tls_kind)
6961     {
6962     case TLS_MODEL_GLOBAL_DYNAMIC:
6963     case TLS_MODEL_LOCAL_DYNAMIC:
6964       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6965
6966     case TLS_MODEL_INITIAL_EXEC:
6967       return SYMBOL_SMALL_GOTTPREL;
6968
6969     case TLS_MODEL_LOCAL_EXEC:
6970       return SYMBOL_SMALL_TPREL;
6971
6972     case TLS_MODEL_EMULATED:
6973     case TLS_MODEL_NONE:
6974       return SYMBOL_FORCE_TO_MEM;
6975
6976     default:
6977       gcc_unreachable ();
6978     }
6979 }
6980
6981 /* Return the method that should be used to access SYMBOL_REF or
6982    LABEL_REF X in context CONTEXT.  */
6983
6984 enum aarch64_symbol_type
6985 aarch64_classify_symbol (rtx x, rtx offset,
6986                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6987 {
6988   if (GET_CODE (x) == LABEL_REF)
6989     {
6990       switch (aarch64_cmodel)
6991         {
6992         case AARCH64_CMODEL_LARGE:
6993           return SYMBOL_FORCE_TO_MEM;
6994
6995         case AARCH64_CMODEL_TINY_PIC:
6996         case AARCH64_CMODEL_TINY:
6997           return SYMBOL_TINY_ABSOLUTE;
6998
6999         case AARCH64_CMODEL_SMALL_PIC:
7000         case AARCH64_CMODEL_SMALL:
7001           return SYMBOL_SMALL_ABSOLUTE;
7002
7003         default:
7004           gcc_unreachable ();
7005         }
7006     }
7007
7008   if (GET_CODE (x) == SYMBOL_REF)
7009     {
7010       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7011           return SYMBOL_FORCE_TO_MEM;
7012
7013       if (aarch64_tls_symbol_p (x))
7014         return aarch64_classify_tls_symbol (x);
7015
7016       switch (aarch64_cmodel)
7017         {
7018         case AARCH64_CMODEL_TINY:
7019           /* When we retreive symbol + offset address, we have to make sure
7020              the offset does not cause overflow of the final address.  But
7021              we have no way of knowing the address of symbol at compile time
7022              so we can't accurately say if the distance between the PC and
7023              symbol + offset is outside the addressible range of +/-1M in the
7024              TINY code model.  So we rely on images not being greater than
7025              1M and cap the offset at 1M and anything beyond 1M will have to
7026              be loaded using an alternative mechanism.  */
7027           if (SYMBOL_REF_WEAK (x)
7028               || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
7029             return SYMBOL_FORCE_TO_MEM;
7030           return SYMBOL_TINY_ABSOLUTE;
7031
7032         case AARCH64_CMODEL_SMALL:
7033           /* Same reasoning as the tiny code model, but the offset cap here is
7034              4G.  */
7035           if (SYMBOL_REF_WEAK (x)
7036               || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
7037               || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
7038             return SYMBOL_FORCE_TO_MEM;
7039           return SYMBOL_SMALL_ABSOLUTE;
7040
7041         case AARCH64_CMODEL_TINY_PIC:
7042           if (!aarch64_symbol_binds_local_p (x))
7043             return SYMBOL_TINY_GOT;
7044           return SYMBOL_TINY_ABSOLUTE;
7045
7046         case AARCH64_CMODEL_SMALL_PIC:
7047           if (!aarch64_symbol_binds_local_p (x))
7048             return SYMBOL_SMALL_GOT;
7049           return SYMBOL_SMALL_ABSOLUTE;
7050
7051         default:
7052           gcc_unreachable ();
7053         }
7054     }
7055
7056   /* By default push everything into the constant pool.  */
7057   return SYMBOL_FORCE_TO_MEM;
7058 }
7059
7060 bool
7061 aarch64_constant_address_p (rtx x)
7062 {
7063   return (CONSTANT_P (x) && memory_address_p (DImode, x));
7064 }
7065
7066 bool
7067 aarch64_legitimate_pic_operand_p (rtx x)
7068 {
7069   if (GET_CODE (x) == SYMBOL_REF
7070       || (GET_CODE (x) == CONST
7071           && GET_CODE (XEXP (x, 0)) == PLUS
7072           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7073      return false;
7074
7075   return true;
7076 }
7077
7078 /* Return true if X holds either a quarter-precision or
7079      floating-point +0.0 constant.  */
7080 static bool
7081 aarch64_valid_floating_const (machine_mode mode, rtx x)
7082 {
7083   if (!CONST_DOUBLE_P (x))
7084     return false;
7085
7086   /* TODO: We could handle moving 0.0 to a TFmode register,
7087      but first we would like to refactor the movtf_aarch64
7088      to be more amicable to split moves properly and
7089      correctly gate on TARGET_SIMD.  For now - reject all
7090      constants which are not to SFmode or DFmode registers.  */
7091   if (!(mode == SFmode || mode == DFmode))
7092     return false;
7093
7094   if (aarch64_float_const_zero_rtx_p (x))
7095     return true;
7096   return aarch64_float_const_representable_p (x);
7097 }
7098
7099 static bool
7100 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
7101 {
7102   /* Do not allow vector struct mode constants.  We could support
7103      0 and -1 easily, but they need support in aarch64-simd.md.  */
7104   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7105     return false;
7106
7107   /* This could probably go away because
7108      we now decompose CONST_INTs according to expand_mov_immediate.  */
7109   if ((GET_CODE (x) == CONST_VECTOR
7110        && aarch64_simd_valid_immediate (x, mode, false, NULL))
7111       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7112         return !targetm.cannot_force_const_mem (mode, x);
7113
7114   if (GET_CODE (x) == HIGH
7115       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7116     return true;
7117
7118   return aarch64_constant_address_p (x);
7119 }
7120
7121 rtx
7122 aarch64_load_tp (rtx target)
7123 {
7124   if (!target
7125       || GET_MODE (target) != Pmode
7126       || !register_operand (target, Pmode))
7127     target = gen_reg_rtx (Pmode);
7128
7129   /* Can return in any reg.  */
7130   emit_insn (gen_aarch64_load_tp_hard (target));
7131   return target;
7132 }
7133
7134 /* On AAPCS systems, this is the "struct __va_list".  */
7135 static GTY(()) tree va_list_type;
7136
7137 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7138    Return the type to use as __builtin_va_list.
7139
7140    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7141
7142    struct __va_list
7143    {
7144      void *__stack;
7145      void *__gr_top;
7146      void *__vr_top;
7147      int   __gr_offs;
7148      int   __vr_offs;
7149    };  */
7150
7151 static tree
7152 aarch64_build_builtin_va_list (void)
7153 {
7154   tree va_list_name;
7155   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7156
7157   /* Create the type.  */
7158   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7159   /* Give it the required name.  */
7160   va_list_name = build_decl (BUILTINS_LOCATION,
7161                              TYPE_DECL,
7162                              get_identifier ("__va_list"),
7163                              va_list_type);
7164   DECL_ARTIFICIAL (va_list_name) = 1;
7165   TYPE_NAME (va_list_type) = va_list_name;
7166   TYPE_STUB_DECL (va_list_type) = va_list_name;
7167
7168   /* Create the fields.  */
7169   f_stack = build_decl (BUILTINS_LOCATION,
7170                         FIELD_DECL, get_identifier ("__stack"),
7171                         ptr_type_node);
7172   f_grtop = build_decl (BUILTINS_LOCATION,
7173                         FIELD_DECL, get_identifier ("__gr_top"),
7174                         ptr_type_node);
7175   f_vrtop = build_decl (BUILTINS_LOCATION,
7176                         FIELD_DECL, get_identifier ("__vr_top"),
7177                         ptr_type_node);
7178   f_groff = build_decl (BUILTINS_LOCATION,
7179                         FIELD_DECL, get_identifier ("__gr_offs"),
7180                         integer_type_node);
7181   f_vroff = build_decl (BUILTINS_LOCATION,
7182                         FIELD_DECL, get_identifier ("__vr_offs"),
7183                         integer_type_node);
7184
7185   DECL_ARTIFICIAL (f_stack) = 1;
7186   DECL_ARTIFICIAL (f_grtop) = 1;
7187   DECL_ARTIFICIAL (f_vrtop) = 1;
7188   DECL_ARTIFICIAL (f_groff) = 1;
7189   DECL_ARTIFICIAL (f_vroff) = 1;
7190
7191   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7192   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7193   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7194   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7195   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7196
7197   TYPE_FIELDS (va_list_type) = f_stack;
7198   DECL_CHAIN (f_stack) = f_grtop;
7199   DECL_CHAIN (f_grtop) = f_vrtop;
7200   DECL_CHAIN (f_vrtop) = f_groff;
7201   DECL_CHAIN (f_groff) = f_vroff;
7202
7203   /* Compute its layout.  */
7204   layout_type (va_list_type);
7205
7206   return va_list_type;
7207 }
7208
7209 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
7210 static void
7211 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7212 {
7213   const CUMULATIVE_ARGS *cum;
7214   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7215   tree stack, grtop, vrtop, groff, vroff;
7216   tree t;
7217   int gr_save_area_size;
7218   int vr_save_area_size;
7219   int vr_offset;
7220
7221   cum = &crtl->args.info;
7222   gr_save_area_size
7223     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7224   vr_save_area_size
7225     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7226
7227   if (TARGET_GENERAL_REGS_ONLY)
7228     {
7229       if (cum->aapcs_nvrn > 0)
7230         sorry ("%qs and floating point or vector arguments",
7231                "-mgeneral-regs-only");
7232       vr_save_area_size = 0;
7233     }
7234
7235   f_stack = TYPE_FIELDS (va_list_type_node);
7236   f_grtop = DECL_CHAIN (f_stack);
7237   f_vrtop = DECL_CHAIN (f_grtop);
7238   f_groff = DECL_CHAIN (f_vrtop);
7239   f_vroff = DECL_CHAIN (f_groff);
7240
7241   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7242                   NULL_TREE);
7243   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7244                   NULL_TREE);
7245   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7246                   NULL_TREE);
7247   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7248                   NULL_TREE);
7249   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7250                   NULL_TREE);
7251
7252   /* Emit code to initialize STACK, which points to the next varargs stack
7253      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
7254      by named arguments.  STACK is 8-byte aligned.  */
7255   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7256   if (cum->aapcs_stack_size > 0)
7257     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7258   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7259   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7260
7261   /* Emit code to initialize GRTOP, the top of the GR save area.
7262      virtual_incoming_args_rtx should have been 16 byte aligned.  */
7263   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7264   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7265   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7266
7267   /* Emit code to initialize VRTOP, the top of the VR save area.
7268      This address is gr_save_area_bytes below GRTOP, rounded
7269      down to the next 16-byte boundary.  */
7270   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7271   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7272                              STACK_BOUNDARY / BITS_PER_UNIT);
7273
7274   if (vr_offset)
7275     t = fold_build_pointer_plus_hwi (t, -vr_offset);
7276   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7277   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7278
7279   /* Emit code to initialize GROFF, the offset from GRTOP of the
7280      next GPR argument.  */
7281   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7282               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7283   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7284
7285   /* Likewise emit code to initialize VROFF, the offset from FTOP
7286      of the next VR argument.  */
7287   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7288               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7289   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7290 }
7291
7292 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
7293
7294 static tree
7295 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7296                               gimple_seq *post_p ATTRIBUTE_UNUSED)
7297 {
7298   tree addr;
7299   bool indirect_p;
7300   bool is_ha;           /* is HFA or HVA.  */
7301   bool dw_align;        /* double-word align.  */
7302   machine_mode ag_mode = VOIDmode;
7303   int nregs;
7304   machine_mode mode;
7305
7306   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7307   tree stack, f_top, f_off, off, arg, roundup, on_stack;
7308   HOST_WIDE_INT size, rsize, adjust, align;
7309   tree t, u, cond1, cond2;
7310
7311   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7312   if (indirect_p)
7313     type = build_pointer_type (type);
7314
7315   mode = TYPE_MODE (type);
7316
7317   f_stack = TYPE_FIELDS (va_list_type_node);
7318   f_grtop = DECL_CHAIN (f_stack);
7319   f_vrtop = DECL_CHAIN (f_grtop);
7320   f_groff = DECL_CHAIN (f_vrtop);
7321   f_vroff = DECL_CHAIN (f_groff);
7322
7323   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7324                   f_stack, NULL_TREE);
7325   size = int_size_in_bytes (type);
7326   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7327
7328   dw_align = false;
7329   adjust = 0;
7330   if (aarch64_vfp_is_call_or_return_candidate (mode,
7331                                                type,
7332                                                &ag_mode,
7333                                                &nregs,
7334                                                &is_ha))
7335     {
7336       /* TYPE passed in fp/simd registers.  */
7337       if (TARGET_GENERAL_REGS_ONLY)
7338         sorry ("%qs and floating point or vector arguments",
7339                "-mgeneral-regs-only");
7340
7341       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7342                       unshare_expr (valist), f_vrtop, NULL_TREE);
7343       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7344                       unshare_expr (valist), f_vroff, NULL_TREE);
7345
7346       rsize = nregs * UNITS_PER_VREG;
7347
7348       if (is_ha)
7349         {
7350           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7351             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7352         }
7353       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7354                && size < UNITS_PER_VREG)
7355         {
7356           adjust = UNITS_PER_VREG - size;
7357         }
7358     }
7359   else
7360     {
7361       /* TYPE passed in general registers.  */
7362       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7363                       unshare_expr (valist), f_grtop, NULL_TREE);
7364       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7365                       unshare_expr (valist), f_groff, NULL_TREE);
7366       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7367       nregs = rsize / UNITS_PER_WORD;
7368
7369       if (align > 8)
7370         dw_align = true;
7371
7372       if (BLOCK_REG_PADDING (mode, type, 1) == downward
7373           && size < UNITS_PER_WORD)
7374         {
7375           adjust = UNITS_PER_WORD  - size;
7376         }
7377     }
7378
7379   /* Get a local temporary for the field value.  */
7380   off = get_initialized_tmp_var (f_off, pre_p, NULL);
7381
7382   /* Emit code to branch if off >= 0.  */
7383   t = build2 (GE_EXPR, boolean_type_node, off,
7384               build_int_cst (TREE_TYPE (off), 0));
7385   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7386
7387   if (dw_align)
7388     {
7389       /* Emit: offs = (offs + 15) & -16.  */
7390       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7391                   build_int_cst (TREE_TYPE (off), 15));
7392       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7393                   build_int_cst (TREE_TYPE (off), -16));
7394       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7395     }
7396   else
7397     roundup = NULL;
7398
7399   /* Update ap.__[g|v]r_offs  */
7400   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7401               build_int_cst (TREE_TYPE (off), rsize));
7402   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7403
7404   /* String up.  */
7405   if (roundup)
7406     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7407
7408   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
7409   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7410               build_int_cst (TREE_TYPE (f_off), 0));
7411   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7412
7413   /* String up: make sure the assignment happens before the use.  */
7414   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7415   COND_EXPR_ELSE (cond1) = t;
7416
7417   /* Prepare the trees handling the argument that is passed on the stack;
7418      the top level node will store in ON_STACK.  */
7419   arg = get_initialized_tmp_var (stack, pre_p, NULL);
7420   if (align > 8)
7421     {
7422       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
7423       t = fold_convert (intDI_type_node, arg);
7424       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7425                   build_int_cst (TREE_TYPE (t), 15));
7426       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7427                   build_int_cst (TREE_TYPE (t), -16));
7428       t = fold_convert (TREE_TYPE (arg), t);
7429       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7430     }
7431   else
7432     roundup = NULL;
7433   /* Advance ap.__stack  */
7434   t = fold_convert (intDI_type_node, arg);
7435   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7436               build_int_cst (TREE_TYPE (t), size + 7));
7437   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7438               build_int_cst (TREE_TYPE (t), -8));
7439   t = fold_convert (TREE_TYPE (arg), t);
7440   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7441   /* String up roundup and advance.  */
7442   if (roundup)
7443     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7444   /* String up with arg */
7445   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7446   /* Big-endianness related address adjustment.  */
7447   if (BLOCK_REG_PADDING (mode, type, 1) == downward
7448       && size < UNITS_PER_WORD)
7449   {
7450     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7451                 size_int (UNITS_PER_WORD - size));
7452     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7453   }
7454
7455   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7456   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7457
7458   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
7459   t = off;
7460   if (adjust)
7461     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7462                 build_int_cst (TREE_TYPE (off), adjust));
7463
7464   t = fold_convert (sizetype, t);
7465   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7466
7467   if (is_ha)
7468     {
7469       /* type ha; // treat as "struct {ftype field[n];}"
7470          ... [computing offs]
7471          for (i = 0; i <nregs; ++i, offs += 16)
7472            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7473          return ha;  */
7474       int i;
7475       tree tmp_ha, field_t, field_ptr_t;
7476
7477       /* Declare a local variable.  */
7478       tmp_ha = create_tmp_var_raw (type, "ha");
7479       gimple_add_tmp_var (tmp_ha);
7480
7481       /* Establish the base type.  */
7482       switch (ag_mode)
7483         {
7484         case SFmode:
7485           field_t = float_type_node;
7486           field_ptr_t = float_ptr_type_node;
7487           break;
7488         case DFmode:
7489           field_t = double_type_node;
7490           field_ptr_t = double_ptr_type_node;
7491           break;
7492         case TFmode:
7493           field_t = long_double_type_node;
7494           field_ptr_t = long_double_ptr_type_node;
7495           break;
7496 /* The half precision and quad precision are not fully supported yet.  Enable
7497    the following code after the support is complete.  Need to find the correct
7498    type node for __fp16 *.  */
7499 #if 0
7500         case HFmode:
7501           field_t = float_type_node;
7502           field_ptr_t = float_ptr_type_node;
7503           break;
7504 #endif
7505         case V2SImode:
7506         case V4SImode:
7507             {
7508               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7509               field_t = build_vector_type_for_mode (innertype, ag_mode);
7510               field_ptr_t = build_pointer_type (field_t);
7511             }
7512           break;
7513         default:
7514           gcc_assert (0);
7515         }
7516
7517       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
7518       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7519       addr = t;
7520       t = fold_convert (field_ptr_t, addr);
7521       t = build2 (MODIFY_EXPR, field_t,
7522                   build1 (INDIRECT_REF, field_t, tmp_ha),
7523                   build1 (INDIRECT_REF, field_t, t));
7524
7525       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
7526       for (i = 1; i < nregs; ++i)
7527         {
7528           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7529           u = fold_convert (field_ptr_t, addr);
7530           u = build2 (MODIFY_EXPR, field_t,
7531                       build2 (MEM_REF, field_t, tmp_ha,
7532                               build_int_cst (field_ptr_t,
7533                                              (i *
7534                                               int_size_in_bytes (field_t)))),
7535                       build1 (INDIRECT_REF, field_t, u));
7536           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7537         }
7538
7539       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7540       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7541     }
7542
7543   COND_EXPR_ELSE (cond2) = t;
7544   addr = fold_convert (build_pointer_type (type), cond1);
7545   addr = build_va_arg_indirect_ref (addr);
7546
7547   if (indirect_p)
7548     addr = build_va_arg_indirect_ref (addr);
7549
7550   return addr;
7551 }
7552
7553 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
7554
7555 static void
7556 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
7557                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7558                                 int no_rtl)
7559 {
7560   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7561   CUMULATIVE_ARGS local_cum;
7562   int gr_saved, vr_saved;
7563
7564   /* The caller has advanced CUM up to, but not beyond, the last named
7565      argument.  Advance a local copy of CUM past the last "real" named
7566      argument, to find out how many registers are left over.  */
7567   local_cum = *cum;
7568   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7569
7570   /* Found out how many registers we need to save.  */
7571   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7572   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7573
7574   if (TARGET_GENERAL_REGS_ONLY)
7575     {
7576       if (local_cum.aapcs_nvrn > 0)
7577         sorry ("%qs and floating point or vector arguments",
7578                "-mgeneral-regs-only");
7579       vr_saved = 0;
7580     }
7581
7582   if (!no_rtl)
7583     {
7584       if (gr_saved > 0)
7585         {
7586           rtx ptr, mem;
7587
7588           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
7589           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7590                                - gr_saved * UNITS_PER_WORD);
7591           mem = gen_frame_mem (BLKmode, ptr);
7592           set_mem_alias_set (mem, get_varargs_alias_set ());
7593
7594           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7595                                mem, gr_saved);
7596         }
7597       if (vr_saved > 0)
7598         {
7599           /* We can't use move_block_from_reg, because it will use
7600              the wrong mode, storing D regs only.  */
7601           machine_mode mode = TImode;
7602           int off, i;
7603
7604           /* Set OFF to the offset from virtual_incoming_args_rtx of
7605              the first vector register.  The VR save area lies below
7606              the GR one, and is aligned to 16 bytes.  */
7607           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7608                                    STACK_BOUNDARY / BITS_PER_UNIT);
7609           off -= vr_saved * UNITS_PER_VREG;
7610
7611           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7612             {
7613               rtx ptr, mem;
7614
7615               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7616               mem = gen_frame_mem (mode, ptr);
7617               set_mem_alias_set (mem, get_varargs_alias_set ());
7618               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7619               off += UNITS_PER_VREG;
7620             }
7621         }
7622     }
7623
7624   /* We don't save the size into *PRETEND_SIZE because we want to avoid
7625      any complication of having crtl->args.pretend_args_size changed.  */
7626   cfun->machine->frame.saved_varargs_size
7627     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7628                       STACK_BOUNDARY / BITS_PER_UNIT)
7629        + vr_saved * UNITS_PER_VREG);
7630 }
7631
7632 static void
7633 aarch64_conditional_register_usage (void)
7634 {
7635   int i;
7636   if (!TARGET_FLOAT)
7637     {
7638       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7639         {
7640           fixed_regs[i] = 1;
7641           call_used_regs[i] = 1;
7642         }
7643     }
7644 }
7645
7646 /* Walk down the type tree of TYPE counting consecutive base elements.
7647    If *MODEP is VOIDmode, then set it to the first valid floating point
7648    type.  If a non-floating point type is found, or if a floating point
7649    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7650    otherwise return the count in the sub-tree.  */
7651 static int
7652 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
7653 {
7654   machine_mode mode;
7655   HOST_WIDE_INT size;
7656
7657   switch (TREE_CODE (type))
7658     {
7659     case REAL_TYPE:
7660       mode = TYPE_MODE (type);
7661       if (mode != DFmode && mode != SFmode && mode != TFmode)
7662         return -1;
7663
7664       if (*modep == VOIDmode)
7665         *modep = mode;
7666
7667       if (*modep == mode)
7668         return 1;
7669
7670       break;
7671
7672     case COMPLEX_TYPE:
7673       mode = TYPE_MODE (TREE_TYPE (type));
7674       if (mode != DFmode && mode != SFmode && mode != TFmode)
7675         return -1;
7676
7677       if (*modep == VOIDmode)
7678         *modep = mode;
7679
7680       if (*modep == mode)
7681         return 2;
7682
7683       break;
7684
7685     case VECTOR_TYPE:
7686       /* Use V2SImode and V4SImode as representatives of all 64-bit
7687          and 128-bit vector types.  */
7688       size = int_size_in_bytes (type);
7689       switch (size)
7690         {
7691         case 8:
7692           mode = V2SImode;
7693           break;
7694         case 16:
7695           mode = V4SImode;
7696           break;
7697         default:
7698           return -1;
7699         }
7700
7701       if (*modep == VOIDmode)
7702         *modep = mode;
7703
7704       /* Vector modes are considered to be opaque: two vectors are
7705          equivalent for the purposes of being homogeneous aggregates
7706          if they are the same size.  */
7707       if (*modep == mode)
7708         return 1;
7709
7710       break;
7711
7712     case ARRAY_TYPE:
7713       {
7714         int count;
7715         tree index = TYPE_DOMAIN (type);
7716
7717         /* Can't handle incomplete types nor sizes that are not
7718            fixed.  */
7719         if (!COMPLETE_TYPE_P (type)
7720             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7721           return -1;
7722
7723         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7724         if (count == -1
7725             || !index
7726             || !TYPE_MAX_VALUE (index)
7727             || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7728             || !TYPE_MIN_VALUE (index)
7729             || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7730             || count < 0)
7731           return -1;
7732
7733         count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7734                       - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7735
7736         /* There must be no padding.  */
7737         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7738           return -1;
7739
7740         return count;
7741       }
7742
7743     case RECORD_TYPE:
7744       {
7745         int count = 0;
7746         int sub_count;
7747         tree field;
7748
7749         /* Can't handle incomplete types nor sizes that are not
7750            fixed.  */
7751         if (!COMPLETE_TYPE_P (type)
7752             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7753           return -1;
7754
7755         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7756           {
7757             if (TREE_CODE (field) != FIELD_DECL)
7758               continue;
7759
7760             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7761             if (sub_count < 0)
7762               return -1;
7763             count += sub_count;
7764           }
7765
7766         /* There must be no padding.  */
7767         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7768           return -1;
7769
7770         return count;
7771       }
7772
7773     case UNION_TYPE:
7774     case QUAL_UNION_TYPE:
7775       {
7776         /* These aren't very interesting except in a degenerate case.  */
7777         int count = 0;
7778         int sub_count;
7779         tree field;
7780
7781         /* Can't handle incomplete types nor sizes that are not
7782            fixed.  */
7783         if (!COMPLETE_TYPE_P (type)
7784             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7785           return -1;
7786
7787         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7788           {
7789             if (TREE_CODE (field) != FIELD_DECL)
7790               continue;
7791
7792             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7793             if (sub_count < 0)
7794               return -1;
7795             count = count > sub_count ? count : sub_count;
7796           }
7797
7798         /* There must be no padding.  */
7799         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7800           return -1;
7801
7802         return count;
7803       }
7804
7805     default:
7806       break;
7807     }
7808
7809   return -1;
7810 }
7811
7812 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7813    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
7814    array types.  The C99 floating-point complex types are also considered
7815    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
7816    types, which are GCC extensions and out of the scope of AAPCS64, are
7817    treated as composite types here as well.
7818
7819    Note that MODE itself is not sufficient in determining whether a type
7820    is such a composite type or not.  This is because
7821    stor-layout.c:compute_record_mode may have already changed the MODE
7822    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
7823    structure with only one field may have its MODE set to the mode of the
7824    field.  Also an integer mode whose size matches the size of the
7825    RECORD_TYPE type may be used to substitute the original mode
7826    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
7827    solely relied on.  */
7828
7829 static bool
7830 aarch64_composite_type_p (const_tree type,
7831                           machine_mode mode)
7832 {
7833   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7834     return true;
7835
7836   if (mode == BLKmode
7837       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7838       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7839     return true;
7840
7841   return false;
7842 }
7843
7844 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7845    type as described in AAPCS64 \S 4.1.2.
7846
7847    See the comment above aarch64_composite_type_p for the notes on MODE.  */
7848
7849 static bool
7850 aarch64_short_vector_p (const_tree type,
7851                         machine_mode mode)
7852 {
7853   HOST_WIDE_INT size = -1;
7854
7855   if (type && TREE_CODE (type) == VECTOR_TYPE)
7856     size = int_size_in_bytes (type);
7857   else if (!aarch64_composite_type_p (type, mode)
7858            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7859                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7860     size = GET_MODE_SIZE (mode);
7861
7862   return (size == 8 || size == 16) ? true : false;
7863 }
7864
7865 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7866    shall be passed or returned in simd/fp register(s) (providing these
7867    parameter passing registers are available).
7868
7869    Upon successful return, *COUNT returns the number of needed registers,
7870    *BASE_MODE returns the mode of the individual register and when IS_HAF
7871    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7872    floating-point aggregate or a homogeneous short-vector aggregate.  */
7873
7874 static bool
7875 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
7876                                          const_tree type,
7877                                          machine_mode *base_mode,
7878                                          int *count,
7879                                          bool *is_ha)
7880 {
7881   machine_mode new_mode = VOIDmode;
7882   bool composite_p = aarch64_composite_type_p (type, mode);
7883
7884   if (is_ha != NULL) *is_ha = false;
7885
7886   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7887       || aarch64_short_vector_p (type, mode))
7888     {
7889       *count = 1;
7890       new_mode = mode;
7891     }
7892   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7893     {
7894       if (is_ha != NULL) *is_ha = true;
7895       *count = 2;
7896       new_mode = GET_MODE_INNER (mode);
7897     }
7898   else if (type && composite_p)
7899     {
7900       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7901
7902       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7903         {
7904           if (is_ha != NULL) *is_ha = true;
7905           *count = ag_count;
7906         }
7907       else
7908         return false;
7909     }
7910   else
7911     return false;
7912
7913   *base_mode = new_mode;
7914   return true;
7915 }
7916
7917 /* Implement TARGET_STRUCT_VALUE_RTX.  */
7918
7919 static rtx
7920 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7921                           int incoming ATTRIBUTE_UNUSED)
7922 {
7923   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7924 }
7925
7926 /* Implements target hook vector_mode_supported_p.  */
7927 static bool
7928 aarch64_vector_mode_supported_p (machine_mode mode)
7929 {
7930   if (TARGET_SIMD
7931       && (mode == V4SImode  || mode == V8HImode
7932           || mode == V16QImode || mode == V2DImode
7933           || mode == V2SImode  || mode == V4HImode
7934           || mode == V8QImode || mode == V2SFmode
7935           || mode == V4SFmode || mode == V2DFmode
7936           || mode == V1DFmode))
7937     return true;
7938
7939   return false;
7940 }
7941
7942 /* Return appropriate SIMD container
7943    for MODE within a vector of WIDTH bits.  */
7944 static machine_mode
7945 aarch64_simd_container_mode (machine_mode mode, unsigned width)
7946 {
7947   gcc_assert (width == 64 || width == 128);
7948   if (TARGET_SIMD)
7949     {
7950       if (width == 128)
7951         switch (mode)
7952           {
7953           case DFmode:
7954             return V2DFmode;
7955           case SFmode:
7956             return V4SFmode;
7957           case SImode:
7958             return V4SImode;
7959           case HImode:
7960             return V8HImode;
7961           case QImode:
7962             return V16QImode;
7963           case DImode:
7964             return V2DImode;
7965           default:
7966             break;
7967           }
7968       else
7969         switch (mode)
7970           {
7971           case SFmode:
7972             return V2SFmode;
7973           case SImode:
7974             return V2SImode;
7975           case HImode:
7976             return V4HImode;
7977           case QImode:
7978             return V8QImode;
7979           default:
7980             break;
7981           }
7982     }
7983   return word_mode;
7984 }
7985
7986 /* Return 128-bit container as the preferred SIMD mode for MODE.  */
7987 static machine_mode
7988 aarch64_preferred_simd_mode (machine_mode mode)
7989 {
7990   return aarch64_simd_container_mode (mode, 128);
7991 }
7992
7993 /* Return the bitmask of possible vector sizes for the vectorizer
7994    to iterate over.  */
7995 static unsigned int
7996 aarch64_autovectorize_vector_sizes (void)
7997 {
7998   return (16 | 8);
7999 }
8000
8001 /* Implement TARGET_MANGLE_TYPE.  */
8002
8003 static const char *
8004 aarch64_mangle_type (const_tree type)
8005 {
8006   /* The AArch64 ABI documents say that "__va_list" has to be
8007      managled as if it is in the "std" namespace.  */
8008   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8009     return "St9__va_list";
8010
8011   /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
8012      builtin types.  */
8013   if (TYPE_NAME (type) != NULL)
8014     return aarch64_mangle_builtin_type (type);
8015
8016   /* Use the default mangling.  */
8017   return NULL;
8018 }
8019
8020
8021 /* Return true if the rtx_insn contains a MEM RTX somewhere
8022    in it.  */
8023
8024 static bool
8025 has_memory_op (rtx_insn *mem_insn)
8026 {
8027   subrtx_iterator::array_type array;
8028   FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8029     if (MEM_P (*iter))
8030       return true;
8031
8032   return false;
8033 }
8034
8035 /* Find the first rtx_insn before insn that will generate an assembly
8036    instruction.  */
8037
8038 static rtx_insn *
8039 aarch64_prev_real_insn (rtx_insn *insn)
8040 {
8041   if (!insn)
8042     return NULL;
8043
8044   do
8045     {
8046       insn = prev_real_insn (insn);
8047     }
8048   while (insn && recog_memoized (insn) < 0);
8049
8050   return insn;
8051 }
8052
8053 static bool
8054 is_madd_op (enum attr_type t1)
8055 {
8056   unsigned int i;
8057   /* A number of these may be AArch32 only.  */
8058   enum attr_type mlatypes[] = {
8059     TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8060     TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8061     TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8062   };
8063
8064   for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8065     {
8066       if (t1 == mlatypes[i])
8067         return true;
8068     }
8069
8070   return false;
8071 }
8072
8073 /* Check if there is a register dependency between a load and the insn
8074    for which we hold recog_data.  */
8075
8076 static bool
8077 dep_between_memop_and_curr (rtx memop)
8078 {
8079   rtx load_reg;
8080   int opno;
8081
8082   gcc_assert (GET_CODE (memop) == SET);
8083
8084   if (!REG_P (SET_DEST (memop)))
8085     return false;
8086
8087   load_reg = SET_DEST (memop);
8088   for (opno = 1; opno < recog_data.n_operands; opno++)
8089     {
8090       rtx operand = recog_data.operand[opno];
8091       if (REG_P (operand)
8092           && reg_overlap_mentioned_p (load_reg, operand))
8093         return true;
8094
8095     }
8096   return false;
8097 }
8098
8099
8100 /* When working around the Cortex-A53 erratum 835769,
8101    given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8102    instruction and has a preceding memory instruction such that a NOP
8103    should be inserted between them.  */
8104
8105 bool
8106 aarch64_madd_needs_nop (rtx_insn* insn)
8107 {
8108   enum attr_type attr_type;
8109   rtx_insn *prev;
8110   rtx body;
8111
8112   if (!aarch64_fix_a53_err835769)
8113     return false;
8114
8115   if (recog_memoized (insn) < 0)
8116     return false;
8117
8118   attr_type = get_attr_type (insn);
8119   if (!is_madd_op (attr_type))
8120     return false;
8121
8122   prev = aarch64_prev_real_insn (insn);
8123   /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8124      Restore recog state to INSN to avoid state corruption.  */
8125   extract_constrain_insn_cached (insn);
8126
8127   if (!prev || !has_memory_op (prev))
8128     return false;
8129
8130   body = single_set (prev);
8131
8132   /* If the previous insn is a memory op and there is no dependency between
8133      it and the DImode madd, emit a NOP between them.  If body is NULL then we
8134      have a complex memory operation, probably a load/store pair.
8135      Be conservative for now and emit a NOP.  */
8136   if (GET_MODE (recog_data.operand[0]) == DImode
8137       && (!body || !dep_between_memop_and_curr (body)))
8138     return true;
8139
8140   return false;
8141
8142 }
8143
8144
8145 /* Implement FINAL_PRESCAN_INSN.  */
8146
8147 void
8148 aarch64_final_prescan_insn (rtx_insn *insn)
8149 {
8150   if (aarch64_madd_needs_nop (insn))
8151     fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8152 }
8153
8154
8155 /* Return the equivalent letter for size.  */
8156 static char
8157 sizetochar (int size)
8158 {
8159   switch (size)
8160     {
8161     case 64: return 'd';
8162     case 32: return 's';
8163     case 16: return 'h';
8164     case 8 : return 'b';
8165     default: gcc_unreachable ();
8166     }
8167 }
8168
8169 /* Return true iff x is a uniform vector of floating-point
8170    constants, and the constant can be represented in
8171    quarter-precision form.  Note, as aarch64_float_const_representable
8172    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
8173 static bool
8174 aarch64_vect_float_const_representable_p (rtx x)
8175 {
8176   int i = 0;
8177   REAL_VALUE_TYPE r0, ri;
8178   rtx x0, xi;
8179
8180   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8181     return false;
8182
8183   x0 = CONST_VECTOR_ELT (x, 0);
8184   if (!CONST_DOUBLE_P (x0))
8185     return false;
8186
8187   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8188
8189   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8190     {
8191       xi = CONST_VECTOR_ELT (x, i);
8192       if (!CONST_DOUBLE_P (xi))
8193         return false;
8194
8195       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8196       if (!REAL_VALUES_EQUAL (r0, ri))
8197         return false;
8198     }
8199
8200   return aarch64_float_const_representable_p (x0);
8201 }
8202
8203 /* Return true for valid and false for invalid.  */
8204 bool
8205 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
8206                               struct simd_immediate_info *info)
8207 {
8208 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
8209   matches = 1;                                          \
8210   for (i = 0; i < idx; i += (STRIDE))                   \
8211     if (!(TEST))                                        \
8212       matches = 0;                                      \
8213   if (matches)                                          \
8214     {                                                   \
8215       immtype = (CLASS);                                \
8216       elsize = (ELSIZE);                                \
8217       eshift = (SHIFT);                                 \
8218       emvn = (NEG);                                     \
8219       break;                                            \
8220     }
8221
8222   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8223   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8224   unsigned char bytes[16];
8225   int immtype = -1, matches;
8226   unsigned int invmask = inverse ? 0xff : 0;
8227   int eshift, emvn;
8228
8229   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8230     {
8231       if (! (aarch64_simd_imm_zero_p (op, mode)
8232              || aarch64_vect_float_const_representable_p (op)))
8233         return false;
8234
8235       if (info)
8236         {
8237           info->value = CONST_VECTOR_ELT (op, 0);
8238           info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
8239           info->mvn = false;
8240           info->shift = 0;
8241         }
8242
8243       return true;
8244     }
8245
8246   /* Splat vector constant out into a byte vector.  */
8247   for (i = 0; i < n_elts; i++)
8248     {
8249       /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
8250          it must be laid out in the vector register in reverse order.  */
8251       rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
8252       unsigned HOST_WIDE_INT elpart;
8253       unsigned int part, parts;
8254
8255       if (CONST_INT_P (el))
8256         {
8257           elpart = INTVAL (el);
8258           parts = 1;
8259         }
8260       else if (GET_CODE (el) == CONST_DOUBLE)
8261         {
8262           elpart = CONST_DOUBLE_LOW (el);
8263           parts = 2;
8264         }
8265       else
8266         gcc_unreachable ();
8267
8268       for (part = 0; part < parts; part++)
8269         {
8270           unsigned int byte;
8271           for (byte = 0; byte < innersize; byte++)
8272             {
8273               bytes[idx++] = (elpart & 0xff) ^ invmask;
8274               elpart >>= BITS_PER_UNIT;
8275             }
8276           if (GET_CODE (el) == CONST_DOUBLE)
8277             elpart = CONST_DOUBLE_HIGH (el);
8278         }
8279     }
8280
8281   /* Sanity check.  */
8282   gcc_assert (idx == GET_MODE_SIZE (mode));
8283
8284   do
8285     {
8286       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8287              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8288
8289       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8290              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8291
8292       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8293              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8294
8295       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8296              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8297
8298       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8299
8300       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8301
8302       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8303              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8304
8305       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8306              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8307
8308       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8309              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8310
8311       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8312              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8313
8314       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8315
8316       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8317
8318       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8319              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8320
8321       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8322              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8323
8324       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8325              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8326
8327       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8328              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8329
8330       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8331
8332       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8333              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8334     }
8335   while (0);
8336
8337   if (immtype == -1)
8338     return false;
8339
8340   if (info)
8341     {
8342       info->element_width = elsize;
8343       info->mvn = emvn != 0;
8344       info->shift = eshift;
8345
8346       unsigned HOST_WIDE_INT imm = 0;
8347
8348       if (immtype >= 12 && immtype <= 15)
8349         info->msl = true;
8350
8351       /* Un-invert bytes of recognized vector, if necessary.  */
8352       if (invmask != 0)
8353         for (i = 0; i < idx; i++)
8354           bytes[i] ^= invmask;
8355
8356       if (immtype == 17)
8357         {
8358           /* FIXME: Broken on 32-bit H_W_I hosts.  */
8359           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8360
8361           for (i = 0; i < 8; i++)
8362             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8363               << (i * BITS_PER_UNIT);
8364
8365
8366           info->value = GEN_INT (imm);
8367         }
8368       else
8369         {
8370           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8371             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8372
8373           /* Construct 'abcdefgh' because the assembler cannot handle
8374              generic constants.  */
8375           if (info->mvn)
8376             imm = ~imm;
8377           imm = (imm >> info->shift) & 0xff;
8378           info->value = GEN_INT (imm);
8379         }
8380     }
8381
8382   return true;
8383 #undef CHECK
8384 }
8385
8386 /* Check of immediate shift constants are within range.  */
8387 bool
8388 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
8389 {
8390   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8391   if (left)
8392     return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
8393   else
8394     return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8395 }
8396
8397 /* Return true if X is a uniform vector where all elements
8398    are either the floating-point constant 0.0 or the
8399    integer constant 0.  */
8400 bool
8401 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
8402 {
8403   return x == CONST0_RTX (mode);
8404 }
8405
8406 bool
8407 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
8408 {
8409   HOST_WIDE_INT imm = INTVAL (x);
8410   int i;
8411
8412   for (i = 0; i < 8; i++)
8413     {
8414       unsigned int byte = imm & 0xff;
8415       if (byte != 0xff && byte != 0)
8416        return false;
8417       imm >>= 8;
8418     }
8419
8420   return true;
8421 }
8422
8423 bool
8424 aarch64_mov_operand_p (rtx x,
8425                        enum aarch64_symbol_context context,
8426                        machine_mode mode)
8427 {
8428   if (GET_CODE (x) == HIGH
8429       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8430     return true;
8431
8432   if (CONST_INT_P (x))
8433     return true;
8434
8435   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8436     return true;
8437
8438   return aarch64_classify_symbolic_expression (x, context)
8439     == SYMBOL_TINY_ABSOLUTE;
8440 }
8441
8442 /* Return a const_int vector of VAL.  */
8443 rtx
8444 aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
8445 {
8446   int nunits = GET_MODE_NUNITS (mode);
8447   rtvec v = rtvec_alloc (nunits);
8448   int i;
8449
8450   for (i=0; i < nunits; i++)
8451     RTVEC_ELT (v, i) = GEN_INT (val);
8452
8453   return gen_rtx_CONST_VECTOR (mode, v);
8454 }
8455
8456 /* Check OP is a legal scalar immediate for the MOVI instruction.  */
8457
8458 bool
8459 aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
8460 {
8461   machine_mode vmode;
8462
8463   gcc_assert (!VECTOR_MODE_P (mode));
8464   vmode = aarch64_preferred_simd_mode (mode);
8465   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
8466   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
8467 }
8468
8469 /* Construct and return a PARALLEL RTX vector with elements numbering the
8470    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8471    the vector - from the perspective of the architecture.  This does not
8472    line up with GCC's perspective on lane numbers, so we end up with
8473    different masks depending on our target endian-ness.  The diagram
8474    below may help.  We must draw the distinction when building masks
8475    which select one half of the vector.  An instruction selecting
8476    architectural low-lanes for a big-endian target, must be described using
8477    a mask selecting GCC high-lanes.
8478
8479                  Big-Endian             Little-Endian
8480
8481 GCC             0   1   2   3           3   2   1   0
8482               | x | x | x | x |       | x | x | x | x |
8483 Architecture    3   2   1   0           3   2   1   0
8484
8485 Low Mask:         { 2, 3 }                { 0, 1 }
8486 High Mask:        { 0, 1 }                { 2, 3 }
8487 */
8488
8489 rtx
8490 aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
8491 {
8492   int nunits = GET_MODE_NUNITS (mode);
8493   rtvec v = rtvec_alloc (nunits / 2);
8494   int high_base = nunits / 2;
8495   int low_base = 0;
8496   int base;
8497   rtx t1;
8498   int i;
8499
8500   if (BYTES_BIG_ENDIAN)
8501     base = high ? low_base : high_base;
8502   else
8503     base = high ? high_base : low_base;
8504
8505   for (i = 0; i < nunits / 2; i++)
8506     RTVEC_ELT (v, i) = GEN_INT (base + i);
8507
8508   t1 = gen_rtx_PARALLEL (mode, v);
8509   return t1;
8510 }
8511
8512 /* Check OP for validity as a PARALLEL RTX vector with elements
8513    numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8514    from the perspective of the architecture.  See the diagram above
8515    aarch64_simd_vect_par_cnst_half for more details.  */
8516
8517 bool
8518 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
8519                                        bool high)
8520 {
8521   rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8522   HOST_WIDE_INT count_op = XVECLEN (op, 0);
8523   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8524   int i = 0;
8525
8526   if (!VECTOR_MODE_P (mode))
8527     return false;
8528
8529   if (count_op != count_ideal)
8530     return false;
8531
8532   for (i = 0; i < count_ideal; i++)
8533     {
8534       rtx elt_op = XVECEXP (op, 0, i);
8535       rtx elt_ideal = XVECEXP (ideal, 0, i);
8536
8537       if (!CONST_INT_P (elt_op)
8538           || INTVAL (elt_ideal) != INTVAL (elt_op))
8539         return false;
8540     }
8541   return true;
8542 }
8543
8544 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
8545    HIGH (exclusive).  */
8546 void
8547 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8548                           const_tree exp)
8549 {
8550   HOST_WIDE_INT lane;
8551   gcc_assert (CONST_INT_P (operand));
8552   lane = INTVAL (operand);
8553
8554   if (lane < low || lane >= high)
8555   {
8556     if (exp)
8557       error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8558     else
8559       error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8560   }
8561 }
8562
8563 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
8564    registers).  */
8565 void
8566 aarch64_simd_emit_pair_result_insn (machine_mode mode,
8567                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8568                             rtx op1)
8569 {
8570   rtx mem = gen_rtx_MEM (mode, destaddr);
8571   rtx tmp1 = gen_reg_rtx (mode);
8572   rtx tmp2 = gen_reg_rtx (mode);
8573
8574   emit_insn (intfn (tmp1, op1, tmp2));
8575
8576   emit_move_insn (mem, tmp1);
8577   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8578   emit_move_insn (mem, tmp2);
8579 }
8580
8581 /* Return TRUE if OP is a valid vector addressing mode.  */
8582 bool
8583 aarch64_simd_mem_operand_p (rtx op)
8584 {
8585   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8586                         || REG_P (XEXP (op, 0)));
8587 }
8588
8589 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8590    not to early-clobber SRC registers in the process.
8591
8592    We assume that the operands described by SRC and DEST represent a
8593    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
8594    number of components into which the copy has been decomposed.  */
8595 void
8596 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8597                                 rtx *src, unsigned int count)
8598 {
8599   unsigned int i;
8600
8601   if (!reg_overlap_mentioned_p (operands[0], operands[1])
8602       || REGNO (operands[0]) < REGNO (operands[1]))
8603     {
8604       for (i = 0; i < count; i++)
8605         {
8606           operands[2 * i] = dest[i];
8607           operands[2 * i + 1] = src[i];
8608         }
8609     }
8610   else
8611     {
8612       for (i = 0; i < count; i++)
8613         {
8614           operands[2 * i] = dest[count - i - 1];
8615           operands[2 * i + 1] = src[count - i - 1];
8616         }
8617     }
8618 }
8619
8620 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8621    one of VSTRUCT modes: OI, CI or XI.  */
8622 int
8623 aarch64_simd_attr_length_move (rtx_insn *insn)
8624 {
8625   machine_mode mode;
8626
8627   extract_insn_cached (insn);
8628
8629   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8630     {
8631       mode = GET_MODE (recog_data.operand[0]);
8632       switch (mode)
8633         {
8634         case OImode:
8635           return 8;
8636         case CImode:
8637           return 12;
8638         case XImode:
8639           return 16;
8640         default:
8641           gcc_unreachable ();
8642         }
8643     }
8644   return 4;
8645 }
8646
8647 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
8648    alignment of a vector to 128 bits.  */
8649 static HOST_WIDE_INT
8650 aarch64_simd_vector_alignment (const_tree type)
8651 {
8652   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8653   return MIN (align, 128);
8654 }
8655
8656 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
8657 static bool
8658 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8659 {
8660   if (is_packed)
8661     return false;
8662
8663   /* We guarantee alignment for vectors up to 128-bits.  */
8664   if (tree_int_cst_compare (TYPE_SIZE (type),
8665                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8666     return false;
8667
8668   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
8669   return true;
8670 }
8671
8672 /* If VALS is a vector constant that can be loaded into a register
8673    using DUP, generate instructions to do so and return an RTX to
8674    assign to the register.  Otherwise return NULL_RTX.  */
8675 static rtx
8676 aarch64_simd_dup_constant (rtx vals)
8677 {
8678   machine_mode mode = GET_MODE (vals);
8679   machine_mode inner_mode = GET_MODE_INNER (mode);
8680   int n_elts = GET_MODE_NUNITS (mode);
8681   bool all_same = true;
8682   rtx x;
8683   int i;
8684
8685   if (GET_CODE (vals) != CONST_VECTOR)
8686     return NULL_RTX;
8687
8688   for (i = 1; i < n_elts; ++i)
8689     {
8690       x = CONST_VECTOR_ELT (vals, i);
8691       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8692         all_same = false;
8693     }
8694
8695   if (!all_same)
8696     return NULL_RTX;
8697
8698   /* We can load this constant by using DUP and a constant in a
8699      single ARM register.  This will be cheaper than a vector
8700      load.  */
8701   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8702   return gen_rtx_VEC_DUPLICATE (mode, x);
8703 }
8704
8705
8706 /* Generate code to load VALS, which is a PARALLEL containing only
8707    constants (for vec_init) or CONST_VECTOR, efficiently into a
8708    register.  Returns an RTX to copy into the register, or NULL_RTX
8709    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
8710 static rtx
8711 aarch64_simd_make_constant (rtx vals)
8712 {
8713   machine_mode mode = GET_MODE (vals);
8714   rtx const_dup;
8715   rtx const_vec = NULL_RTX;
8716   int n_elts = GET_MODE_NUNITS (mode);
8717   int n_const = 0;
8718   int i;
8719
8720   if (GET_CODE (vals) == CONST_VECTOR)
8721     const_vec = vals;
8722   else if (GET_CODE (vals) == PARALLEL)
8723     {
8724       /* A CONST_VECTOR must contain only CONST_INTs and
8725          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8726          Only store valid constants in a CONST_VECTOR.  */
8727       for (i = 0; i < n_elts; ++i)
8728         {
8729           rtx x = XVECEXP (vals, 0, i);
8730           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8731             n_const++;
8732         }
8733       if (n_const == n_elts)
8734         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8735     }
8736   else
8737     gcc_unreachable ();
8738
8739   if (const_vec != NULL_RTX
8740       && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8741     /* Load using MOVI/MVNI.  */
8742     return const_vec;
8743   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8744     /* Loaded using DUP.  */
8745     return const_dup;
8746   else if (const_vec != NULL_RTX)
8747     /* Load from constant pool. We can not take advantage of single-cycle
8748        LD1 because we need a PC-relative addressing mode.  */
8749     return const_vec;
8750   else
8751     /* A PARALLEL containing something not valid inside CONST_VECTOR.
8752        We can not construct an initializer.  */
8753     return NULL_RTX;
8754 }
8755
8756 void
8757 aarch64_expand_vector_init (rtx target, rtx vals)
8758 {
8759   machine_mode mode = GET_MODE (target);
8760   machine_mode inner_mode = GET_MODE_INNER (mode);
8761   int n_elts = GET_MODE_NUNITS (mode);
8762   int n_var = 0, one_var = -1;
8763   bool all_same = true;
8764   rtx x, mem;
8765   int i;
8766
8767   x = XVECEXP (vals, 0, 0);
8768   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8769     n_var = 1, one_var = 0;
8770
8771   for (i = 1; i < n_elts; ++i)
8772     {
8773       x = XVECEXP (vals, 0, i);
8774       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8775         ++n_var, one_var = i;
8776
8777       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8778         all_same = false;
8779     }
8780
8781   if (n_var == 0)
8782     {
8783       rtx constant = aarch64_simd_make_constant (vals);
8784       if (constant != NULL_RTX)
8785         {
8786           emit_move_insn (target, constant);
8787           return;
8788         }
8789     }
8790
8791   /* Splat a single non-constant element if we can.  */
8792   if (all_same)
8793     {
8794       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8795       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8796       return;
8797     }
8798
8799   /* One field is non-constant.  Load constant then overwrite varying
8800      field.  This is more efficient than using the stack.  */
8801   if (n_var == 1)
8802     {
8803       rtx copy = copy_rtx (vals);
8804       rtx index = GEN_INT (one_var);
8805       enum insn_code icode;
8806
8807       /* Load constant part of vector, substitute neighboring value for
8808          varying element.  */
8809       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8810       aarch64_expand_vector_init (target, copy);
8811
8812       /* Insert variable.  */
8813       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8814       icode = optab_handler (vec_set_optab, mode);
8815       gcc_assert (icode != CODE_FOR_nothing);
8816       emit_insn (GEN_FCN (icode) (target, x, index));
8817       return;
8818     }
8819
8820   /* Construct the vector in memory one field at a time
8821      and load the whole vector.  */
8822   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8823   for (i = 0; i < n_elts; i++)
8824     emit_move_insn (adjust_address_nv (mem, inner_mode,
8825                                     i * GET_MODE_SIZE (inner_mode)),
8826                     XVECEXP (vals, 0, i));
8827   emit_move_insn (target, mem);
8828
8829 }
8830
8831 static unsigned HOST_WIDE_INT
8832 aarch64_shift_truncation_mask (machine_mode mode)
8833 {
8834   return
8835     (aarch64_vector_mode_supported_p (mode)
8836      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8837 }
8838
8839 #ifndef TLS_SECTION_ASM_FLAG
8840 #define TLS_SECTION_ASM_FLAG 'T'
8841 #endif
8842
8843 void
8844 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8845                                tree decl ATTRIBUTE_UNUSED)
8846 {
8847   char flagchars[10], *f = flagchars;
8848
8849   /* If we have already declared this section, we can use an
8850      abbreviated form to switch back to it -- unless this section is
8851      part of a COMDAT groups, in which case GAS requires the full
8852      declaration every time.  */
8853   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8854       && (flags & SECTION_DECLARED))
8855     {
8856       fprintf (asm_out_file, "\t.section\t%s\n", name);
8857       return;
8858     }
8859
8860   if (!(flags & SECTION_DEBUG))
8861     *f++ = 'a';
8862   if (flags & SECTION_WRITE)
8863     *f++ = 'w';
8864   if (flags & SECTION_CODE)
8865     *f++ = 'x';
8866   if (flags & SECTION_SMALL)
8867     *f++ = 's';
8868   if (flags & SECTION_MERGE)
8869     *f++ = 'M';
8870   if (flags & SECTION_STRINGS)
8871     *f++ = 'S';
8872   if (flags & SECTION_TLS)
8873     *f++ = TLS_SECTION_ASM_FLAG;
8874   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8875     *f++ = 'G';
8876   *f = '\0';
8877
8878   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8879
8880   if (!(flags & SECTION_NOTYPE))
8881     {
8882       const char *type;
8883       const char *format;
8884
8885       if (flags & SECTION_BSS)
8886         type = "nobits";
8887       else
8888         type = "progbits";
8889
8890 #ifdef TYPE_OPERAND_FMT
8891       format = "," TYPE_OPERAND_FMT;
8892 #else
8893       format = ",@%s";
8894 #endif
8895
8896       fprintf (asm_out_file, format, type);
8897
8898       if (flags & SECTION_ENTSIZE)
8899         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8900       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8901         {
8902           if (TREE_CODE (decl) == IDENTIFIER_NODE)
8903             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8904           else
8905             fprintf (asm_out_file, ",%s,comdat",
8906                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8907         }
8908     }
8909
8910   putc ('\n', asm_out_file);
8911 }
8912
8913 /* Select a format to encode pointers in exception handling data.  */
8914 int
8915 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8916 {
8917    int type;
8918    switch (aarch64_cmodel)
8919      {
8920      case AARCH64_CMODEL_TINY:
8921      case AARCH64_CMODEL_TINY_PIC:
8922      case AARCH64_CMODEL_SMALL:
8923      case AARCH64_CMODEL_SMALL_PIC:
8924        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
8925           for everything.  */
8926        type = DW_EH_PE_sdata4;
8927        break;
8928      default:
8929        /* No assumptions here.  8-byte relocs required.  */
8930        type = DW_EH_PE_sdata8;
8931        break;
8932      }
8933    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8934 }
8935
8936 /* Emit load exclusive.  */
8937
8938 static void
8939 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
8940                              rtx mem, rtx model_rtx)
8941 {
8942   rtx (*gen) (rtx, rtx, rtx);
8943
8944   switch (mode)
8945     {
8946     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8947     case HImode: gen = gen_aarch64_load_exclusivehi; break;
8948     case SImode: gen = gen_aarch64_load_exclusivesi; break;
8949     case DImode: gen = gen_aarch64_load_exclusivedi; break;
8950     default:
8951       gcc_unreachable ();
8952     }
8953
8954   emit_insn (gen (rval, mem, model_rtx));
8955 }
8956
8957 /* Emit store exclusive.  */
8958
8959 static void
8960 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
8961                               rtx rval, rtx mem, rtx model_rtx)
8962 {
8963   rtx (*gen) (rtx, rtx, rtx, rtx);
8964
8965   switch (mode)
8966     {
8967     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8968     case HImode: gen = gen_aarch64_store_exclusivehi; break;
8969     case SImode: gen = gen_aarch64_store_exclusivesi; break;
8970     case DImode: gen = gen_aarch64_store_exclusivedi; break;
8971     default:
8972       gcc_unreachable ();
8973     }
8974
8975   emit_insn (gen (bval, rval, mem, model_rtx));
8976 }
8977
8978 /* Mark the previous jump instruction as unlikely.  */
8979
8980 static void
8981 aarch64_emit_unlikely_jump (rtx insn)
8982 {
8983   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8984
8985   insn = emit_jump_insn (insn);
8986   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8987 }
8988
8989 /* Expand a compare and swap pattern.  */
8990
8991 void
8992 aarch64_expand_compare_and_swap (rtx operands[])
8993 {
8994   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8995   machine_mode mode, cmp_mode;
8996   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8997
8998   bval = operands[0];
8999   rval = operands[1];
9000   mem = operands[2];
9001   oldval = operands[3];
9002   newval = operands[4];
9003   is_weak = operands[5];
9004   mod_s = operands[6];
9005   mod_f = operands[7];
9006   mode = GET_MODE (mem);
9007   cmp_mode = mode;
9008
9009   /* Normally the succ memory model must be stronger than fail, but in the
9010      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9011      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
9012
9013   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
9014       && INTVAL (mod_s) == MEMMODEL_RELEASE)
9015     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
9016
9017   switch (mode)
9018     {
9019     case QImode:
9020     case HImode:
9021       /* For short modes, we're going to perform the comparison in SImode,
9022          so do the zero-extension now.  */
9023       cmp_mode = SImode;
9024       rval = gen_reg_rtx (SImode);
9025       oldval = convert_modes (SImode, mode, oldval, true);
9026       /* Fall through.  */
9027
9028     case SImode:
9029     case DImode:
9030       /* Force the value into a register if needed.  */
9031       if (!aarch64_plus_operand (oldval, mode))
9032         oldval = force_reg (cmp_mode, oldval);
9033       break;
9034
9035     default:
9036       gcc_unreachable ();
9037     }
9038
9039   switch (mode)
9040     {
9041     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
9042     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
9043     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
9044     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
9045     default:
9046       gcc_unreachable ();
9047     }
9048
9049   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
9050
9051   if (mode == QImode || mode == HImode)
9052     emit_move_insn (operands[1], gen_lowpart (mode, rval));
9053
9054   x = gen_rtx_REG (CCmode, CC_REGNUM);
9055   x = gen_rtx_EQ (SImode, x, const0_rtx);
9056   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
9057 }
9058
9059 /* Split a compare and swap pattern.  */
9060
9061 void
9062 aarch64_split_compare_and_swap (rtx operands[])
9063 {
9064   rtx rval, mem, oldval, newval, scratch;
9065   machine_mode mode;
9066   bool is_weak;
9067   rtx_code_label *label1, *label2;
9068   rtx x, cond;
9069
9070   rval = operands[0];
9071   mem = operands[1];
9072   oldval = operands[2];
9073   newval = operands[3];
9074   is_weak = (operands[4] != const0_rtx);
9075   scratch = operands[7];
9076   mode = GET_MODE (mem);
9077
9078   label1 = NULL;
9079   if (!is_weak)
9080     {
9081       label1 = gen_label_rtx ();
9082       emit_label (label1);
9083     }
9084   label2 = gen_label_rtx ();
9085
9086   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9087
9088   cond = aarch64_gen_compare_reg (NE, rval, oldval);
9089   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9090   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9091                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9092   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9093
9094   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9095
9096   if (!is_weak)
9097     {
9098       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9099       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9100                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9101       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9102     }
9103   else
9104     {
9105       cond = gen_rtx_REG (CCmode, CC_REGNUM);
9106       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9107       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9108     }
9109
9110   emit_label (label2);
9111 }
9112
9113 /* Split an atomic operation.  */
9114
9115 void
9116 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9117                      rtx value, rtx model_rtx, rtx cond)
9118 {
9119   machine_mode mode = GET_MODE (mem);
9120   machine_mode wmode = (mode == DImode ? DImode : SImode);
9121   rtx_code_label *label;
9122   rtx x;
9123
9124   label = gen_label_rtx ();
9125   emit_label (label);
9126
9127   if (new_out)
9128     new_out = gen_lowpart (wmode, new_out);
9129   if (old_out)
9130     old_out = gen_lowpart (wmode, old_out);
9131   else
9132     old_out = new_out;
9133   value = simplify_gen_subreg (wmode, value, mode, 0);
9134
9135   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9136
9137   switch (code)
9138     {
9139     case SET:
9140       new_out = value;
9141       break;
9142
9143     case NOT:
9144       x = gen_rtx_AND (wmode, old_out, value);
9145       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9146       x = gen_rtx_NOT (wmode, new_out);
9147       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9148       break;
9149
9150     case MINUS:
9151       if (CONST_INT_P (value))
9152         {
9153           value = GEN_INT (-INTVAL (value));
9154           code = PLUS;
9155         }
9156       /* Fall through.  */
9157
9158     default:
9159       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9160       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9161       break;
9162     }
9163
9164   aarch64_emit_store_exclusive (mode, cond, mem,
9165                                 gen_lowpart (mode, new_out), model_rtx);
9166
9167   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9168   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9169                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9170   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9171 }
9172
9173 static void
9174 aarch64_print_extension (void)
9175 {
9176   const struct aarch64_option_extension *opt = NULL;
9177
9178   for (opt = all_extensions; opt->name != NULL; opt++)
9179     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9180       asm_fprintf (asm_out_file, "+%s", opt->name);
9181
9182   asm_fprintf (asm_out_file, "\n");
9183 }
9184
9185 static void
9186 aarch64_start_file (void)
9187 {
9188   if (selected_arch)
9189     {
9190       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9191       aarch64_print_extension ();
9192     }
9193   else if (selected_cpu)
9194     {
9195       const char *truncated_name
9196             = aarch64_rewrite_selected_cpu (selected_cpu->name);
9197       asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
9198       aarch64_print_extension ();
9199     }
9200   default_file_start();
9201 }
9202
9203 /* Target hook for c_mode_for_suffix.  */
9204 static machine_mode
9205 aarch64_c_mode_for_suffix (char suffix)
9206 {
9207   if (suffix == 'q')
9208     return TFmode;
9209
9210   return VOIDmode;
9211 }
9212
9213 /* We can only represent floating point constants which will fit in
9214    "quarter-precision" values.  These values are characterised by
9215    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
9216    by:
9217
9218    (-1)^s * (n/16) * 2^r
9219
9220    Where:
9221      's' is the sign bit.
9222      'n' is an integer in the range 16 <= n <= 31.
9223      'r' is an integer in the range -3 <= r <= 4.  */
9224
9225 /* Return true iff X can be represented by a quarter-precision
9226    floating point immediate operand X.  Note, we cannot represent 0.0.  */
9227 bool
9228 aarch64_float_const_representable_p (rtx x)
9229 {
9230   /* This represents our current view of how many bits
9231      make up the mantissa.  */
9232   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9233   int exponent;
9234   unsigned HOST_WIDE_INT mantissa, mask;
9235   REAL_VALUE_TYPE r, m;
9236   bool fail;
9237
9238   if (!CONST_DOUBLE_P (x))
9239     return false;
9240
9241   if (GET_MODE (x) == VOIDmode)
9242     return false;
9243
9244   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9245
9246   /* We cannot represent infinities, NaNs or +/-zero.  We won't
9247      know if we have +zero until we analyse the mantissa, but we
9248      can reject the other invalid values.  */
9249   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9250       || REAL_VALUE_MINUS_ZERO (r))
9251     return false;
9252
9253   /* Extract exponent.  */
9254   r = real_value_abs (&r);
9255   exponent = REAL_EXP (&r);
9256
9257   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9258      highest (sign) bit, with a fixed binary point at bit point_pos.
9259      m1 holds the low part of the mantissa, m2 the high part.
9260      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9261      bits for the mantissa, this can fail (low bits will be lost).  */
9262   real_ldexp (&m, &r, point_pos - exponent);
9263   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
9264
9265   /* If the low part of the mantissa has bits set we cannot represent
9266      the value.  */
9267   if (w.elt (0) != 0)
9268     return false;
9269   /* We have rejected the lower HOST_WIDE_INT, so update our
9270      understanding of how many bits lie in the mantissa and
9271      look only at the high HOST_WIDE_INT.  */
9272   mantissa = w.elt (1);
9273   point_pos -= HOST_BITS_PER_WIDE_INT;
9274
9275   /* We can only represent values with a mantissa of the form 1.xxxx.  */
9276   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9277   if ((mantissa & mask) != 0)
9278     return false;
9279
9280   /* Having filtered unrepresentable values, we may now remove all
9281      but the highest 5 bits.  */
9282   mantissa >>= point_pos - 5;
9283
9284   /* We cannot represent the value 0.0, so reject it.  This is handled
9285      elsewhere.  */
9286   if (mantissa == 0)
9287     return false;
9288
9289   /* Then, as bit 4 is always set, we can mask it off, leaving
9290      the mantissa in the range [0, 15].  */
9291   mantissa &= ~(1 << 4);
9292   gcc_assert (mantissa <= 15);
9293
9294   /* GCC internally does not use IEEE754-like encoding (where normalized
9295      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
9296      Our mantissa values are shifted 4 places to the left relative to
9297      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9298      by 5 places to correct for GCC's representation.  */
9299   exponent = 5 - exponent;
9300
9301   return (exponent >= 0 && exponent <= 7);
9302 }
9303
9304 char*
9305 aarch64_output_simd_mov_immediate (rtx const_vector,
9306                                    machine_mode mode,
9307                                    unsigned width)
9308 {
9309   bool is_valid;
9310   static char templ[40];
9311   const char *mnemonic;
9312   const char *shift_op;
9313   unsigned int lane_count = 0;
9314   char element_char;
9315
9316   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
9317
9318   /* This will return true to show const_vector is legal for use as either
9319      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
9320      also update INFO to show how the immediate should be generated.  */
9321   is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
9322   gcc_assert (is_valid);
9323
9324   element_char = sizetochar (info.element_width);
9325   lane_count = width / info.element_width;
9326
9327   mode = GET_MODE_INNER (mode);
9328   if (mode == SFmode || mode == DFmode)
9329     {
9330       gcc_assert (info.shift == 0 && ! info.mvn);
9331       if (aarch64_float_const_zero_rtx_p (info.value))
9332         info.value = GEN_INT (0);
9333       else
9334         {
9335 #define buf_size 20
9336           REAL_VALUE_TYPE r;
9337           REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9338           char float_buf[buf_size] = {'\0'};
9339           real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9340 #undef buf_size
9341
9342           if (lane_count == 1)
9343             snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9344           else
9345             snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
9346                       lane_count, element_char, float_buf);
9347           return templ;
9348         }
9349     }
9350
9351   mnemonic = info.mvn ? "mvni" : "movi";
9352   shift_op = info.msl ? "msl" : "lsl";
9353
9354   if (lane_count == 1)
9355     snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9356               mnemonic, UINTVAL (info.value));
9357   else if (info.shift)
9358     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9359               ", %s %d", mnemonic, lane_count, element_char,
9360               UINTVAL (info.value), shift_op, info.shift);
9361   else
9362     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
9363               mnemonic, lane_count, element_char, UINTVAL (info.value));
9364   return templ;
9365 }
9366
9367 char*
9368 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
9369                                           machine_mode mode)
9370 {
9371   machine_mode vmode;
9372
9373   gcc_assert (!VECTOR_MODE_P (mode));
9374   vmode = aarch64_simd_container_mode (mode, 64);
9375   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9376   return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9377 }
9378
9379 /* Split operands into moves from op[1] + op[2] into op[0].  */
9380
9381 void
9382 aarch64_split_combinev16qi (rtx operands[3])
9383 {
9384   unsigned int dest = REGNO (operands[0]);
9385   unsigned int src1 = REGNO (operands[1]);
9386   unsigned int src2 = REGNO (operands[2]);
9387   machine_mode halfmode = GET_MODE (operands[1]);
9388   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9389   rtx destlo, desthi;
9390
9391   gcc_assert (halfmode == V16QImode);
9392
9393   if (src1 == dest && src2 == dest + halfregs)
9394     {
9395       /* No-op move.  Can't split to nothing; emit something.  */
9396       emit_note (NOTE_INSN_DELETED);
9397       return;
9398     }
9399
9400   /* Preserve register attributes for variable tracking.  */
9401   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9402   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9403                                GET_MODE_SIZE (halfmode));
9404
9405   /* Special case of reversed high/low parts.  */
9406   if (reg_overlap_mentioned_p (operands[2], destlo)
9407       && reg_overlap_mentioned_p (operands[1], desthi))
9408     {
9409       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9410       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9411       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9412     }
9413   else if (!reg_overlap_mentioned_p (operands[2], destlo))
9414     {
9415       /* Try to avoid unnecessary moves if part of the result
9416          is in the right place already.  */
9417       if (src1 != dest)
9418         emit_move_insn (destlo, operands[1]);
9419       if (src2 != dest + halfregs)
9420         emit_move_insn (desthi, operands[2]);
9421     }
9422   else
9423     {
9424       if (src2 != dest + halfregs)
9425         emit_move_insn (desthi, operands[2]);
9426       if (src1 != dest)
9427         emit_move_insn (destlo, operands[1]);
9428     }
9429 }
9430
9431 /* vec_perm support.  */
9432
9433 #define MAX_VECT_LEN 16
9434
9435 struct expand_vec_perm_d
9436 {
9437   rtx target, op0, op1;
9438   unsigned char perm[MAX_VECT_LEN];
9439   machine_mode vmode;
9440   unsigned char nelt;
9441   bool one_vector_p;
9442   bool testing_p;
9443 };
9444
9445 /* Generate a variable permutation.  */
9446
9447 static void
9448 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9449 {
9450   machine_mode vmode = GET_MODE (target);
9451   bool one_vector_p = rtx_equal_p (op0, op1);
9452
9453   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9454   gcc_checking_assert (GET_MODE (op0) == vmode);
9455   gcc_checking_assert (GET_MODE (op1) == vmode);
9456   gcc_checking_assert (GET_MODE (sel) == vmode);
9457   gcc_checking_assert (TARGET_SIMD);
9458
9459   if (one_vector_p)
9460     {
9461       if (vmode == V8QImode)
9462         {
9463           /* Expand the argument to a V16QI mode by duplicating it.  */
9464           rtx pair = gen_reg_rtx (V16QImode);
9465           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9466           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9467         }
9468       else
9469         {
9470           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9471         }
9472     }
9473   else
9474     {
9475       rtx pair;
9476
9477       if (vmode == V8QImode)
9478         {
9479           pair = gen_reg_rtx (V16QImode);
9480           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9481           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9482         }
9483       else
9484         {
9485           pair = gen_reg_rtx (OImode);
9486           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9487           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9488         }
9489     }
9490 }
9491
9492 void
9493 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9494 {
9495   machine_mode vmode = GET_MODE (target);
9496   unsigned int nelt = GET_MODE_NUNITS (vmode);
9497   bool one_vector_p = rtx_equal_p (op0, op1);
9498   rtx mask;
9499
9500   /* The TBL instruction does not use a modulo index, so we must take care
9501      of that ourselves.  */
9502   mask = aarch64_simd_gen_const_vector_dup (vmode,
9503       one_vector_p ? nelt - 1 : 2 * nelt - 1);
9504   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9505
9506   /* For big-endian, we also need to reverse the index within the vector
9507      (but not which vector).  */
9508   if (BYTES_BIG_ENDIAN)
9509     {
9510       /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
9511       if (!one_vector_p)
9512         mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9513       sel = expand_simple_binop (vmode, XOR, sel, mask,
9514                                  NULL, 0, OPTAB_LIB_WIDEN);
9515     }
9516   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9517 }
9518
9519 /* Recognize patterns suitable for the TRN instructions.  */
9520 static bool
9521 aarch64_evpc_trn (struct expand_vec_perm_d *d)
9522 {
9523   unsigned int i, odd, mask, nelt = d->nelt;
9524   rtx out, in0, in1, x;
9525   rtx (*gen) (rtx, rtx, rtx);
9526   machine_mode vmode = d->vmode;
9527
9528   if (GET_MODE_UNIT_SIZE (vmode) > 8)
9529     return false;
9530
9531   /* Note that these are little-endian tests.
9532      We correct for big-endian later.  */
9533   if (d->perm[0] == 0)
9534     odd = 0;
9535   else if (d->perm[0] == 1)
9536     odd = 1;
9537   else
9538     return false;
9539   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9540
9541   for (i = 0; i < nelt; i += 2)
9542     {
9543       if (d->perm[i] != i + odd)
9544         return false;
9545       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9546         return false;
9547     }
9548
9549   /* Success!  */
9550   if (d->testing_p)
9551     return true;
9552
9553   in0 = d->op0;
9554   in1 = d->op1;
9555   if (BYTES_BIG_ENDIAN)
9556     {
9557       x = in0, in0 = in1, in1 = x;
9558       odd = !odd;
9559     }
9560   out = d->target;
9561
9562   if (odd)
9563     {
9564       switch (vmode)
9565         {
9566         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9567         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9568         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9569         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9570         case V4SImode: gen = gen_aarch64_trn2v4si; break;
9571         case V2SImode: gen = gen_aarch64_trn2v2si; break;
9572         case V2DImode: gen = gen_aarch64_trn2v2di; break;
9573         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9574         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9575         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9576         default:
9577           return false;
9578         }
9579     }
9580   else
9581     {
9582       switch (vmode)
9583         {
9584         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9585         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9586         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9587         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9588         case V4SImode: gen = gen_aarch64_trn1v4si; break;
9589         case V2SImode: gen = gen_aarch64_trn1v2si; break;
9590         case V2DImode: gen = gen_aarch64_trn1v2di; break;
9591         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9592         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9593         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9594         default:
9595           return false;
9596         }
9597     }
9598
9599   emit_insn (gen (out, in0, in1));
9600   return true;
9601 }
9602
9603 /* Recognize patterns suitable for the UZP instructions.  */
9604 static bool
9605 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9606 {
9607   unsigned int i, odd, mask, nelt = d->nelt;
9608   rtx out, in0, in1, x;
9609   rtx (*gen) (rtx, rtx, rtx);
9610   machine_mode vmode = d->vmode;
9611
9612   if (GET_MODE_UNIT_SIZE (vmode) > 8)
9613     return false;
9614
9615   /* Note that these are little-endian tests.
9616      We correct for big-endian later.  */
9617   if (d->perm[0] == 0)
9618     odd = 0;
9619   else if (d->perm[0] == 1)
9620     odd = 1;
9621   else
9622     return false;
9623   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9624
9625   for (i = 0; i < nelt; i++)
9626     {
9627       unsigned elt = (i * 2 + odd) & mask;
9628       if (d->perm[i] != elt)
9629         return false;
9630     }
9631
9632   /* Success!  */
9633   if (d->testing_p)
9634     return true;
9635
9636   in0 = d->op0;
9637   in1 = d->op1;
9638   if (BYTES_BIG_ENDIAN)
9639     {
9640       x = in0, in0 = in1, in1 = x;
9641       odd = !odd;
9642     }
9643   out = d->target;
9644
9645   if (odd)
9646     {
9647       switch (vmode)
9648         {
9649         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9650         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9651         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9652         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9653         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9654         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9655         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9656         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9657         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9658         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9659         default:
9660           return false;
9661         }
9662     }
9663   else
9664     {
9665       switch (vmode)
9666         {
9667         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9668         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9669         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9670         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9671         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9672         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9673         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9674         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9675         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9676         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9677         default:
9678           return false;
9679         }
9680     }
9681
9682   emit_insn (gen (out, in0, in1));
9683   return true;
9684 }
9685
9686 /* Recognize patterns suitable for the ZIP instructions.  */
9687 static bool
9688 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9689 {
9690   unsigned int i, high, mask, nelt = d->nelt;
9691   rtx out, in0, in1, x;
9692   rtx (*gen) (rtx, rtx, rtx);
9693   machine_mode vmode = d->vmode;
9694
9695   if (GET_MODE_UNIT_SIZE (vmode) > 8)
9696     return false;
9697
9698   /* Note that these are little-endian tests.
9699      We correct for big-endian later.  */
9700   high = nelt / 2;
9701   if (d->perm[0] == high)
9702     /* Do Nothing.  */
9703     ;
9704   else if (d->perm[0] == 0)
9705     high = 0;
9706   else
9707     return false;
9708   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9709
9710   for (i = 0; i < nelt / 2; i++)
9711     {
9712       unsigned elt = (i + high) & mask;
9713       if (d->perm[i * 2] != elt)
9714         return false;
9715       elt = (elt + nelt) & mask;
9716       if (d->perm[i * 2 + 1] != elt)
9717         return false;
9718     }
9719
9720   /* Success!  */
9721   if (d->testing_p)
9722     return true;
9723
9724   in0 = d->op0;
9725   in1 = d->op1;
9726   if (BYTES_BIG_ENDIAN)
9727     {
9728       x = in0, in0 = in1, in1 = x;
9729       high = !high;
9730     }
9731   out = d->target;
9732
9733   if (high)
9734     {
9735       switch (vmode)
9736         {
9737         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9738         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9739         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9740         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9741         case V4SImode: gen = gen_aarch64_zip2v4si; break;
9742         case V2SImode: gen = gen_aarch64_zip2v2si; break;
9743         case V2DImode: gen = gen_aarch64_zip2v2di; break;
9744         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9745         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9746         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9747         default:
9748           return false;
9749         }
9750     }
9751   else
9752     {
9753       switch (vmode)
9754         {
9755         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9756         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9757         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9758         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9759         case V4SImode: gen = gen_aarch64_zip1v4si; break;
9760         case V2SImode: gen = gen_aarch64_zip1v2si; break;
9761         case V2DImode: gen = gen_aarch64_zip1v2di; break;
9762         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9763         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9764         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9765         default:
9766           return false;
9767         }
9768     }
9769
9770   emit_insn (gen (out, in0, in1));
9771   return true;
9772 }
9773
9774 /* Recognize patterns for the EXT insn.  */
9775
9776 static bool
9777 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9778 {
9779   unsigned int i, nelt = d->nelt;
9780   rtx (*gen) (rtx, rtx, rtx, rtx);
9781   rtx offset;
9782
9783   unsigned int location = d->perm[0]; /* Always < nelt.  */
9784
9785   /* Check if the extracted indices are increasing by one.  */
9786   for (i = 1; i < nelt; i++)
9787     {
9788       unsigned int required = location + i;
9789       if (d->one_vector_p)
9790         {
9791           /* We'll pass the same vector in twice, so allow indices to wrap.  */
9792           required &= (nelt - 1);
9793         }
9794       if (d->perm[i] != required)
9795         return false;
9796     }
9797
9798   switch (d->vmode)
9799     {
9800     case V16QImode: gen = gen_aarch64_extv16qi; break;
9801     case V8QImode: gen = gen_aarch64_extv8qi; break;
9802     case V4HImode: gen = gen_aarch64_extv4hi; break;
9803     case V8HImode: gen = gen_aarch64_extv8hi; break;
9804     case V2SImode: gen = gen_aarch64_extv2si; break;
9805     case V4SImode: gen = gen_aarch64_extv4si; break;
9806     case V2SFmode: gen = gen_aarch64_extv2sf; break;
9807     case V4SFmode: gen = gen_aarch64_extv4sf; break;
9808     case V2DImode: gen = gen_aarch64_extv2di; break;
9809     case V2DFmode: gen = gen_aarch64_extv2df; break;
9810     default:
9811       return false;
9812     }
9813
9814   /* Success! */
9815   if (d->testing_p)
9816     return true;
9817
9818   /* The case where (location == 0) is a no-op for both big- and little-endian,
9819      and is removed by the mid-end at optimization levels -O1 and higher.  */
9820
9821   if (BYTES_BIG_ENDIAN && (location != 0))
9822     {
9823       /* After setup, we want the high elements of the first vector (stored
9824          at the LSB end of the register), and the low elements of the second
9825          vector (stored at the MSB end of the register). So swap.  */
9826       rtx temp = d->op0;
9827       d->op0 = d->op1;
9828       d->op1 = temp;
9829       /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
9830       location = nelt - location;
9831     }
9832
9833   offset = GEN_INT (location);
9834   emit_insn (gen (d->target, d->op0, d->op1, offset));
9835   return true;
9836 }
9837
9838 /* Recognize patterns for the REV insns.  */
9839
9840 static bool
9841 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9842 {
9843   unsigned int i, j, diff, nelt = d->nelt;
9844   rtx (*gen) (rtx, rtx);
9845
9846   if (!d->one_vector_p)
9847     return false;
9848
9849   diff = d->perm[0];
9850   switch (diff)
9851     {
9852     case 7:
9853       switch (d->vmode)
9854         {
9855         case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9856         case V8QImode: gen = gen_aarch64_rev64v8qi;  break;
9857         default:
9858           return false;
9859         }
9860       break;
9861     case 3:
9862       switch (d->vmode)
9863         {
9864         case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9865         case V8QImode: gen = gen_aarch64_rev32v8qi;  break;
9866         case V8HImode: gen = gen_aarch64_rev64v8hi;  break;
9867         case V4HImode: gen = gen_aarch64_rev64v4hi;  break;
9868         default:
9869           return false;
9870         }
9871       break;
9872     case 1:
9873       switch (d->vmode)
9874         {
9875         case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9876         case V8QImode: gen = gen_aarch64_rev16v8qi;  break;
9877         case V8HImode: gen = gen_aarch64_rev32v8hi;  break;
9878         case V4HImode: gen = gen_aarch64_rev32v4hi;  break;
9879         case V4SImode: gen = gen_aarch64_rev64v4si;  break;
9880         case V2SImode: gen = gen_aarch64_rev64v2si;  break;
9881         case V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
9882         case V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
9883         default:
9884           return false;
9885         }
9886       break;
9887     default:
9888       return false;
9889     }
9890
9891   for (i = 0; i < nelt ; i += diff + 1)
9892     for (j = 0; j <= diff; j += 1)
9893       {
9894         /* This is guaranteed to be true as the value of diff
9895            is 7, 3, 1 and we should have enough elements in the
9896            queue to generate this.  Getting a vector mask with a
9897            value of diff other than these values implies that
9898            something is wrong by the time we get here.  */
9899         gcc_assert (i + j < nelt);
9900         if (d->perm[i + j] != i + diff - j)
9901           return false;
9902       }
9903
9904   /* Success! */
9905   if (d->testing_p)
9906     return true;
9907
9908   emit_insn (gen (d->target, d->op0));
9909   return true;
9910 }
9911
9912 static bool
9913 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9914 {
9915   rtx (*gen) (rtx, rtx, rtx);
9916   rtx out = d->target;
9917   rtx in0;
9918   machine_mode vmode = d->vmode;
9919   unsigned int i, elt, nelt = d->nelt;
9920   rtx lane;
9921
9922   elt = d->perm[0];
9923   for (i = 1; i < nelt; i++)
9924     {
9925       if (elt != d->perm[i])
9926         return false;
9927     }
9928
9929   /* The generic preparation in aarch64_expand_vec_perm_const_1
9930      swaps the operand order and the permute indices if it finds
9931      d->perm[0] to be in the second operand.  Thus, we can always
9932      use d->op0 and need not do any extra arithmetic to get the
9933      correct lane number.  */
9934   in0 = d->op0;
9935   lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */
9936
9937   switch (vmode)
9938     {
9939     case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9940     case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9941     case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9942     case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9943     case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9944     case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9945     case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9946     case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9947     case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9948     case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9949     default:
9950       return false;
9951     }
9952
9953   emit_insn (gen (out, in0, lane));
9954   return true;
9955 }
9956
9957 static bool
9958 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9959 {
9960   rtx rperm[MAX_VECT_LEN], sel;
9961   machine_mode vmode = d->vmode;
9962   unsigned int i, nelt = d->nelt;
9963
9964   if (d->testing_p)
9965     return true;
9966
9967   /* Generic code will try constant permutation twice.  Once with the
9968      original mode and again with the elements lowered to QImode.
9969      So wait and don't do the selector expansion ourselves.  */
9970   if (vmode != V8QImode && vmode != V16QImode)
9971     return false;
9972
9973   for (i = 0; i < nelt; ++i)
9974     {
9975       int nunits = GET_MODE_NUNITS (vmode);
9976
9977       /* If big-endian and two vectors we end up with a weird mixed-endian
9978          mode on NEON.  Reverse the index within each word but not the word
9979          itself.  */
9980       rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9981                                            : d->perm[i]);
9982     }
9983   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9984   sel = force_reg (vmode, sel);
9985
9986   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9987   return true;
9988 }
9989
9990 static bool
9991 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9992 {
9993   /* The pattern matching functions above are written to look for a small
9994      number to begin the sequence (0, 1, N/2).  If we begin with an index
9995      from the second operand, we can swap the operands.  */
9996   if (d->perm[0] >= d->nelt)
9997     {
9998       unsigned i, nelt = d->nelt;
9999       rtx x;
10000
10001       gcc_assert (nelt == (nelt & -nelt));
10002       for (i = 0; i < nelt; ++i)
10003         d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */
10004
10005       x = d->op0;
10006       d->op0 = d->op1;
10007       d->op1 = x;
10008     }
10009
10010   if (TARGET_SIMD)
10011     {
10012       if (aarch64_evpc_rev (d))
10013         return true;
10014       else if (aarch64_evpc_ext (d))
10015         return true;
10016       else if (aarch64_evpc_dup (d))
10017         return true;
10018       else if (aarch64_evpc_zip (d))
10019         return true;
10020       else if (aarch64_evpc_uzp (d))
10021         return true;
10022       else if (aarch64_evpc_trn (d))
10023         return true;
10024       return aarch64_evpc_tbl (d);
10025     }
10026   return false;
10027 }
10028
10029 /* Expand a vec_perm_const pattern.  */
10030
10031 bool
10032 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
10033 {
10034   struct expand_vec_perm_d d;
10035   int i, nelt, which;
10036
10037   d.target = target;
10038   d.op0 = op0;
10039   d.op1 = op1;
10040
10041   d.vmode = GET_MODE (target);
10042   gcc_assert (VECTOR_MODE_P (d.vmode));
10043   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10044   d.testing_p = false;
10045
10046   for (i = which = 0; i < nelt; ++i)
10047     {
10048       rtx e = XVECEXP (sel, 0, i);
10049       int ei = INTVAL (e) & (2 * nelt - 1);
10050       which |= (ei < nelt ? 1 : 2);
10051       d.perm[i] = ei;
10052     }
10053
10054   switch (which)
10055     {
10056     default:
10057       gcc_unreachable ();
10058
10059     case 3:
10060       d.one_vector_p = false;
10061       if (!rtx_equal_p (op0, op1))
10062         break;
10063
10064       /* The elements of PERM do not suggest that only the first operand
10065          is used, but both operands are identical.  Allow easier matching
10066          of the permutation by folding the permutation into the single
10067          input vector.  */
10068       /* Fall Through.  */
10069     case 2:
10070       for (i = 0; i < nelt; ++i)
10071         d.perm[i] &= nelt - 1;
10072       d.op0 = op1;
10073       d.one_vector_p = true;
10074       break;
10075
10076     case 1:
10077       d.op1 = op0;
10078       d.one_vector_p = true;
10079       break;
10080     }
10081
10082   return aarch64_expand_vec_perm_const_1 (&d);
10083 }
10084
10085 static bool
10086 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
10087                                      const unsigned char *sel)
10088 {
10089   struct expand_vec_perm_d d;
10090   unsigned int i, nelt, which;
10091   bool ret;
10092
10093   d.vmode = vmode;
10094   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10095   d.testing_p = true;
10096   memcpy (d.perm, sel, nelt);
10097
10098   /* Calculate whether all elements are in one vector.  */
10099   for (i = which = 0; i < nelt; ++i)
10100     {
10101       unsigned char e = d.perm[i];
10102       gcc_assert (e < 2 * nelt);
10103       which |= (e < nelt ? 1 : 2);
10104     }
10105
10106   /* If all elements are from the second vector, reindex as if from the
10107      first vector.  */
10108   if (which == 2)
10109     for (i = 0; i < nelt; ++i)
10110       d.perm[i] -= nelt;
10111
10112   /* Check whether the mask can be applied to a single vector.  */
10113   d.one_vector_p = (which != 3);
10114
10115   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10116   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10117   if (!d.one_vector_p)
10118     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10119
10120   start_sequence ();
10121   ret = aarch64_expand_vec_perm_const_1 (&d);
10122   end_sequence ();
10123
10124   return ret;
10125 }
10126
10127 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
10128 bool
10129 aarch64_cannot_change_mode_class (machine_mode from,
10130                                   machine_mode to,
10131                                   enum reg_class rclass)
10132 {
10133   /* Full-reg subregs are allowed on general regs or any class if they are
10134      the same size.  */
10135   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
10136       || !reg_classes_intersect_p (FP_REGS, rclass))
10137     return false;
10138
10139   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
10140      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
10141      2. Scalar to Scalar for integer modes or same size float modes.
10142      3. Vector to Vector modes.
10143      4. On little-endian only, Vector-Structure to Vector modes.  */
10144   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
10145     {
10146       if (aarch64_vector_mode_supported_p (from)
10147           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
10148         return false;
10149
10150       if (GET_MODE_NUNITS (from) == 1
10151           && GET_MODE_NUNITS (to) == 1
10152           && (GET_MODE_CLASS (from) == MODE_INT
10153               || from == to))
10154         return false;
10155
10156       if (aarch64_vector_mode_supported_p (from)
10157           && aarch64_vector_mode_supported_p (to))
10158         return false;
10159
10160       /* Within an vector structure straddling multiple vector registers
10161          we are in a mixed-endian representation.  As such, we can't
10162          easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
10163          switch between vectors and vector structures cheaply.  */
10164       if (!BYTES_BIG_ENDIAN)
10165         if ((aarch64_vector_mode_supported_p (from)
10166               && aarch64_vect_struct_mode_p (to))
10167             || (aarch64_vector_mode_supported_p (to)
10168               && aarch64_vect_struct_mode_p (from)))
10169           return false;
10170     }
10171
10172   return true;
10173 }
10174
10175 /* Implement MODES_TIEABLE_P.  */
10176
10177 bool
10178 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10179 {
10180   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10181     return true;
10182
10183   /* We specifically want to allow elements of "structure" modes to
10184      be tieable to the structure.  This more general condition allows
10185      other rarer situations too.  */
10186   if (TARGET_SIMD
10187       && aarch64_vector_mode_p (mode1)
10188       && aarch64_vector_mode_p (mode2))
10189     return true;
10190
10191   return false;
10192 }
10193
10194 /* Return a new RTX holding the result of moving POINTER forward by
10195    AMOUNT bytes.  */
10196
10197 static rtx
10198 aarch64_move_pointer (rtx pointer, int amount)
10199 {
10200   rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10201
10202   return adjust_automodify_address (pointer, GET_MODE (pointer),
10203                                     next, amount);
10204 }
10205
10206 /* Return a new RTX holding the result of moving POINTER forward by the
10207    size of the mode it points to.  */
10208
10209 static rtx
10210 aarch64_progress_pointer (rtx pointer)
10211 {
10212   HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10213
10214   return aarch64_move_pointer (pointer, amount);
10215 }
10216
10217 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10218    MODE bytes.  */
10219
10220 static void
10221 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
10222                                               machine_mode mode)
10223 {
10224   rtx reg = gen_reg_rtx (mode);
10225
10226   /* "Cast" the pointers to the correct mode.  */
10227   *src = adjust_address (*src, mode, 0);
10228   *dst = adjust_address (*dst, mode, 0);
10229   /* Emit the memcpy.  */
10230   emit_move_insn (reg, *src);
10231   emit_move_insn (*dst, reg);
10232   /* Move the pointers forward.  */
10233   *src = aarch64_progress_pointer (*src);
10234   *dst = aarch64_progress_pointer (*dst);
10235 }
10236
10237 /* Expand movmem, as if from a __builtin_memcpy.  Return true if
10238    we succeed, otherwise return false.  */
10239
10240 bool
10241 aarch64_expand_movmem (rtx *operands)
10242 {
10243   unsigned int n;
10244   rtx dst = operands[0];
10245   rtx src = operands[1];
10246   rtx base;
10247   bool speed_p = !optimize_function_for_size_p (cfun);
10248
10249   /* When optimizing for size, give a better estimate of the length of a
10250      memcpy call, but use the default otherwise.  */
10251   unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10252
10253   /* We can't do anything smart if the amount to copy is not constant.  */
10254   if (!CONST_INT_P (operands[2]))
10255     return false;
10256
10257   n = UINTVAL (operands[2]);
10258
10259   /* Try to keep the number of instructions low.  For cases below 16 bytes we
10260      need to make at most two moves.  For cases above 16 bytes it will be one
10261      move for each 16 byte chunk, then at most two additional moves.  */
10262   if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10263     return false;
10264
10265   base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10266   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10267
10268   base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10269   src = adjust_automodify_address (src, VOIDmode, base, 0);
10270
10271   /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
10272      1-byte chunk.  */
10273   if (n < 4)
10274     {
10275       if (n >= 2)
10276         {
10277           aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10278           n -= 2;
10279         }
10280
10281       if (n == 1)
10282         aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10283
10284       return true;
10285     }
10286
10287   /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
10288      4-byte chunk, partially overlapping with the previously copied chunk.  */
10289   if (n < 8)
10290     {
10291       aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10292       n -= 4;
10293       if (n > 0)
10294         {
10295           int move = n - 4;
10296
10297           src = aarch64_move_pointer (src, move);
10298           dst = aarch64_move_pointer (dst, move);
10299           aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10300         }
10301       return true;
10302     }
10303
10304   /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
10305      them, then (if applicable) an 8-byte chunk.  */
10306   while (n >= 8)
10307     {
10308       if (n / 16)
10309         {
10310           aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10311           n -= 16;
10312         }
10313       else
10314         {
10315           aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10316           n -= 8;
10317         }
10318     }
10319
10320   /* Finish the final bytes of the copy.  We can always do this in one
10321      instruction.  We either copy the exact amount we need, or partially
10322      overlap with the previous chunk we copied and copy 8-bytes.  */
10323   if (n == 0)
10324     return true;
10325   else if (n == 1)
10326     aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10327   else if (n == 2)
10328     aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10329   else if (n == 4)
10330     aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10331   else
10332     {
10333       if (n == 3)
10334         {
10335           src = aarch64_move_pointer (src, -1);
10336           dst = aarch64_move_pointer (dst, -1);
10337           aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10338         }
10339       else
10340         {
10341           int move = n - 8;
10342
10343           src = aarch64_move_pointer (src, move);
10344           dst = aarch64_move_pointer (dst, move);
10345           aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10346         }
10347     }
10348
10349   return true;
10350 }
10351
10352 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
10353
10354 static unsigned HOST_WIDE_INT
10355 aarch64_asan_shadow_offset (void)
10356 {
10357   return (HOST_WIDE_INT_1 << 36);
10358 }
10359
10360 static bool
10361 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
10362                                         unsigned int align,
10363                                         enum by_pieces_operation op,
10364                                         bool speed_p)
10365 {
10366   /* STORE_BY_PIECES can be used when copying a constant string, but
10367      in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10368      For now we always fail this and let the move_by_pieces code copy
10369      the string from read-only memory.  */
10370   if (op == STORE_BY_PIECES)
10371     return false;
10372
10373   return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10374 }
10375
10376 static enum machine_mode
10377 aarch64_code_to_ccmode (enum rtx_code code)
10378 {
10379   switch (code)
10380     {
10381     case NE:
10382       return CC_DNEmode;
10383
10384     case EQ:
10385       return CC_DEQmode;
10386
10387     case LE:
10388       return CC_DLEmode;
10389
10390     case LT:
10391       return CC_DLTmode;
10392
10393     case GE:
10394       return CC_DGEmode;
10395
10396     case GT:
10397       return CC_DGTmode;
10398
10399     case LEU:
10400       return CC_DLEUmode;
10401
10402     case LTU:
10403       return CC_DLTUmode;
10404
10405     case GEU:
10406       return CC_DGEUmode;
10407
10408     case GTU:
10409       return CC_DGTUmode;
10410
10411     default:
10412       return CCmode;
10413     }
10414 }
10415
10416 static rtx
10417 aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
10418                         int code, tree treeop0, tree treeop1)
10419 {
10420   enum machine_mode op_mode, cmp_mode, cc_mode;
10421   rtx op0, op1, cmp, target;
10422   int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10423   enum insn_code icode;
10424   struct expand_operand ops[4];
10425
10426   cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
10427   if (cc_mode == CCmode)
10428     return NULL_RTX;
10429
10430   start_sequence ();
10431   expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10432
10433   op_mode = GET_MODE (op0);
10434   if (op_mode == VOIDmode)
10435     op_mode = GET_MODE (op1);
10436
10437   switch (op_mode)
10438     {
10439     case QImode:
10440     case HImode:
10441     case SImode:
10442       cmp_mode = SImode;
10443       icode = CODE_FOR_cmpsi;
10444       break;
10445
10446     case DImode:
10447       cmp_mode = DImode;
10448       icode = CODE_FOR_cmpdi;
10449       break;
10450
10451     default:
10452       end_sequence ();
10453       return NULL_RTX;
10454     }
10455
10456   op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10457   op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10458   if (!op0 || !op1)
10459     {
10460       end_sequence ();
10461       return NULL_RTX;
10462     }
10463   *prep_seq = get_insns ();
10464   end_sequence ();
10465
10466   cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
10467   target = gen_rtx_REG (CCmode, CC_REGNUM);
10468
10469   create_output_operand (&ops[0], target, CCmode);
10470   create_fixed_operand (&ops[1], cmp);
10471   create_fixed_operand (&ops[2], op0);
10472   create_fixed_operand (&ops[3], op1);
10473
10474   start_sequence ();
10475   if (!maybe_expand_insn (icode, 4, ops))
10476     {
10477       end_sequence ();
10478       return NULL_RTX;
10479     }
10480   *gen_seq = get_insns ();
10481   end_sequence ();
10482
10483   return gen_rtx_REG (cc_mode, CC_REGNUM);
10484 }
10485
10486 static rtx
10487 aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
10488                        tree treeop0, tree treeop1, int bit_code)
10489 {
10490   rtx op0, op1, cmp0, cmp1, target;
10491   enum machine_mode op_mode, cmp_mode, cc_mode;
10492   int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10493   enum insn_code icode = CODE_FOR_ccmp_andsi;
10494   struct expand_operand ops[6];
10495
10496   cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
10497   if (cc_mode == CCmode)
10498     return NULL_RTX;
10499
10500   push_to_sequence ((rtx_insn*) *prep_seq);
10501   expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10502
10503   op_mode = GET_MODE (op0);
10504   if (op_mode == VOIDmode)
10505     op_mode = GET_MODE (op1);
10506
10507   switch (op_mode)
10508     {
10509     case QImode:
10510     case HImode:
10511     case SImode:
10512       cmp_mode = SImode;
10513       icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
10514                                                 : CODE_FOR_ccmp_iorsi;
10515       break;
10516
10517     case DImode:
10518       cmp_mode = DImode;
10519       icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
10520                                                 : CODE_FOR_ccmp_iordi;
10521       break;
10522
10523     default:
10524       end_sequence ();
10525       return NULL_RTX;
10526     }
10527
10528   op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10529   op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10530   if (!op0 || !op1)
10531     {
10532       end_sequence ();
10533       return NULL_RTX;
10534     }
10535   *prep_seq = get_insns ();
10536   end_sequence ();
10537
10538   target = gen_rtx_REG (cc_mode, CC_REGNUM);
10539   cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
10540   cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
10541
10542   create_fixed_operand (&ops[0], prev);
10543   create_fixed_operand (&ops[1], target);
10544   create_fixed_operand (&ops[2], op0);
10545   create_fixed_operand (&ops[3], op1);
10546   create_fixed_operand (&ops[4], cmp0);
10547   create_fixed_operand (&ops[5], cmp1);
10548
10549   push_to_sequence ((rtx_insn*) *gen_seq);
10550   if (!maybe_expand_insn (icode, 6, ops))
10551     {
10552       end_sequence ();
10553       return NULL_RTX;
10554     }
10555
10556   *gen_seq = get_insns ();
10557   end_sequence ();
10558
10559   return target;
10560 }
10561
10562 #undef TARGET_GEN_CCMP_FIRST
10563 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
10564
10565 #undef TARGET_GEN_CCMP_NEXT
10566 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
10567
10568 /* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
10569    instruction fusion of some sort.  */
10570
10571 static bool
10572 aarch64_macro_fusion_p (void)
10573 {
10574   return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10575 }
10576
10577
10578 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
10579    should be kept together during scheduling.  */
10580
10581 static bool
10582 aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10583 {
10584   rtx set_dest;
10585   rtx prev_set = single_set (prev);
10586   rtx curr_set = single_set (curr);
10587   /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
10588   bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10589
10590   if (!aarch64_macro_fusion_p ())
10591     return false;
10592
10593   if (simple_sets_p
10594       && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10595     {
10596       /* We are trying to match:
10597          prev (mov)  == (set (reg r0) (const_int imm16))
10598          curr (movk) == (set (zero_extract (reg r0)
10599                                            (const_int 16)
10600                                            (const_int 16))
10601                              (const_int imm16_1))  */
10602
10603       set_dest = SET_DEST (curr_set);
10604
10605       if (GET_CODE (set_dest) == ZERO_EXTRACT
10606           && CONST_INT_P (SET_SRC (curr_set))
10607           && CONST_INT_P (SET_SRC (prev_set))
10608           && CONST_INT_P (XEXP (set_dest, 2))
10609           && INTVAL (XEXP (set_dest, 2)) == 16
10610           && REG_P (XEXP (set_dest, 0))
10611           && REG_P (SET_DEST (prev_set))
10612           && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10613         {
10614           return true;
10615         }
10616     }
10617
10618   if (simple_sets_p
10619       && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10620     {
10621
10622       /*  We're trying to match:
10623           prev (adrp) == (set (reg r1)
10624                               (high (symbol_ref ("SYM"))))
10625           curr (add) == (set (reg r0)
10626                              (lo_sum (reg r1)
10627                                      (symbol_ref ("SYM"))))
10628           Note that r0 need not necessarily be the same as r1, especially
10629           during pre-regalloc scheduling.  */
10630
10631       if (satisfies_constraint_Ush (SET_SRC (prev_set))
10632           && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10633         {
10634           if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10635               && REG_P (XEXP (SET_SRC (curr_set), 0))
10636               && REGNO (XEXP (SET_SRC (curr_set), 0))
10637                  == REGNO (SET_DEST (prev_set))
10638               && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10639                               XEXP (SET_SRC (curr_set), 1)))
10640             return true;
10641         }
10642     }
10643
10644   if (simple_sets_p
10645       && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10646     {
10647
10648       /* We're trying to match:
10649          prev (movk) == (set (zero_extract (reg r0)
10650                                            (const_int 16)
10651                                            (const_int 32))
10652                              (const_int imm16_1))
10653          curr (movk) == (set (zero_extract (reg r0)
10654                                            (const_int 16)
10655                                            (const_int 48))
10656                              (const_int imm16_2))  */
10657
10658       if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10659           && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10660           && REG_P (XEXP (SET_DEST (prev_set), 0))
10661           && REG_P (XEXP (SET_DEST (curr_set), 0))
10662           && REGNO (XEXP (SET_DEST (prev_set), 0))
10663              == REGNO (XEXP (SET_DEST (curr_set), 0))
10664           && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10665           && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10666           && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10667           && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10668           && CONST_INT_P (SET_SRC (prev_set))
10669           && CONST_INT_P (SET_SRC (curr_set)))
10670         return true;
10671
10672     }
10673   if (simple_sets_p
10674       && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10675     {
10676       /* We're trying to match:
10677           prev (adrp) == (set (reg r0)
10678                               (high (symbol_ref ("SYM"))))
10679           curr (ldr) == (set (reg r1)
10680                              (mem (lo_sum (reg r0)
10681                                              (symbol_ref ("SYM")))))
10682                  or
10683           curr (ldr) == (set (reg r1)
10684                              (zero_extend (mem
10685                                            (lo_sum (reg r0)
10686                                                    (symbol_ref ("SYM"))))))  */
10687       if (satisfies_constraint_Ush (SET_SRC (prev_set))
10688           && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10689         {
10690           rtx curr_src = SET_SRC (curr_set);
10691
10692           if (GET_CODE (curr_src) == ZERO_EXTEND)
10693             curr_src = XEXP (curr_src, 0);
10694
10695           if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10696               && REG_P (XEXP (XEXP (curr_src, 0), 0))
10697               && REGNO (XEXP (XEXP (curr_src, 0), 0))
10698                  == REGNO (SET_DEST (prev_set))
10699               && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10700                               XEXP (SET_SRC (prev_set), 0)))
10701               return true;
10702         }
10703     }
10704
10705   if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10706       && any_condjump_p (curr))
10707     {
10708       enum attr_type prev_type = get_attr_type (prev);
10709
10710       /* FIXME: this misses some which is considered simple arthematic
10711          instructions for ThunderX.  Simple shifts are missed here.  */
10712       if (prev_type == TYPE_ALUS_SREG
10713           || prev_type == TYPE_ALUS_IMM
10714           || prev_type == TYPE_LOGICS_REG
10715           || prev_type == TYPE_LOGICS_IMM)
10716         return true;
10717     }
10718
10719   return false;
10720 }
10721
10722 /* If MEM is in the form of [base+offset], extract the two parts
10723    of address and set to BASE and OFFSET, otherwise return false
10724    after clearing BASE and OFFSET.  */
10725
10726 bool
10727 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10728 {
10729   rtx addr;
10730
10731   gcc_assert (MEM_P (mem));
10732
10733   addr = XEXP (mem, 0);
10734
10735   if (REG_P (addr))
10736     {
10737       *base = addr;
10738       *offset = const0_rtx;
10739       return true;
10740     }
10741
10742   if (GET_CODE (addr) == PLUS
10743       && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10744     {
10745       *base = XEXP (addr, 0);
10746       *offset = XEXP (addr, 1);
10747       return true;
10748     }
10749
10750   *base = NULL_RTX;
10751   *offset = NULL_RTX;
10752
10753   return false;
10754 }
10755
10756 /* Types for scheduling fusion.  */
10757 enum sched_fusion_type
10758 {
10759   SCHED_FUSION_NONE = 0,
10760   SCHED_FUSION_LD_SIGN_EXTEND,
10761   SCHED_FUSION_LD_ZERO_EXTEND,
10762   SCHED_FUSION_LD,
10763   SCHED_FUSION_ST,
10764   SCHED_FUSION_NUM
10765 };
10766
10767 /* If INSN is a load or store of address in the form of [base+offset],
10768    extract the two parts and set to BASE and OFFSET.  Return scheduling
10769    fusion type this INSN is.  */
10770
10771 static enum sched_fusion_type
10772 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10773 {
10774   rtx x, dest, src;
10775   enum sched_fusion_type fusion = SCHED_FUSION_LD;
10776
10777   gcc_assert (INSN_P (insn));
10778   x = PATTERN (insn);
10779   if (GET_CODE (x) != SET)
10780     return SCHED_FUSION_NONE;
10781
10782   src = SET_SRC (x);
10783   dest = SET_DEST (x);
10784
10785   if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
10786       && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
10787     return SCHED_FUSION_NONE;
10788
10789   if (GET_CODE (src) == SIGN_EXTEND)
10790     {
10791       fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10792       src = XEXP (src, 0);
10793       if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10794         return SCHED_FUSION_NONE;
10795     }
10796   else if (GET_CODE (src) == ZERO_EXTEND)
10797     {
10798       fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10799       src = XEXP (src, 0);
10800       if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10801         return SCHED_FUSION_NONE;
10802     }
10803
10804   if (GET_CODE (src) == MEM && REG_P (dest))
10805     extract_base_offset_in_addr (src, base, offset);
10806   else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10807     {
10808       fusion = SCHED_FUSION_ST;
10809       extract_base_offset_in_addr (dest, base, offset);
10810     }
10811   else
10812     return SCHED_FUSION_NONE;
10813
10814   if (*base == NULL_RTX || *offset == NULL_RTX)
10815     fusion = SCHED_FUSION_NONE;
10816
10817   return fusion;
10818 }
10819
10820 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10821
10822    Currently we only support to fuse ldr or str instructions, so FUSION_PRI
10823    and PRI are only calculated for these instructions.  For other instruction,
10824    FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
10825    type instruction fusion can be added by returning different priorities.
10826
10827    It's important that irrelevant instructions get the largest FUSION_PRI.  */
10828
10829 static void
10830 aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10831                                int *fusion_pri, int *pri)
10832 {
10833   int tmp, off_val;
10834   rtx base, offset;
10835   enum sched_fusion_type fusion;
10836
10837   gcc_assert (INSN_P (insn));
10838
10839   tmp = max_pri - 1;
10840   fusion = fusion_load_store (insn, &base, &offset);
10841   if (fusion == SCHED_FUSION_NONE)
10842     {
10843       *pri = tmp;
10844       *fusion_pri = tmp;
10845       return;
10846     }
10847
10848   /* Set FUSION_PRI according to fusion type and base register.  */
10849   *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10850
10851   /* Calculate PRI.  */
10852   tmp /= 2;
10853
10854   /* INSN with smaller offset goes first.  */
10855   off_val = (int)(INTVAL (offset));
10856   if (off_val >= 0)
10857     tmp -= (off_val & 0xfffff);
10858   else
10859     tmp += ((- off_val) & 0xfffff);
10860
10861   *pri = tmp;
10862   return;
10863 }
10864
10865 /* Given OPERANDS of consecutive load/store, check if we can merge
10866    them into ldp/stp.  LOAD is true if they are load instructions.
10867    MODE is the mode of memory operands.  */
10868
10869 bool
10870 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10871                                 enum machine_mode mode)
10872 {
10873   HOST_WIDE_INT offval_1, offval_2, msize;
10874   enum reg_class rclass_1, rclass_2;
10875   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10876
10877   if (load)
10878     {
10879       mem_1 = operands[1];
10880       mem_2 = operands[3];
10881       reg_1 = operands[0];
10882       reg_2 = operands[2];
10883       gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10884       if (REGNO (reg_1) == REGNO (reg_2))
10885         return false;
10886     }
10887   else
10888     {
10889       mem_1 = operands[0];
10890       mem_2 = operands[2];
10891       reg_1 = operands[1];
10892       reg_2 = operands[3];
10893     }
10894
10895   /* The mems cannot be volatile.  */
10896   if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
10897     return false;
10898
10899   /* Check if the addresses are in the form of [base+offset].  */
10900   extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10901   if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10902     return false;
10903   extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10904   if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10905     return false;
10906
10907   /* Check if the bases are same.  */
10908   if (!rtx_equal_p (base_1, base_2))
10909     return false;
10910
10911   offval_1 = INTVAL (offset_1);
10912   offval_2 = INTVAL (offset_2);
10913   msize = GET_MODE_SIZE (mode);
10914   /* Check if the offsets are consecutive.  */
10915   if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10916     return false;
10917
10918   /* Check if the addresses are clobbered by load.  */
10919   if (load)
10920     {
10921       if (reg_mentioned_p (reg_1, mem_1))
10922         return false;
10923
10924       /* In increasing order, the last load can clobber the address.  */
10925       if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10926       return false;
10927     }
10928
10929   if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10930     rclass_1 = FP_REGS;
10931   else
10932     rclass_1 = GENERAL_REGS;
10933
10934   if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10935     rclass_2 = FP_REGS;
10936   else
10937     rclass_2 = GENERAL_REGS;
10938
10939   /* Check if the registers are of same class.  */
10940   if (rclass_1 != rclass_2)
10941     return false;
10942
10943   return true;
10944 }
10945
10946 /* Given OPERANDS of consecutive load/store, check if we can merge
10947    them into ldp/stp by adjusting the offset.  LOAD is true if they
10948    are load instructions.  MODE is the mode of memory operands.
10949
10950    Given below consecutive stores:
10951
10952      str  w1, [xb, 0x100]
10953      str  w1, [xb, 0x104]
10954      str  w1, [xb, 0x108]
10955      str  w1, [xb, 0x10c]
10956
10957    Though the offsets are out of the range supported by stp, we can
10958    still pair them after adjusting the offset, like:
10959
10960      add  scratch, xb, 0x100
10961      stp  w1, w1, [scratch]
10962      stp  w1, w1, [scratch, 0x8]
10963
10964    The peephole patterns detecting this opportunity should guarantee
10965    the scratch register is avaliable.  */
10966
10967 bool
10968 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
10969                                        enum machine_mode mode)
10970 {
10971   enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
10972   HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
10973   rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
10974   rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
10975
10976   if (load)
10977     {
10978       reg_1 = operands[0];
10979       mem_1 = operands[1];
10980       reg_2 = operands[2];
10981       mem_2 = operands[3];
10982       reg_3 = operands[4];
10983       mem_3 = operands[5];
10984       reg_4 = operands[6];
10985       mem_4 = operands[7];
10986       gcc_assert (REG_P (reg_1) && REG_P (reg_2)
10987                   && REG_P (reg_3) && REG_P (reg_4));
10988       if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
10989         return false;
10990     }
10991   else
10992     {
10993       mem_1 = operands[0];
10994       reg_1 = operands[1];
10995       mem_2 = operands[2];
10996       reg_2 = operands[3];
10997       mem_3 = operands[4];
10998       reg_3 = operands[5];
10999       mem_4 = operands[6];
11000       reg_4 = operands[7];
11001     }
11002   /* Skip if memory operand is by itslef valid for ldp/stp.  */
11003   if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
11004     return false;
11005
11006   /* The mems cannot be volatile.  */
11007   if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
11008       || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
11009     return false;
11010
11011   /* Check if the addresses are in the form of [base+offset].  */
11012   extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
11013   if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
11014     return false;
11015   extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
11016   if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
11017     return false;
11018   extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
11019   if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
11020     return false;
11021   extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
11022   if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
11023     return false;
11024
11025   /* Check if the bases are same.  */
11026   if (!rtx_equal_p (base_1, base_2)
11027       || !rtx_equal_p (base_2, base_3)
11028       || !rtx_equal_p (base_3, base_4))
11029     return false;
11030
11031   offval_1 = INTVAL (offset_1);
11032   offval_2 = INTVAL (offset_2);
11033   offval_3 = INTVAL (offset_3);
11034   offval_4 = INTVAL (offset_4);
11035   msize = GET_MODE_SIZE (mode);
11036   /* Check if the offsets are consecutive.  */
11037   if ((offval_1 != (offval_2 + msize)
11038        || offval_1 != (offval_3 + msize * 2)
11039        || offval_1 != (offval_4 + msize * 3))
11040       && (offval_4 != (offval_3 + msize)
11041           || offval_4 != (offval_2 + msize * 2)
11042           || offval_4 != (offval_1 + msize * 3)))
11043     return false;
11044
11045   /* Check if the addresses are clobbered by load.  */
11046   if (load)
11047     {
11048       if (reg_mentioned_p (reg_1, mem_1)
11049           || reg_mentioned_p (reg_2, mem_2)
11050           || reg_mentioned_p (reg_3, mem_3))
11051         return false;
11052
11053       /* In increasing order, the last load can clobber the address.  */
11054       if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
11055         return false;
11056     }
11057
11058   if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11059     rclass_1 = FP_REGS;
11060   else
11061     rclass_1 = GENERAL_REGS;
11062
11063   if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11064     rclass_2 = FP_REGS;
11065   else
11066     rclass_2 = GENERAL_REGS;
11067
11068   if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
11069     rclass_3 = FP_REGS;
11070   else
11071     rclass_3 = GENERAL_REGS;
11072
11073   if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
11074     rclass_4 = FP_REGS;
11075   else
11076     rclass_4 = GENERAL_REGS;
11077
11078   /* Check if the registers are of same class.  */
11079   if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
11080     return false;
11081
11082   return true;
11083 }
11084
11085 /* Given OPERANDS of consecutive load/store, this function pairs them
11086    into ldp/stp after adjusting the offset.  It depends on the fact
11087    that addresses of load/store instructions are in increasing order.
11088    MODE is the mode of memory operands.  CODE is the rtl operator
11089    which should be applied to all memory operands, it's SIGN_EXTEND,
11090    ZERO_EXTEND or UNKNOWN.  */
11091
11092 bool
11093 aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
11094                              enum machine_mode mode, RTX_CODE code)
11095 {
11096   rtx base, offset, t1, t2;
11097   rtx mem_1, mem_2, mem_3, mem_4;
11098   HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
11099
11100   if (load)
11101     {
11102       mem_1 = operands[1];
11103       mem_2 = operands[3];
11104       mem_3 = operands[5];
11105       mem_4 = operands[7];
11106     }
11107   else
11108     {
11109       mem_1 = operands[0];
11110       mem_2 = operands[2];
11111       mem_3 = operands[4];
11112       mem_4 = operands[6];
11113       gcc_assert (code == UNKNOWN);
11114     }
11115
11116   extract_base_offset_in_addr (mem_1, &base, &offset);
11117   gcc_assert (base != NULL_RTX && offset != NULL_RTX);
11118
11119   /* Adjust offset thus it can fit in ldp/stp instruction.  */
11120   msize = GET_MODE_SIZE (mode);
11121   stp_off_limit = msize * 0x40;
11122   off_val = INTVAL (offset);
11123   abs_off = (off_val < 0) ? -off_val : off_val;
11124   new_off = abs_off % stp_off_limit;
11125   adj_off = abs_off - new_off;
11126
11127   /* Further adjust to make sure all offsets are OK.  */
11128   if ((new_off + msize * 2) >= stp_off_limit)
11129     {
11130       adj_off += stp_off_limit;
11131       new_off -= stp_off_limit;
11132     }
11133
11134   /* Make sure the adjustment can be done with ADD/SUB instructions.  */
11135   if (adj_off >= 0x1000)
11136     return false;
11137
11138   if (off_val < 0)
11139     {
11140       adj_off = -adj_off;
11141       new_off = -new_off;
11142     }
11143
11144   /* Create new memory references.  */
11145   mem_1 = change_address (mem_1, VOIDmode,
11146                           plus_constant (DImode, operands[8], new_off));
11147
11148   /* Check if the adjusted address is OK for ldp/stp.  */
11149   if (!aarch64_mem_pair_operand (mem_1, mode))
11150     return false;
11151
11152   msize = GET_MODE_SIZE (mode);
11153   mem_2 = change_address (mem_2, VOIDmode,
11154                           plus_constant (DImode,
11155                                          operands[8],
11156                                          new_off + msize));
11157   mem_3 = change_address (mem_3, VOIDmode,
11158                           plus_constant (DImode,
11159                                          operands[8],
11160                                          new_off + msize * 2));
11161   mem_4 = change_address (mem_4, VOIDmode,
11162                           plus_constant (DImode,
11163                                          operands[8],
11164                                          new_off + msize * 3));
11165
11166   if (code == ZERO_EXTEND)
11167     {
11168       mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
11169       mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
11170       mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
11171       mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
11172     }
11173   else if (code == SIGN_EXTEND)
11174     {
11175       mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
11176       mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
11177       mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
11178       mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
11179     }
11180
11181   if (load)
11182     {
11183       operands[1] = mem_1;
11184       operands[3] = mem_2;
11185       operands[5] = mem_3;
11186       operands[7] = mem_4;
11187     }
11188   else
11189     {
11190       operands[0] = mem_1;
11191       operands[2] = mem_2;
11192       operands[4] = mem_3;
11193       operands[6] = mem_4;
11194     }
11195
11196   /* Emit adjusting instruction.  */
11197   emit_insn (gen_rtx_SET (VOIDmode, operands[8],
11198                           plus_constant (DImode, base, adj_off)));
11199   /* Emit ldp/stp instructions.  */
11200   t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
11201   t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
11202   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11203   t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
11204   t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
11205   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11206   return true;
11207 }
11208
11209 #undef TARGET_ADDRESS_COST
11210 #define TARGET_ADDRESS_COST aarch64_address_cost
11211
11212 /* This hook will determines whether unnamed bitfields affect the alignment
11213    of the containing structure.  The hook returns true if the structure
11214    should inherit the alignment requirements of an unnamed bitfield's
11215    type.  */
11216 #undef TARGET_ALIGN_ANON_BITFIELD
11217 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
11218
11219 #undef TARGET_ASM_ALIGNED_DI_OP
11220 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11221
11222 #undef TARGET_ASM_ALIGNED_HI_OP
11223 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11224
11225 #undef TARGET_ASM_ALIGNED_SI_OP
11226 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11227
11228 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11229 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11230   hook_bool_const_tree_hwi_hwi_const_tree_true
11231
11232 #undef TARGET_ASM_FILE_START
11233 #define TARGET_ASM_FILE_START aarch64_start_file
11234
11235 #undef TARGET_ASM_OUTPUT_MI_THUNK
11236 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11237
11238 #undef TARGET_ASM_SELECT_RTX_SECTION
11239 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11240
11241 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11242 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11243
11244 #undef TARGET_BUILD_BUILTIN_VA_LIST
11245 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11246
11247 #undef TARGET_CALLEE_COPIES
11248 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11249
11250 #undef TARGET_CAN_ELIMINATE
11251 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11252
11253 #undef TARGET_CANNOT_FORCE_CONST_MEM
11254 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11255
11256 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11257 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11258
11259 /* Only the least significant bit is used for initialization guard
11260    variables.  */
11261 #undef TARGET_CXX_GUARD_MASK_BIT
11262 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
11263
11264 #undef TARGET_C_MODE_FOR_SUFFIX
11265 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11266
11267 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11268 #undef  TARGET_DEFAULT_TARGET_FLAGS
11269 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11270 #endif
11271
11272 #undef TARGET_CLASS_MAX_NREGS
11273 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11274
11275 #undef TARGET_BUILTIN_DECL
11276 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
11277
11278 #undef  TARGET_EXPAND_BUILTIN
11279 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11280
11281 #undef TARGET_EXPAND_BUILTIN_VA_START
11282 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11283
11284 #undef TARGET_FOLD_BUILTIN
11285 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11286
11287 #undef TARGET_FUNCTION_ARG
11288 #define TARGET_FUNCTION_ARG aarch64_function_arg
11289
11290 #undef TARGET_FUNCTION_ARG_ADVANCE
11291 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11292
11293 #undef TARGET_FUNCTION_ARG_BOUNDARY
11294 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11295
11296 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11297 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11298
11299 #undef TARGET_FUNCTION_VALUE
11300 #define TARGET_FUNCTION_VALUE aarch64_function_value
11301
11302 #undef TARGET_FUNCTION_VALUE_REGNO_P
11303 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11304
11305 #undef TARGET_FRAME_POINTER_REQUIRED
11306 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11307
11308 #undef TARGET_GIMPLE_FOLD_BUILTIN
11309 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11310
11311 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11312 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11313
11314 #undef  TARGET_INIT_BUILTINS
11315 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
11316
11317 #undef TARGET_LEGITIMATE_ADDRESS_P
11318 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11319
11320 #undef TARGET_LEGITIMATE_CONSTANT_P
11321 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11322
11323 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11324 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11325
11326 #undef TARGET_LRA_P
11327 #define TARGET_LRA_P hook_bool_void_true
11328
11329 #undef TARGET_MANGLE_TYPE
11330 #define TARGET_MANGLE_TYPE aarch64_mangle_type
11331
11332 #undef TARGET_MEMORY_MOVE_COST
11333 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11334
11335 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11336 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11337
11338 #undef TARGET_MUST_PASS_IN_STACK
11339 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11340
11341 /* This target hook should return true if accesses to volatile bitfields
11342    should use the narrowest mode possible.  It should return false if these
11343    accesses should use the bitfield container type.  */
11344 #undef TARGET_NARROW_VOLATILE_BITFIELD
11345 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
11346
11347 #undef  TARGET_OPTION_OVERRIDE
11348 #define TARGET_OPTION_OVERRIDE aarch64_override_options
11349
11350 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11351 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11352   aarch64_override_options_after_change
11353
11354 #undef TARGET_PASS_BY_REFERENCE
11355 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11356
11357 #undef TARGET_PREFERRED_RELOAD_CLASS
11358 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11359
11360 #undef TARGET_SCHED_REASSOCIATION_WIDTH
11361 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11362
11363 #undef TARGET_SECONDARY_RELOAD
11364 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11365
11366 #undef TARGET_SHIFT_TRUNCATION_MASK
11367 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11368
11369 #undef TARGET_SETUP_INCOMING_VARARGS
11370 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11371
11372 #undef TARGET_STRUCT_VALUE_RTX
11373 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
11374
11375 #undef TARGET_REGISTER_MOVE_COST
11376 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11377
11378 #undef TARGET_RETURN_IN_MEMORY
11379 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11380
11381 #undef TARGET_RETURN_IN_MSB
11382 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11383
11384 #undef TARGET_RTX_COSTS
11385 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
11386
11387 #undef TARGET_SCHED_ISSUE_RATE
11388 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11389
11390 #undef TARGET_TRAMPOLINE_INIT
11391 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11392
11393 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11394 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11395
11396 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11397 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11398
11399 #undef TARGET_ARRAY_MODE_SUPPORTED_P
11400 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11401
11402 #undef TARGET_VECTORIZE_ADD_STMT_COST
11403 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11404
11405 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11406 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11407   aarch64_builtin_vectorization_cost
11408
11409 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11410 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11411
11412 #undef TARGET_VECTORIZE_BUILTINS
11413 #define TARGET_VECTORIZE_BUILTINS
11414
11415 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11416 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11417   aarch64_builtin_vectorized_function
11418
11419 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11420 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11421   aarch64_autovectorize_vector_sizes
11422
11423 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11424 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11425   aarch64_atomic_assign_expand_fenv
11426
11427 /* Section anchor support.  */
11428
11429 #undef TARGET_MIN_ANCHOR_OFFSET
11430 #define TARGET_MIN_ANCHOR_OFFSET -256
11431
11432 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11433    byte offset; we can do much more for larger data types, but have no way
11434    to determine the size of the access.  We assume accesses are aligned.  */
11435 #undef TARGET_MAX_ANCHOR_OFFSET
11436 #define TARGET_MAX_ANCHOR_OFFSET 4095
11437
11438 #undef TARGET_VECTOR_ALIGNMENT
11439 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11440
11441 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11442 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11443   aarch64_simd_vector_alignment_reachable
11444
11445 /* vec_perm support.  */
11446
11447 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11448 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11449   aarch64_vectorize_vec_perm_const_ok
11450
11451
11452 #undef TARGET_FIXED_CONDITION_CODE_REGS
11453 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11454
11455 #undef TARGET_FLAGS_REGNUM
11456 #define TARGET_FLAGS_REGNUM CC_REGNUM
11457
11458 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11459 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11460
11461 #undef TARGET_ASAN_SHADOW_OFFSET
11462 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11463
11464 #undef TARGET_LEGITIMIZE_ADDRESS
11465 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11466
11467 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11468 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11469   aarch64_use_by_pieces_infrastructure_p
11470
11471 #undef TARGET_CAN_USE_DOLOOP_P
11472 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11473
11474 #undef TARGET_SCHED_MACRO_FUSION_P
11475 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11476
11477 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11478 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11479
11480 #undef TARGET_SCHED_FUSION_PRIORITY
11481 #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11482
11483 struct gcc_target targetm = TARGET_INITIALIZER;
11484
11485 #include "gt-aarch64.h"