rename DBX_REGISTER_NUMBER to DEBUGGER_REGNO
[official-gcc.git] gcc/config/arm/arm.cc
blob: 1a375367ac72a51e9811ba89777a207da8ab6f2d
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
76 /* This file should be included last. */
77 #include "target-def.h"
79 /* Forward definitions of types. */
80 typedef struct minipool_node Mnode;
81 typedef struct minipool_fixup Mfix;
83 void (*arm_lang_output_object_attributes_hook)(void);
85 struct four_ints
86 {
87 int i[4];
88 };
90 /* Forward function declarations. */
91 static bool arm_const_not_ok_for_debug_p (rtx);
92 static int arm_needs_doubleword_align (machine_mode, const_tree);
93 static int arm_compute_static_chain_stack_bytes (void);
94 static arm_stack_offsets *arm_get_frame_offsets (void);
95 static void arm_compute_frame_layout (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
98 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
99 static unsigned bit_count (unsigned long);
100 static unsigned bitmap_popcount (const sbitmap);
101 static int arm_address_register_rtx_p (rtx, int);
102 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
103 static bool is_called_in_ARM_mode (tree);
104 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
105 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
106 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
107 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
108 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
109 inline static int thumb1_index_register_rtx_p (rtx, int);
110 static int thumb_far_jump_used_p (void);
111 static bool thumb_force_lr_save (void);
112 static unsigned arm_size_return_regs (void);
113 static bool arm_assemble_integer (rtx, unsigned int, int);
114 static void arm_print_operand (FILE *, rtx, int);
115 static void arm_print_operand_address (FILE *, machine_mode, rtx);
116 static bool arm_print_operand_punct_valid_p (unsigned char code);
117 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
118 static arm_cc get_arm_condition_code (rtx);
119 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
120 static const char *output_multi_immediate (rtx *, const char *, const char *,
121 int, HOST_WIDE_INT);
122 static const char *shift_op (rtx, HOST_WIDE_INT *);
123 static struct machine_function *arm_init_machine_status (void);
124 static void thumb_exit (FILE *, int);
125 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
126 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
127 static Mnode *add_minipool_forward_ref (Mfix *);
128 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_backward_ref (Mfix *);
130 static void assign_minipool_offsets (Mfix *);
131 static void arm_print_value (FILE *, rtx);
132 static void dump_minipool (rtx_insn *);
133 static int arm_barrier_cost (rtx_insn *);
134 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
135 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
136 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
137 machine_mode, rtx);
138 static void arm_reorg (void);
139 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
140 static unsigned long arm_compute_save_reg0_reg12_mask (void);
141 static unsigned long arm_compute_save_core_reg_mask (void);
142 static unsigned long arm_isr_value (tree);
143 static unsigned long arm_compute_func_type (void);
144 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
145 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
147 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
148 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
149 #endif
150 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
151 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
152 static void arm_output_function_epilogue (FILE *);
153 static void arm_output_function_prologue (FILE *);
154 static int arm_comp_type_attributes (const_tree, const_tree);
155 static void arm_set_default_type_attributes (tree);
156 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
157 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
158 static int optimal_immediate_sequence (enum rtx_code code,
159 unsigned HOST_WIDE_INT val,
160 struct four_ints *return_sequence);
161 static int optimal_immediate_sequence_1 (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence,
164 int i);
165 static int arm_get_strip_length (int);
166 static bool arm_function_ok_for_sibcall (tree, tree);
167 static machine_mode arm_promote_function_mode (const_tree,
168 machine_mode, int *,
169 const_tree, int);
170 static bool arm_return_in_memory (const_tree, const_tree);
171 static rtx arm_function_value (const_tree, const_tree, bool);
172 static rtx arm_libcall_value_1 (machine_mode);
173 static rtx arm_libcall_value (machine_mode, const_rtx);
174 static bool arm_function_value_regno_p (const unsigned int);
175 static void arm_internal_label (FILE *, const char *, unsigned long);
176 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
177 tree);
178 static bool arm_have_conditional_execution (void);
179 static bool arm_cannot_force_const_mem (machine_mode, rtx);
180 static bool arm_legitimate_constant_p (machine_mode, rtx);
181 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
182 static int arm_insn_cost (rtx_insn *, bool);
183 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
184 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
185 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
186 static void emit_constant_insn (rtx cond, rtx pattern);
187 static rtx_insn *emit_set_insn (rtx, rtx);
188 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static void arm_emit_multi_reg_pop (unsigned long);
191 static int vfp_emit_fstmd (int, int);
192 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
193 static int arm_arg_partial_bytes (cumulative_args_t,
194 const function_arg_info &);
195 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
196 static void arm_function_arg_advance (cumulative_args_t,
197 const function_arg_info &);
198 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
199 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
200 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
201 const_tree);
202 static rtx aapcs_libcall_value (machine_mode);
203 static int aapcs_select_return_coproc (const_tree, const_tree);
205 #ifdef OBJECT_FORMAT_ELF
206 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
207 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
208 #endif
209 #ifndef ARM_PE
210 static void arm_encode_section_info (tree, rtx, int);
211 #endif
213 static void arm_file_end (void);
214 static void arm_file_start (void);
215 static void arm_insert_attributes (tree, tree *);
217 static void arm_setup_incoming_varargs (cumulative_args_t,
218 const function_arg_info &, int *, int);
219 static bool arm_pass_by_reference (cumulative_args_t,
220 const function_arg_info &);
221 static bool arm_promote_prototypes (const_tree);
222 static bool arm_default_short_enums (void);
223 static bool arm_align_anon_bitfield (void);
224 static bool arm_return_in_msb (const_tree);
225 static bool arm_must_pass_in_stack (const function_arg_info &);
226 static bool arm_return_in_memory (const_tree, const_tree);
227 #if ARM_UNWIND_INFO
228 static void arm_unwind_emit (FILE *, rtx_insn *);
229 static bool arm_output_ttype (rtx);
230 static void arm_asm_emit_except_personality (rtx);
231 #endif
232 static void arm_asm_init_sections (void);
233 static rtx arm_dwarf_register_span (rtx);
235 static tree arm_cxx_guard_type (void);
236 static bool arm_cxx_guard_mask_bit (void);
237 static tree arm_get_cookie_size (tree);
238 static bool arm_cookie_has_size (void);
239 static bool arm_cxx_cdtor_returns_this (void);
240 static bool arm_cxx_key_method_may_be_inline (void);
241 static void arm_cxx_determine_class_data_visibility (tree);
242 static bool arm_cxx_class_data_always_comdat (void);
243 static bool arm_cxx_use_aeabi_atexit (void);
244 static void arm_init_libfuncs (void);
245 static tree arm_build_builtin_va_list (void);
246 static void arm_expand_builtin_va_start (tree, rtx);
247 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
248 static void arm_option_override (void);
249 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
250 struct cl_target_option *);
251 static void arm_override_options_after_change (void);
252 static void arm_option_print (FILE *, int, struct cl_target_option *);
253 static void arm_set_current_function (tree);
254 static bool arm_can_inline_p (tree, tree);
255 static void arm_relayout_function (tree);
256 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
257 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
258 static bool arm_sched_can_speculate_insn (rtx_insn *);
259 static bool arm_macro_fusion_p (void);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
263 static int arm_first_cycle_multipass_dfa_lookahead (void);
264 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
265 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
266 static bool arm_output_addr_const_extra (FILE *, rtx);
267 static bool arm_allocate_stack_slots_for_args (void);
268 static bool arm_warn_func_return (tree);
269 static tree arm_promoted_type (const_tree t);
270 static bool arm_scalar_mode_supported_p (scalar_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (scalar_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
292 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
293 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
294 static int arm_default_branch_cost (bool, bool);
295 static int arm_cortex_a5_branch_cost (bool, bool);
296 static int arm_cortex_m_branch_cost (bool, bool);
297 static int arm_cortex_m7_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
300 rtx, const vec_perm_indices &);
302 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
304 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
305 tree vectype,
306 int misalign ATTRIBUTE_UNUSED);
308 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
309 bool op0_preserve_value);
310 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
312 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
313 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
314 const_tree);
315 static section *arm_function_section (tree, enum node_frequency, bool, bool);
316 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
317 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
318 int reloc);
319 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
320 static opt_scalar_float_mode arm_floatn_mode (int, bool);
321 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
322 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
323 static bool arm_modes_tieable_p (machine_mode, machine_mode);
324 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
325 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
326 vec<machine_mode> &,
327 vec<const char *> &, vec<rtx> &,
328 HARD_REG_SET &, location_t);
329 static const char *arm_identify_fpu_from_isa (sbitmap);
331 /* Table of machine attributes. */
332 static const struct attribute_spec arm_attribute_table[] =
333 {
334 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
335 affects_type_identity, handler, exclude } */
336 /* Function calls made to this symbol must be done indirectly, because
337 it may lie outside of the 26 bit addressing range of a normal function
338 call. */
339 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
340 /* Whereas these functions are always known to reside within the 26 bit
341 addressing range. */
342 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Specify the procedure call conventions for a function. */
344 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
345 NULL },
346 /* Interrupt Service Routines have special prologue and epilogue requirements. */
347 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "naked", 0, 0, true, false, false, false,
352 arm_handle_fndecl_attribute, NULL },
353 #ifdef ARM_PE
354 /* ARM/PE has three new attributes:
355 interfacearm - ?
356 dllexport - for exporting a function/variable that will live in a dll
357 dllimport - for importing a function/variable from a dll
359 Microsoft allows multiple declspecs in one __declspec, separating
360 them with spaces. We do NOT support this. Instead, use __declspec
361 multiple times.
362 */
363 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
364 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
365 { "interfacearm", 0, 0, true, false, false, false,
366 arm_handle_fndecl_attribute, NULL },
367 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
368 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "notshared", 0, 0, false, true, false, false,
373 arm_handle_notshared_attribute, NULL },
374 #endif
375 /* ARMv8-M Security Extensions support. */
376 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
377 arm_handle_cmse_nonsecure_entry, NULL },
378 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
379 arm_handle_cmse_nonsecure_call, NULL },
380 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
381 { NULL, 0, 0, false, false, false, false, NULL, NULL }
382 };
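/* Editorial example (not part of the original source): in user code these
   attributes are attached to declarations, for instance

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));
     void uart_isr (void) __attribute__ ((interrupt ("IRQ")));
     int secure_gateway (int) __attribute__ ((cmse_nonsecure_entry));

   and the handler functions named in the table above validate and record
   them when the front end processes the declaration.  */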
384 /* Initialize the GCC target structure. */
385 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
386 #undef TARGET_MERGE_DECL_ATTRIBUTES
387 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
388 #endif
390 #undef TARGET_CHECK_BUILTIN_CALL
391 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
393 #undef TARGET_LEGITIMIZE_ADDRESS
394 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
396 #undef TARGET_ATTRIBUTE_TABLE
397 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
399 #undef TARGET_INSERT_ATTRIBUTES
400 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START arm_file_start
404 #undef TARGET_ASM_FILE_END
405 #define TARGET_ASM_FILE_END arm_file_end
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP NULL
409 #undef TARGET_ASM_INTEGER
410 #define TARGET_ASM_INTEGER arm_assemble_integer
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND arm_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
419 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
420 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
422 #undef TARGET_ASM_FUNCTION_PROLOGUE
423 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
428 #undef TARGET_CAN_INLINE_P
429 #define TARGET_CAN_INLINE_P arm_can_inline_p
431 #undef TARGET_RELAYOUT_FUNCTION
432 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
434 #undef TARGET_OPTION_OVERRIDE
435 #define TARGET_OPTION_OVERRIDE arm_option_override
437 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
438 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
440 #undef TARGET_OPTION_RESTORE
441 #define TARGET_OPTION_RESTORE arm_option_restore
443 #undef TARGET_OPTION_PRINT
444 #define TARGET_OPTION_PRINT arm_option_print
446 #undef TARGET_COMP_TYPE_ATTRIBUTES
447 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
449 #undef TARGET_SCHED_CAN_SPECULATE_INSN
450 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
452 #undef TARGET_SCHED_MACRO_FUSION_P
453 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
455 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
456 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
458 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
459 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
461 #undef TARGET_SCHED_ADJUST_COST
462 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
464 #undef TARGET_SET_CURRENT_FUNCTION
465 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
467 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
468 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
470 #undef TARGET_SCHED_REORDER
471 #define TARGET_SCHED_REORDER arm_sched_reorder
473 #undef TARGET_REGISTER_MOVE_COST
474 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
476 #undef TARGET_MEMORY_MOVE_COST
477 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
479 #undef TARGET_ENCODE_SECTION_INFO
480 #ifdef ARM_PE
481 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
482 #else
483 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
484 #endif
486 #undef TARGET_STRIP_NAME_ENCODING
487 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
489 #undef TARGET_ASM_INTERNAL_LABEL
490 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
492 #undef TARGET_FLOATN_MODE
493 #define TARGET_FLOATN_MODE arm_floatn_mode
495 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
496 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
498 #undef TARGET_FUNCTION_VALUE
499 #define TARGET_FUNCTION_VALUE arm_function_value
501 #undef TARGET_LIBCALL_VALUE
502 #define TARGET_LIBCALL_VALUE arm_libcall_value
504 #undef TARGET_FUNCTION_VALUE_REGNO_P
505 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
512 #undef TARGET_RTX_COSTS
513 #define TARGET_RTX_COSTS arm_rtx_costs
514 #undef TARGET_ADDRESS_COST
515 #define TARGET_ADDRESS_COST arm_address_cost
516 #undef TARGET_INSN_COST
517 #define TARGET_INSN_COST arm_insn_cost
519 #undef TARGET_SHIFT_TRUNCATION_MASK
520 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
521 #undef TARGET_VECTOR_MODE_SUPPORTED_P
522 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
523 #undef TARGET_ARRAY_MODE_SUPPORTED_P
524 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
525 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
526 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
527 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
528 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
529 arm_autovectorize_vector_modes
531 #undef TARGET_MACHINE_DEPENDENT_REORG
532 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
534 #undef TARGET_INIT_BUILTINS
535 #define TARGET_INIT_BUILTINS arm_init_builtins
536 #undef TARGET_EXPAND_BUILTIN
537 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
538 #undef TARGET_BUILTIN_DECL
539 #define TARGET_BUILTIN_DECL arm_builtin_decl
541 #undef TARGET_INIT_LIBFUNCS
542 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
544 #undef TARGET_PROMOTE_FUNCTION_MODE
545 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
548 #undef TARGET_PASS_BY_REFERENCE
549 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
550 #undef TARGET_ARG_PARTIAL_BYTES
551 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
552 #undef TARGET_FUNCTION_ARG
553 #define TARGET_FUNCTION_ARG arm_function_arg
554 #undef TARGET_FUNCTION_ARG_ADVANCE
555 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
556 #undef TARGET_FUNCTION_ARG_PADDING
557 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
558 #undef TARGET_FUNCTION_ARG_BOUNDARY
559 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
564 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
565 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
567 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
568 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
569 #undef TARGET_TRAMPOLINE_INIT
570 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
571 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
572 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
574 #undef TARGET_WARN_FUNC_RETURN
575 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
577 #undef TARGET_DEFAULT_SHORT_ENUMS
578 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
580 #undef TARGET_ALIGN_ANON_BITFIELD
581 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
583 #undef TARGET_NARROW_VOLATILE_BITFIELD
584 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
586 #undef TARGET_CXX_GUARD_TYPE
587 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
589 #undef TARGET_CXX_GUARD_MASK_BIT
590 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
592 #undef TARGET_CXX_GET_COOKIE_SIZE
593 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
595 #undef TARGET_CXX_COOKIE_HAS_SIZE
596 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
598 #undef TARGET_CXX_CDTOR_RETURNS_THIS
599 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
601 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
602 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
604 #undef TARGET_CXX_USE_AEABI_ATEXIT
605 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
607 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
608 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
609 arm_cxx_determine_class_data_visibility
611 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
612 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
614 #undef TARGET_RETURN_IN_MSB
615 #define TARGET_RETURN_IN_MSB arm_return_in_msb
617 #undef TARGET_RETURN_IN_MEMORY
618 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
620 #undef TARGET_MUST_PASS_IN_STACK
621 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
623 #if ARM_UNWIND_INFO
624 #undef TARGET_ASM_UNWIND_EMIT
625 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
627 /* EABI unwinding tables use a different format for the typeinfo tables. */
628 #undef TARGET_ASM_TTYPE
629 #define TARGET_ASM_TTYPE arm_output_ttype
631 #undef TARGET_ARM_EABI_UNWINDER
632 #define TARGET_ARM_EABI_UNWINDER true
634 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
635 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
637 #endif /* ARM_UNWIND_INFO */
639 #undef TARGET_ASM_INIT_SECTIONS
640 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
642 #undef TARGET_DWARF_REGISTER_SPAN
643 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
645 #undef TARGET_CANNOT_COPY_INSN_P
646 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
648 #ifdef HAVE_AS_TLS
649 #undef TARGET_HAVE_TLS
650 #define TARGET_HAVE_TLS true
651 #endif
653 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
654 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
656 #undef TARGET_LEGITIMATE_CONSTANT_P
657 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
659 #undef TARGET_CANNOT_FORCE_CONST_MEM
660 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
662 #undef TARGET_MAX_ANCHOR_OFFSET
663 #define TARGET_MAX_ANCHOR_OFFSET 4095
665 /* The minimum is set such that the total size of the block
666 for a particular anchor is -4088 + 1 + 4095 bytes, which is
667 divisible by eight, ensuring natural spacing of anchors. */
668 #undef TARGET_MIN_ANCHOR_OFFSET
669 #define TARGET_MIN_ANCHOR_OFFSET -4088
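/* Editorial note: the block size referred to above works out as
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, hence the
   "divisible by eight" remark.  */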
671 #undef TARGET_SCHED_ISSUE_RATE
672 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
674 #undef TARGET_SCHED_VARIABLE_ISSUE
675 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
679 arm_first_cycle_multipass_dfa_lookahead
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
683 arm_first_cycle_multipass_dfa_lookahead_guard
685 #undef TARGET_MANGLE_TYPE
686 #define TARGET_MANGLE_TYPE arm_mangle_type
688 #undef TARGET_INVALID_CONVERSION
689 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
691 #undef TARGET_INVALID_UNARY_OP
692 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
694 #undef TARGET_INVALID_BINARY_OP
695 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
697 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
698 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
700 #undef TARGET_BUILD_BUILTIN_VA_LIST
701 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
702 #undef TARGET_EXPAND_BUILTIN_VA_START
703 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
704 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
705 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
707 #ifdef HAVE_AS_TLS
708 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
709 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
710 #endif
712 #undef TARGET_LEGITIMATE_ADDRESS_P
713 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
715 #undef TARGET_PREFERRED_RELOAD_CLASS
716 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
718 #undef TARGET_PROMOTED_TYPE
719 #define TARGET_PROMOTED_TYPE arm_promoted_type
721 #undef TARGET_SCALAR_MODE_SUPPORTED_P
722 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
724 #undef TARGET_COMPUTE_FRAME_LAYOUT
725 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
727 #undef TARGET_FRAME_POINTER_REQUIRED
728 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
730 #undef TARGET_CAN_ELIMINATE
731 #define TARGET_CAN_ELIMINATE arm_can_eliminate
733 #undef TARGET_CONDITIONAL_REGISTER_USAGE
734 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
736 #undef TARGET_CLASS_LIKELY_SPILLED_P
737 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
739 #undef TARGET_VECTORIZE_BUILTINS
740 #define TARGET_VECTORIZE_BUILTINS
742 #undef TARGET_VECTOR_ALIGNMENT
743 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
745 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
746 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
747 arm_vector_alignment_reachable
749 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
750 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
751 arm_builtin_support_vector_misalignment
753 #undef TARGET_PREFERRED_RENAME_CLASS
754 #define TARGET_PREFERRED_RENAME_CLASS \
755 arm_preferred_rename_class
757 #undef TARGET_VECTORIZE_VEC_PERM_CONST
758 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
760 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
761 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
762 arm_builtin_vectorization_cost
764 #undef TARGET_CANONICALIZE_COMPARISON
765 #define TARGET_CANONICALIZE_COMPARISON \
766 arm_canonicalize_comparison
768 #undef TARGET_ASAN_SHADOW_OFFSET
769 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
771 #undef MAX_INSN_PER_IT_BLOCK
772 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
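/* Editorial note: with -mrestrict-it (arm_restrict_it set) an IT block is
   limited to a single conditional instruction; otherwise the IT{x{y{z}}}
   encodings allow up to four instructions per block, which is what the
   fallback value of 4 reflects.  */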
774 #undef TARGET_CAN_USE_DOLOOP_P
775 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
777 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
778 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
780 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
781 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
783 #undef TARGET_SCHED_FUSION_PRIORITY
784 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
786 #undef TARGET_ASM_FUNCTION_SECTION
787 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
789 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
790 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
792 #undef TARGET_SECTION_TYPE_FLAGS
793 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
795 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
796 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
798 #undef TARGET_C_EXCESS_PRECISION
799 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
801 /* Although the architecture reserves bits 0 and 1, only the former is
802 used for ARM/Thumb ISA selection in v7 and earlier versions. */
803 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
804 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
806 #undef TARGET_FIXED_CONDITION_CODE_REGS
807 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
809 #undef TARGET_HARD_REGNO_NREGS
810 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
811 #undef TARGET_HARD_REGNO_MODE_OK
812 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
814 #undef TARGET_MODES_TIEABLE_P
815 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
817 #undef TARGET_CAN_CHANGE_MODE_CLASS
818 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
820 #undef TARGET_CONSTANT_ALIGNMENT
821 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
823 #undef TARGET_INVALID_WITHIN_DOLOOP
824 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
826 #undef TARGET_MD_ASM_ADJUST
827 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
829 #undef TARGET_STACK_PROTECT_GUARD
830 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
832 #undef TARGET_VECTORIZE_GET_MASK_MODE
833 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
835 /* Obstack for minipool constant handling. */
836 static struct obstack minipool_obstack;
837 static char * minipool_startobj;
839 /* The maximum number of insns skipped which
840 will be conditionalised if possible. */
841 static int max_insns_skipped = 5;
843 /* True if we are currently building a constant table. */
844 int making_const_table;
846 /* The processor for which instructions should be scheduled. */
847 enum processor_type arm_tune = TARGET_CPU_arm_none;
849 /* The current tuning set. */
850 const struct tune_params *current_tune;
852 /* Which floating point hardware to schedule for. */
853 int arm_fpu_attr;
855 /* Used for Thumb call_via trampolines. */
856 rtx thumb_call_via_label[14];
857 static int thumb_call_reg_needed;
859 /* The bits in this mask specify which instruction scheduling options should
860 be used. */
861 unsigned int tune_flags = 0;
863 /* The highest ARM architecture version supported by the
864 target. */
865 enum base_architecture arm_base_arch = BASE_ARCH_0;
867 /* Active target architecture and tuning. */
869 struct arm_build_target arm_active_target;
871 /* The following are used in the arm.md file as equivalents to bits
872 in the above two flag variables. */
874 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
875 int arm_arch4 = 0;
877 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
878 int arm_arch4t = 0;
880 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
881 int arm_arch5t = 0;
883 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
884 int arm_arch5te = 0;
886 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
887 int arm_arch6 = 0;
889 /* Nonzero if this chip supports the ARM 6K extensions. */
890 int arm_arch6k = 0;
892 /* Nonzero if this chip supports the ARM 6KZ extensions. */
893 int arm_arch6kz = 0;
895 /* Nonzero if instructions present in ARMv6-M can be used. */
896 int arm_arch6m = 0;
898 /* Nonzero if this chip supports the ARM 7 extensions. */
899 int arm_arch7 = 0;
901 /* Nonzero if this chip supports the Large Physical Address Extension. */
902 int arm_arch_lpae = 0;
904 /* Nonzero if instructions not present in the 'M' profile can be used. */
905 int arm_arch_notm = 0;
907 /* Nonzero if instructions present in ARMv7E-M can be used. */
908 int arm_arch7em = 0;
910 /* Nonzero if instructions present in ARMv8 can be used. */
911 int arm_arch8 = 0;
913 /* Nonzero if this chip supports the ARMv8.1 extensions. */
914 int arm_arch8_1 = 0;
916 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
917 int arm_arch8_2 = 0;
919 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
920 int arm_arch8_3 = 0;
922 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
923 int arm_arch8_4 = 0;
924 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
925 extensions. */
926 int arm_arch8_1m_main = 0;
928 /* Nonzero if this chip supports the FP16 instructions extension of ARM
929 Architecture 8.2. */
930 int arm_fp16_inst = 0;
932 /* Nonzero if this chip can benefit from load scheduling. */
933 int arm_ld_sched = 0;
935 /* Nonzero if this chip is a StrongARM. */
936 int arm_tune_strongarm = 0;
938 /* Nonzero if this chip supports Intel Wireless MMX technology. */
939 int arm_arch_iwmmxt = 0;
941 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
942 int arm_arch_iwmmxt2 = 0;
944 /* Nonzero if this chip is an XScale. */
945 int arm_arch_xscale = 0;
947 /* Nonzero if tuning for XScale */
948 int arm_tune_xscale = 0;
950 /* Nonzero if we want to tune for stores that access the write-buffer.
951 This typically means an ARM6 or ARM7 with MMU or MPU. */
952 int arm_tune_wbuf = 0;
954 /* Nonzero if tuning for Cortex-A9. */
955 int arm_tune_cortex_a9 = 0;
957 /* Nonzero if we should define __THUMB_INTERWORK__ in the
958 preprocessor.
959 XXX This is a bit of a hack, it's intended to help work around
960 problems in GLD which doesn't understand that armv5t code is
961 interworking clean. */
962 int arm_cpp_interwork = 0;
964 /* Nonzero if chip supports Thumb 1. */
965 int arm_arch_thumb1;
967 /* Nonzero if chip supports Thumb 2. */
968 int arm_arch_thumb2;
970 /* Nonzero if chip supports integer division instruction. */
971 int arm_arch_arm_hwdiv;
972 int arm_arch_thumb_hwdiv;
974 /* Nonzero if chip disallows volatile memory access in IT block. */
975 int arm_arch_no_volatile_ce;
977 /* Nonzero if we shouldn't use literal pools. */
978 bool arm_disable_literal_pool = false;
980 /* The register number to be used for the PIC offset register. */
981 unsigned arm_pic_register = INVALID_REGNUM;
983 enum arm_pcs arm_pcs_default;
985 /* For an explanation of these variables, see final_prescan_insn below. */
986 int arm_ccfsm_state;
987 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
988 enum arm_cond_code arm_current_cc;
990 rtx arm_target_insn;
991 int arm_target_label;
992 /* The number of conditionally executed insns, including the current insn. */
993 int arm_condexec_count = 0;
994 /* A bitmask specifying the patterns for the IT block.
995 Zero means do not output an IT block before this insn. */
996 int arm_condexec_mask = 0;
997 /* The number of bits used in arm_condexec_mask. */
998 int arm_condexec_masklen = 0;
1000 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1001 int arm_arch_crc = 0;
1003 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1004 int arm_arch_dotprod = 0;
1006 /* Nonzero if chip supports the ARMv8-M security extensions. */
1007 int arm_arch_cmse = 0;
1009 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1010 int arm_m_profile_small_mul = 0;
1012 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1013 int arm_arch_i8mm = 0;
1015 /* Nonzero if chip supports the BFloat16 instructions. */
1016 int arm_arch_bf16 = 0;
1018 /* Nonzero if chip supports the Custom Datapath Extension. */
1019 int arm_arch_cde = 0;
1020 int arm_arch_cde_coproc = 0;
1021 const int arm_arch_cde_coproc_bits[] = {
1022 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1023 };
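/* Editorial sketch: each element above is the bit recorded in
   arm_arch_cde_coproc for the corresponding coprocessor, so enabling
   coprocessor 0 (e.g. via an -march=...+cdecp0 extension) contributes 0x1,
   coprocessor 1 contributes 0x2, and so on.  This is an illustration of the
   mapping, assuming the +cdecpN option spelling.  */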
1025 /* The condition codes of the ARM, and the inverse function. */
1026 static const char * const arm_condition_codes[] =
1027 {
1028 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1029 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1030 };
1032 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1033 int arm_regs_in_sequence[] =
1034 {
1035 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1036 };
1038 #define DEF_FP_SYSREG(reg) #reg,
1039 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1040 FP_SYSREGS
1041 };
1042 #undef DEF_FP_SYSREG
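/* Editorial sketch of the X-macro above: FP_SYSREGS (defined in arm.h) is a
   list of DEF_FP_SYSREG (reg) invocations, so with the stringizing
   definition of DEF_FP_SYSREG the initializer expands roughly to

     const char *fp_sysreg_names[NB_FP_SYSREGS] = { "FPSCR", ... };

   The register names shown are an assumption for illustration only.  */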
1044 #define ARM_LSL_NAME "lsl"
1045 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1047 #define THUMB2_WORK_REGS \
1048 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1049 | (1 << SP_REGNUM) \
1050 | (1 << PC_REGNUM) \
1051 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1052 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1053 : 0)))
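/* Editorial worked example: 0xff covers r0-r7.  Assuming
   THUMB_HARD_FRAME_POINTER_REGNUM == 7 and no PIC register in use, the mask
   evaluates to 0xff & ~(1 << 7) == 0x7f, i.e. r0-r6 are available as work
   registers.  SP and PC lie above bit 7, so their terms clear nothing within
   the 0xff range here.  */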
1055 /* Initialization code. */
1057 struct cpu_tune
1058 {
1059 enum processor_type scheduler;
1060 unsigned int tune_flags;
1061 const struct tune_params *tune;
1062 };
1064 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1065 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1066 { \
1067 num_slots, \
1068 l1_size, \
1069 l1_line_size \
1070 }
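/* Editorial example: a tuning structure would instantiate this as, say,
   ARM_PREFETCH_BENEFICIAL (4, 32, 64), filling in the slot count, L1 cache
   size and L1 line size fields, or use ARM_PREFETCH_NOT_BENEFICIAL to
   disable the prefetch heuristic.  The numbers here are made up for
   illustration; units follow whatever the tune_params prefetch fields
   expect.  */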
1072 /* arm generic vectorizer costs. */
1073 static const
1074 struct cpu_vec_costs arm_default_vec_cost = {
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 1, /* vec_unalign_load_cost. */
1083 1, /* vec_unalign_store_cost. */
1084 1, /* vec_store_cost. */
1085 3, /* cond_taken_branch_cost. */
1086 1, /* cond_not_taken_branch_cost. */
1087 };
1089 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1090 #include "aarch-cost-tables.h"
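/* Editorial note: the COSTS_N_INSNS (N) values in the tables below follow
   the rtl.h convention of expressing costs in quarter-instruction units,
   i.e. COSTS_N_INSNS (N) expands to N * 4, so bare integers such as 1 or 2
   denote costs smaller than one full instruction.  */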
1094 const struct cpu_cost_table cortexa9_extra_costs =
1096 /* ALU */
1098 0, /* arith. */
1099 0, /* logical. */
1100 0, /* shift. */
1101 COSTS_N_INSNS (1), /* shift_reg. */
1102 COSTS_N_INSNS (1), /* arith_shift. */
1103 COSTS_N_INSNS (2), /* arith_shift_reg. */
1104 0, /* log_shift. */
1105 COSTS_N_INSNS (1), /* log_shift_reg. */
1106 COSTS_N_INSNS (1), /* extend. */
1107 COSTS_N_INSNS (2), /* extend_arith. */
1108 COSTS_N_INSNS (1), /* bfi. */
1109 COSTS_N_INSNS (1), /* bfx. */
1110 0, /* clz. */
1111 0, /* rev. */
1112 0, /* non_exec. */
1113 true /* non_exec_costs_exec. */
1116 /* MULT SImode */
1118 COSTS_N_INSNS (3), /* simple. */
1119 COSTS_N_INSNS (3), /* flag_setting. */
1120 COSTS_N_INSNS (2), /* extend. */
1121 COSTS_N_INSNS (3), /* add. */
1122 COSTS_N_INSNS (2), /* extend_add. */
1123 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1125 /* MULT DImode */
1127 0, /* simple (N/A). */
1128 0, /* flag_setting (N/A). */
1129 COSTS_N_INSNS (4), /* extend. */
1130 0, /* add (N/A). */
1131 COSTS_N_INSNS (4), /* extend_add. */
1132 0 /* idiv (N/A). */
1135 /* LD/ST */
1137 COSTS_N_INSNS (2), /* load. */
1138 COSTS_N_INSNS (2), /* load_sign_extend. */
1139 COSTS_N_INSNS (2), /* ldrd. */
1140 COSTS_N_INSNS (2), /* ldm_1st. */
1141 1, /* ldm_regs_per_insn_1st. */
1142 2, /* ldm_regs_per_insn_subsequent. */
1143 COSTS_N_INSNS (5), /* loadf. */
1144 COSTS_N_INSNS (5), /* loadd. */
1145 COSTS_N_INSNS (1), /* load_unaligned. */
1146 COSTS_N_INSNS (2), /* store. */
1147 COSTS_N_INSNS (2), /* strd. */
1148 COSTS_N_INSNS (2), /* stm_1st. */
1149 1, /* stm_regs_per_insn_1st. */
1150 2, /* stm_regs_per_insn_subsequent. */
1151 COSTS_N_INSNS (1), /* storef. */
1152 COSTS_N_INSNS (1), /* stored. */
1153 COSTS_N_INSNS (1), /* store_unaligned. */
1154 COSTS_N_INSNS (1), /* loadv. */
1155 COSTS_N_INSNS (1) /* storev. */
1158 /* FP SFmode */
1160 COSTS_N_INSNS (14), /* div. */
1161 COSTS_N_INSNS (4), /* mult. */
1162 COSTS_N_INSNS (7), /* mult_addsub. */
1163 COSTS_N_INSNS (30), /* fma. */
1164 COSTS_N_INSNS (3), /* addsub. */
1165 COSTS_N_INSNS (1), /* fpconst. */
1166 COSTS_N_INSNS (1), /* neg. */
1167 COSTS_N_INSNS (3), /* compare. */
1168 COSTS_N_INSNS (3), /* widen. */
1169 COSTS_N_INSNS (3), /* narrow. */
1170 COSTS_N_INSNS (3), /* toint. */
1171 COSTS_N_INSNS (3), /* fromint. */
1172 COSTS_N_INSNS (3) /* roundint. */
1174 /* FP DFmode */
1176 COSTS_N_INSNS (24), /* div. */
1177 COSTS_N_INSNS (5), /* mult. */
1178 COSTS_N_INSNS (8), /* mult_addsub. */
1179 COSTS_N_INSNS (30), /* fma. */
1180 COSTS_N_INSNS (3), /* addsub. */
1181 COSTS_N_INSNS (1), /* fpconst. */
1182 COSTS_N_INSNS (1), /* neg. */
1183 COSTS_N_INSNS (3), /* compare. */
1184 COSTS_N_INSNS (3), /* widen. */
1185 COSTS_N_INSNS (3), /* narrow. */
1186 COSTS_N_INSNS (3), /* toint. */
1187 COSTS_N_INSNS (3), /* fromint. */
1188 COSTS_N_INSNS (3) /* roundint. */
1191 /* Vector */
1193 COSTS_N_INSNS (1), /* alu. */
1194 COSTS_N_INSNS (4), /* mult. */
1195 COSTS_N_INSNS (1), /* movi. */
1196 COSTS_N_INSNS (2), /* dup. */
1197 COSTS_N_INSNS (2) /* extract. */
1201 const struct cpu_cost_table cortexa8_extra_costs =
1203 /* ALU */
1205 0, /* arith. */
1206 0, /* logical. */
1207 COSTS_N_INSNS (1), /* shift. */
1208 0, /* shift_reg. */
1209 COSTS_N_INSNS (1), /* arith_shift. */
1210 0, /* arith_shift_reg. */
1211 COSTS_N_INSNS (1), /* log_shift. */
1212 0, /* log_shift_reg. */
1213 0, /* extend. */
1214 0, /* extend_arith. */
1215 0, /* bfi. */
1216 0, /* bfx. */
1217 0, /* clz. */
1218 0, /* rev. */
1219 0, /* non_exec. */
1220 true /* non_exec_costs_exec. */
1223 /* MULT SImode */
1225 COSTS_N_INSNS (1), /* simple. */
1226 COSTS_N_INSNS (1), /* flag_setting. */
1227 COSTS_N_INSNS (1), /* extend. */
1228 COSTS_N_INSNS (1), /* add. */
1229 COSTS_N_INSNS (1), /* extend_add. */
1230 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1232 /* MULT DImode */
1234 0, /* simple (N/A). */
1235 0, /* flag_setting (N/A). */
1236 COSTS_N_INSNS (2), /* extend. */
1237 0, /* add (N/A). */
1238 COSTS_N_INSNS (2), /* extend_add. */
1239 0 /* idiv (N/A). */
1242 /* LD/ST */
1244 COSTS_N_INSNS (1), /* load. */
1245 COSTS_N_INSNS (1), /* load_sign_extend. */
1246 COSTS_N_INSNS (1), /* ldrd. */
1247 COSTS_N_INSNS (1), /* ldm_1st. */
1248 1, /* ldm_regs_per_insn_1st. */
1249 2, /* ldm_regs_per_insn_subsequent. */
1250 COSTS_N_INSNS (1), /* loadf. */
1251 COSTS_N_INSNS (1), /* loadd. */
1252 COSTS_N_INSNS (1), /* load_unaligned. */
1253 COSTS_N_INSNS (1), /* store. */
1254 COSTS_N_INSNS (1), /* strd. */
1255 COSTS_N_INSNS (1), /* stm_1st. */
1256 1, /* stm_regs_per_insn_1st. */
1257 2, /* stm_regs_per_insn_subsequent. */
1258 COSTS_N_INSNS (1), /* storef. */
1259 COSTS_N_INSNS (1), /* stored. */
1260 COSTS_N_INSNS (1), /* store_unaligned. */
1261 COSTS_N_INSNS (1), /* loadv. */
1262 COSTS_N_INSNS (1) /* storev. */
1265 /* FP SFmode */
1267 COSTS_N_INSNS (36), /* div. */
1268 COSTS_N_INSNS (11), /* mult. */
1269 COSTS_N_INSNS (20), /* mult_addsub. */
1270 COSTS_N_INSNS (30), /* fma. */
1271 COSTS_N_INSNS (9), /* addsub. */
1272 COSTS_N_INSNS (3), /* fpconst. */
1273 COSTS_N_INSNS (3), /* neg. */
1274 COSTS_N_INSNS (6), /* compare. */
1275 COSTS_N_INSNS (4), /* widen. */
1276 COSTS_N_INSNS (4), /* narrow. */
1277 COSTS_N_INSNS (8), /* toint. */
1278 COSTS_N_INSNS (8), /* fromint. */
1279 COSTS_N_INSNS (8) /* roundint. */
1281 /* FP DFmode */
1283 COSTS_N_INSNS (64), /* div. */
1284 COSTS_N_INSNS (16), /* mult. */
1285 COSTS_N_INSNS (25), /* mult_addsub. */
1286 COSTS_N_INSNS (30), /* fma. */
1287 COSTS_N_INSNS (9), /* addsub. */
1288 COSTS_N_INSNS (3), /* fpconst. */
1289 COSTS_N_INSNS (3), /* neg. */
1290 COSTS_N_INSNS (6), /* compare. */
1291 COSTS_N_INSNS (6), /* widen. */
1292 COSTS_N_INSNS (6), /* narrow. */
1293 COSTS_N_INSNS (8), /* toint. */
1294 COSTS_N_INSNS (8), /* fromint. */
1295 COSTS_N_INSNS (8) /* roundint. */
1298 /* Vector */
1300 COSTS_N_INSNS (1), /* alu. */
1301 COSTS_N_INSNS (4), /* mult. */
1302 COSTS_N_INSNS (1), /* movi. */
1303 COSTS_N_INSNS (2), /* dup. */
1304 COSTS_N_INSNS (2) /* extract. */
1308 const struct cpu_cost_table cortexa5_extra_costs =
1310 /* ALU */
1312 0, /* arith. */
1313 0, /* logical. */
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1326 0, /* non_exec. */
1327 true /* non_exec_costs_exec. */
1331 /* MULT SImode */
1333 0, /* simple. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1340 /* MULT DImode */
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1345 0, /* add. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1347 0 /* idiv (N/A). */
1350 /* LD/ST */
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (6), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (4), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1), /* store_unaligned. */
1369 COSTS_N_INSNS (1), /* loadv. */
1370 COSTS_N_INSNS (1) /* storev. */
1373 /* FP SFmode */
1375 COSTS_N_INSNS (15), /* div. */
1376 COSTS_N_INSNS (3), /* mult. */
1377 COSTS_N_INSNS (7), /* mult_addsub. */
1378 COSTS_N_INSNS (7), /* fma. */
1379 COSTS_N_INSNS (3), /* addsub. */
1380 COSTS_N_INSNS (3), /* fpconst. */
1381 COSTS_N_INSNS (3), /* neg. */
1382 COSTS_N_INSNS (3), /* compare. */
1383 COSTS_N_INSNS (3), /* widen. */
1384 COSTS_N_INSNS (3), /* narrow. */
1385 COSTS_N_INSNS (3), /* toint. */
1386 COSTS_N_INSNS (3), /* fromint. */
1387 COSTS_N_INSNS (3) /* roundint. */
1389 /* FP DFmode */
1391 COSTS_N_INSNS (30), /* div. */
1392 COSTS_N_INSNS (6), /* mult. */
1393 COSTS_N_INSNS (10), /* mult_addsub. */
1394 COSTS_N_INSNS (7), /* fma. */
1395 COSTS_N_INSNS (3), /* addsub. */
1396 COSTS_N_INSNS (3), /* fpconst. */
1397 COSTS_N_INSNS (3), /* neg. */
1398 COSTS_N_INSNS (3), /* compare. */
1399 COSTS_N_INSNS (3), /* widen. */
1400 COSTS_N_INSNS (3), /* narrow. */
1401 COSTS_N_INSNS (3), /* toint. */
1402 COSTS_N_INSNS (3), /* fromint. */
1403 COSTS_N_INSNS (3) /* roundint. */
1406 /* Vector */
1408 COSTS_N_INSNS (1), /* alu. */
1409 COSTS_N_INSNS (4), /* mult. */
1410 COSTS_N_INSNS (1), /* movi. */
1411 COSTS_N_INSNS (2), /* dup. */
1412 COSTS_N_INSNS (2) /* extract. */
1417 const struct cpu_cost_table cortexa7_extra_costs =
1419 /* ALU */
1421 0, /* arith. */
1422 0, /* logical. */
1423 COSTS_N_INSNS (1), /* shift. */
1424 COSTS_N_INSNS (1), /* shift_reg. */
1425 COSTS_N_INSNS (1), /* arith_shift. */
1426 COSTS_N_INSNS (1), /* arith_shift_reg. */
1427 COSTS_N_INSNS (1), /* log_shift. */
1428 COSTS_N_INSNS (1), /* log_shift_reg. */
1429 COSTS_N_INSNS (1), /* extend. */
1430 COSTS_N_INSNS (1), /* extend_arith. */
1431 COSTS_N_INSNS (1), /* bfi. */
1432 COSTS_N_INSNS (1), /* bfx. */
1433 COSTS_N_INSNS (1), /* clz. */
1434 COSTS_N_INSNS (1), /* rev. */
1435 0, /* non_exec. */
1436 true /* non_exec_costs_exec. */
1440 /* MULT SImode */
1442 0, /* simple. */
1443 COSTS_N_INSNS (1), /* flag_setting. */
1444 COSTS_N_INSNS (1), /* extend. */
1445 COSTS_N_INSNS (1), /* add. */
1446 COSTS_N_INSNS (1), /* extend_add. */
1447 COSTS_N_INSNS (7) /* idiv. */
1449 /* MULT DImode */
1451 0, /* simple (N/A). */
1452 0, /* flag_setting (N/A). */
1453 COSTS_N_INSNS (1), /* extend. */
1454 0, /* add. */
1455 COSTS_N_INSNS (2), /* extend_add. */
1456 0 /* idiv (N/A). */
1459 /* LD/ST */
1461 COSTS_N_INSNS (1), /* load. */
1462 COSTS_N_INSNS (1), /* load_sign_extend. */
1463 COSTS_N_INSNS (3), /* ldrd. */
1464 COSTS_N_INSNS (1), /* ldm_1st. */
1465 1, /* ldm_regs_per_insn_1st. */
1466 2, /* ldm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* loadf. */
1468 COSTS_N_INSNS (2), /* loadd. */
1469 COSTS_N_INSNS (1), /* load_unaligned. */
1470 COSTS_N_INSNS (1), /* store. */
1471 COSTS_N_INSNS (3), /* strd. */
1472 COSTS_N_INSNS (1), /* stm_1st. */
1473 1, /* stm_regs_per_insn_1st. */
1474 2, /* stm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (2), /* storef. */
1476 COSTS_N_INSNS (2), /* stored. */
1477 COSTS_N_INSNS (1), /* store_unaligned. */
1478 COSTS_N_INSNS (1), /* loadv. */
1479 COSTS_N_INSNS (1) /* storev. */
1482 /* FP SFmode */
1484 COSTS_N_INSNS (15), /* div. */
1485 COSTS_N_INSNS (3), /* mult. */
1486 COSTS_N_INSNS (7), /* mult_addsub. */
1487 COSTS_N_INSNS (7), /* fma. */
1488 COSTS_N_INSNS (3), /* addsub. */
1489 COSTS_N_INSNS (3), /* fpconst. */
1490 COSTS_N_INSNS (3), /* neg. */
1491 COSTS_N_INSNS (3), /* compare. */
1492 COSTS_N_INSNS (3), /* widen. */
1493 COSTS_N_INSNS (3), /* narrow. */
1494 COSTS_N_INSNS (3), /* toint. */
1495 COSTS_N_INSNS (3), /* fromint. */
1496 COSTS_N_INSNS (3) /* roundint. */
1498 /* FP DFmode */
1500 COSTS_N_INSNS (30), /* div. */
1501 COSTS_N_INSNS (6), /* mult. */
1502 COSTS_N_INSNS (10), /* mult_addsub. */
1503 COSTS_N_INSNS (7), /* fma. */
1504 COSTS_N_INSNS (3), /* addsub. */
1505 COSTS_N_INSNS (3), /* fpconst. */
1506 COSTS_N_INSNS (3), /* neg. */
1507 COSTS_N_INSNS (3), /* compare. */
1508 COSTS_N_INSNS (3), /* widen. */
1509 COSTS_N_INSNS (3), /* narrow. */
1510 COSTS_N_INSNS (3), /* toint. */
1511 COSTS_N_INSNS (3), /* fromint. */
1512 COSTS_N_INSNS (3) /* roundint. */
1515 /* Vector */
1517 COSTS_N_INSNS (1), /* alu. */
1518 COSTS_N_INSNS (4), /* mult. */
1519 COSTS_N_INSNS (1), /* movi. */
1520 COSTS_N_INSNS (2), /* dup. */
1521 COSTS_N_INSNS (2) /* extract. */
1525 const struct cpu_cost_table cortexa12_extra_costs =
1527 /* ALU */
1529 0, /* arith. */
1530 0, /* logical. */
1531 0, /* shift. */
1532 COSTS_N_INSNS (1), /* shift_reg. */
1533 COSTS_N_INSNS (1), /* arith_shift. */
1534 COSTS_N_INSNS (1), /* arith_shift_reg. */
1535 COSTS_N_INSNS (1), /* log_shift. */
1536 COSTS_N_INSNS (1), /* log_shift_reg. */
1537 0, /* extend. */
1538 COSTS_N_INSNS (1), /* extend_arith. */
1539 0, /* bfi. */
1540 COSTS_N_INSNS (1), /* bfx. */
1541 COSTS_N_INSNS (1), /* clz. */
1542 COSTS_N_INSNS (1), /* rev. */
1543 0, /* non_exec. */
1544 true /* non_exec_costs_exec. */
1546 /* MULT SImode */
1549 COSTS_N_INSNS (2), /* simple. */
1550 COSTS_N_INSNS (3), /* flag_setting. */
1551 COSTS_N_INSNS (2), /* extend. */
1552 COSTS_N_INSNS (3), /* add. */
1553 COSTS_N_INSNS (2), /* extend_add. */
1554 COSTS_N_INSNS (18) /* idiv. */
1556 /* MULT DImode */
1558 0, /* simple (N/A). */
1559 0, /* flag_setting (N/A). */
1560 COSTS_N_INSNS (3), /* extend. */
1561 0, /* add (N/A). */
1562 COSTS_N_INSNS (3), /* extend_add. */
1563 0 /* idiv (N/A). */
1566 /* LD/ST */
1568 COSTS_N_INSNS (3), /* load. */
1569 COSTS_N_INSNS (3), /* load_sign_extend. */
1570 COSTS_N_INSNS (3), /* ldrd. */
1571 COSTS_N_INSNS (3), /* ldm_1st. */
1572 1, /* ldm_regs_per_insn_1st. */
1573 2, /* ldm_regs_per_insn_subsequent. */
1574 COSTS_N_INSNS (3), /* loadf. */
1575 COSTS_N_INSNS (3), /* loadd. */
1576 0, /* load_unaligned. */
1577 0, /* store. */
1578 0, /* strd. */
1579 0, /* stm_1st. */
1580 1, /* stm_regs_per_insn_1st. */
1581 2, /* stm_regs_per_insn_subsequent. */
1582 COSTS_N_INSNS (2), /* storef. */
1583 COSTS_N_INSNS (2), /* stored. */
1584 0, /* store_unaligned. */
1585 COSTS_N_INSNS (1), /* loadv. */
1586 COSTS_N_INSNS (1) /* storev. */
1589 /* FP SFmode */
1591 COSTS_N_INSNS (17), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1605 /* FP DFmode */
1607 COSTS_N_INSNS (31), /* div. */
1608 COSTS_N_INSNS (4), /* mult. */
1609 COSTS_N_INSNS (8), /* mult_addsub. */
1610 COSTS_N_INSNS (8), /* fma. */
1611 COSTS_N_INSNS (4), /* addsub. */
1612 COSTS_N_INSNS (2), /* fpconst. */
1613 COSTS_N_INSNS (2), /* neg. */
1614 COSTS_N_INSNS (2), /* compare. */
1615 COSTS_N_INSNS (4), /* widen. */
1616 COSTS_N_INSNS (4), /* narrow. */
1617 COSTS_N_INSNS (4), /* toint. */
1618 COSTS_N_INSNS (4), /* fromint. */
1619 COSTS_N_INSNS (4) /* roundint. */
1622 /* Vector */
1624 COSTS_N_INSNS (1), /* alu. */
1625 COSTS_N_INSNS (4), /* mult. */
1626 COSTS_N_INSNS (1), /* movi. */
1627 COSTS_N_INSNS (2), /* dup. */
1628 COSTS_N_INSNS (2) /* extract. */
1632 const struct cpu_cost_table cortexa15_extra_costs =
1634 /* ALU */
1636 0, /* arith. */
1637 0, /* logical. */
1638 0, /* shift. */
1639 0, /* shift_reg. */
1640 COSTS_N_INSNS (1), /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1642 COSTS_N_INSNS (1), /* log_shift. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1644 0, /* extend. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1646 COSTS_N_INSNS (1), /* bfi. */
1647 0, /* bfx. */
1648 0, /* clz. */
1649 0, /* rev. */
1650 0, /* non_exec. */
1651 true /* non_exec_costs_exec. */
1653 /* MULT SImode */
1656 COSTS_N_INSNS (2), /* simple. */
1657 COSTS_N_INSNS (3), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (2), /* add. */
1660 COSTS_N_INSNS (2), /* extend_add. */
1661 COSTS_N_INSNS (18) /* idiv. */
1663 /* MULT DImode */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (3), /* extend. */
1668 0, /* add (N/A). */
1669 COSTS_N_INSNS (3), /* extend_add. */
1670 0 /* idiv (N/A). */
1673 /* LD/ST */
1675 COSTS_N_INSNS (3), /* load. */
1676 COSTS_N_INSNS (3), /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (4), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 2, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (4), /* loadf. */
1682 COSTS_N_INSNS (4), /* loadd. */
1683 0, /* load_unaligned. */
1684 0, /* store. */
1685 0, /* strd. */
1686 COSTS_N_INSNS (1), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 2, /* stm_regs_per_insn_subsequent. */
1689 0, /* storef. */
1690 0, /* stored. */
1691 0, /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1696 /* FP SFmode */
1698 COSTS_N_INSNS (17), /* div. */
1699 COSTS_N_INSNS (4), /* mult. */
1700 COSTS_N_INSNS (8), /* mult_addsub. */
1701 COSTS_N_INSNS (8), /* fma. */
1702 COSTS_N_INSNS (4), /* addsub. */
1703 COSTS_N_INSNS (2), /* fpconst. */
1704 COSTS_N_INSNS (2), /* neg. */
1705 COSTS_N_INSNS (5), /* compare. */
1706 COSTS_N_INSNS (4), /* widen. */
1707 COSTS_N_INSNS (4), /* narrow. */
1708 COSTS_N_INSNS (4), /* toint. */
1709 COSTS_N_INSNS (4), /* fromint. */
1710 COSTS_N_INSNS (4) /* roundint. */
1712 /* FP DFmode */
1714 COSTS_N_INSNS (31), /* div. */
1715 COSTS_N_INSNS (4), /* mult. */
1716 COSTS_N_INSNS (8), /* mult_addsub. */
1717 COSTS_N_INSNS (8), /* fma. */
1718 COSTS_N_INSNS (4), /* addsub. */
1719 COSTS_N_INSNS (2), /* fpconst. */
1720 COSTS_N_INSNS (2), /* neg. */
1721 COSTS_N_INSNS (2), /* compare. */
1722 COSTS_N_INSNS (4), /* widen. */
1723 COSTS_N_INSNS (4), /* narrow. */
1724 COSTS_N_INSNS (4), /* toint. */
1725 COSTS_N_INSNS (4), /* fromint. */
1726 COSTS_N_INSNS (4) /* roundint. */
1729 /* Vector */
1731 COSTS_N_INSNS (1), /* alu. */
1732 COSTS_N_INSNS (4), /* mult. */
1733 COSTS_N_INSNS (1), /* movi. */
1734 COSTS_N_INSNS (2), /* dup. */
1735 COSTS_N_INSNS (2) /* extract. */
1739 const struct cpu_cost_table v7m_extra_costs =
1741 /* ALU */
1743 0, /* arith. */
1744 0, /* logical. */
1745 0, /* shift. */
1746 0, /* shift_reg. */
1747 0, /* arith_shift. */
1748 COSTS_N_INSNS (1), /* arith_shift_reg. */
1749 0, /* log_shift. */
1750 COSTS_N_INSNS (1), /* log_shift_reg. */
1751 0, /* extend. */
1752 COSTS_N_INSNS (1), /* extend_arith. */
1753 0, /* bfi. */
1754 0, /* bfx. */
1755 0, /* clz. */
1756 0, /* rev. */
1757 COSTS_N_INSNS (1), /* non_exec. */
1758 false /* non_exec_costs_exec. */
1761 /* MULT SImode */
1763 COSTS_N_INSNS (1), /* simple. */
1764 COSTS_N_INSNS (1), /* flag_setting. */
1765 COSTS_N_INSNS (2), /* extend. */
1766 COSTS_N_INSNS (1), /* add. */
1767 COSTS_N_INSNS (3), /* extend_add. */
1768 COSTS_N_INSNS (8) /* idiv. */
1770 /* MULT DImode */
1772 0, /* simple (N/A). */
1773 0, /* flag_setting (N/A). */
1774 COSTS_N_INSNS (2), /* extend. */
1775 0, /* add (N/A). */
1776 COSTS_N_INSNS (3), /* extend_add. */
1777 0 /* idiv (N/A). */
1780 /* LD/ST */
1782 COSTS_N_INSNS (2), /* load. */
1783 0, /* load_sign_extend. */
1784 COSTS_N_INSNS (3), /* ldrd. */
1785 COSTS_N_INSNS (2), /* ldm_1st. */
1786 1, /* ldm_regs_per_insn_1st. */
1787 1, /* ldm_regs_per_insn_subsequent. */
1788 COSTS_N_INSNS (2), /* loadf. */
1789 COSTS_N_INSNS (3), /* loadd. */
1790 COSTS_N_INSNS (1), /* load_unaligned. */
1791 COSTS_N_INSNS (2), /* store. */
1792 COSTS_N_INSNS (3), /* strd. */
1793 COSTS_N_INSNS (2), /* stm_1st. */
1794 1, /* stm_regs_per_insn_1st. */
1795 1, /* stm_regs_per_insn_subsequent. */
1796 COSTS_N_INSNS (2), /* storef. */
1797 COSTS_N_INSNS (3), /* stored. */
1798 COSTS_N_INSNS (1), /* store_unaligned. */
1799 COSTS_N_INSNS (1), /* loadv. */
1800 COSTS_N_INSNS (1) /* storev. */
1803 /* FP SFmode */
1805 COSTS_N_INSNS (7), /* div. */
1806 COSTS_N_INSNS (2), /* mult. */
1807 COSTS_N_INSNS (5), /* mult_addsub. */
1808 COSTS_N_INSNS (3), /* fma. */
1809 COSTS_N_INSNS (1), /* addsub. */
1810 0, /* fpconst. */
1811 0, /* neg. */
1812 0, /* compare. */
1813 0, /* widen. */
1814 0, /* narrow. */
1815 0, /* toint. */
1816 0, /* fromint. */
1817 0 /* roundint. */
1819 /* FP DFmode */
1821 COSTS_N_INSNS (15), /* div. */
1822 COSTS_N_INSNS (5), /* mult. */
1823 COSTS_N_INSNS (7), /* mult_addsub. */
1824 COSTS_N_INSNS (7), /* fma. */
1825 COSTS_N_INSNS (3), /* addsub. */
1826 0, /* fpconst. */
1827 0, /* neg. */
1828 0, /* compare. */
1829 0, /* widen. */
1830 0, /* narrow. */
1831 0, /* toint. */
1832 0, /* fromint. */
1833 0 /* roundint. */
1836 /* Vector */
1838 COSTS_N_INSNS (1), /* alu. */
1839 COSTS_N_INSNS (4), /* mult. */
1840 COSTS_N_INSNS (1), /* movi. */
1841 COSTS_N_INSNS (2), /* dup. */
1842 COSTS_N_INSNS (2) /* extract. */
1846 const struct addr_mode_cost_table generic_addr_mode_costs =
1848 /* int. */
1850 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1851 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1852 COSTS_N_INSNS (0) /* AMO_WB. */
1854 /* float. */
1856 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1857 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1858 COSTS_N_INSNS (0) /* AMO_WB. */
1860 /* vector. */
1862 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1863 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1864 COSTS_N_INSNS (0) /* AMO_WB. */
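/* All of the generic entries above are zero, i.e. the generic tuning
   treats every addressing mode as equally cheap.  A hypothetical core on
   which writeback (pre/post-indexed) addressing were slower could be
   described with a table along these lines (an illustrative sketch only,
   not a description of any real CPU):

     const struct addr_mode_cost_table example_addr_mode_costs =
     {
       { COSTS_N_INSNS (0), COSTS_N_INSNS (0), COSTS_N_INSNS (1) },   (int)
       { COSTS_N_INSNS (0), COSTS_N_INSNS (0), COSTS_N_INSNS (1) },   (float)
       { COSTS_N_INSNS (0), COSTS_N_INSNS (0), COSTS_N_INSNS (1) }    (vector)
     };

   with the three columns corresponding to AMO_DEFAULT, AMO_NO_WB and
   AMO_WB respectively.  */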
1868 const struct tune_params arm_slowmul_tune =
1870 &generic_extra_costs, /* Insn extra costs. */
1871 &generic_addr_mode_costs, /* Addressing mode costs. */
1872 NULL, /* Sched adj cost. */
1873 arm_default_branch_cost,
1874 &arm_default_vec_cost,
1875 3, /* Constant limit. */
1876 5, /* Max cond insns. */
1877 8, /* Memset max inline. */
1878 1, /* Issue rate. */
1879 ARM_PREFETCH_NOT_BENEFICIAL,
1880 tune_params::PREF_CONST_POOL_TRUE,
1881 tune_params::PREF_LDRD_FALSE,
1882 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1884 tune_params::DISPARAGE_FLAGS_NEITHER,
1885 tune_params::PREF_NEON_STRINGOPS_FALSE,
1886 tune_params::FUSE_NOTHING,
1887 tune_params::SCHED_AUTOPREF_OFF
1890 const struct tune_params arm_fastmul_tune =
1892 &generic_extra_costs, /* Insn extra costs. */
1893 &generic_addr_mode_costs, /* Addressing mode costs. */
1894 NULL, /* Sched adj cost. */
1895 arm_default_branch_cost,
1896 &arm_default_vec_cost,
1897 1, /* Constant limit. */
1898 5, /* Max cond insns. */
1899 8, /* Memset max inline. */
1900 1, /* Issue rate. */
1901 ARM_PREFETCH_NOT_BENEFICIAL,
1902 tune_params::PREF_CONST_POOL_TRUE,
1903 tune_params::PREF_LDRD_FALSE,
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1905 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1906 tune_params::DISPARAGE_FLAGS_NEITHER,
1907 tune_params::PREF_NEON_STRINGOPS_FALSE,
1908 tune_params::FUSE_NOTHING,
1909 tune_params::SCHED_AUTOPREF_OFF
1912 /* StrongARM has early execution of branches, so a sequence that is worth
1913 skipping is shorter. Set max_insns_skipped to a lower value. */
1915 const struct tune_params arm_strongarm_tune =
1917 &generic_extra_costs, /* Insn extra costs. */
1918 &generic_addr_mode_costs, /* Addressing mode costs. */
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 3, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 1, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_TRUE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_STRINGOPS_FALSE,
1933 tune_params::FUSE_NOTHING,
1934 tune_params::SCHED_AUTOPREF_OFF
1937 const struct tune_params arm_xscale_tune =
1939 &generic_extra_costs, /* Insn extra costs. */
1940 &generic_addr_mode_costs, /* Addressing mode costs. */
1941 xscale_sched_adjust_cost,
1942 arm_default_branch_cost,
1943 &arm_default_vec_cost,
1944 2, /* Constant limit. */
1945 3, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 1, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 tune_params::PREF_CONST_POOL_TRUE,
1950 tune_params::PREF_LDRD_FALSE,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER,
1954 tune_params::PREF_NEON_STRINGOPS_FALSE,
1955 tune_params::FUSE_NOTHING,
1956 tune_params::SCHED_AUTOPREF_OFF
1959 const struct tune_params arm_9e_tune =
1961 &generic_extra_costs, /* Insn extra costs. */
1962 &generic_addr_mode_costs, /* Addressing mode costs. */
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 1, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_TRUE,
1972 tune_params::PREF_LDRD_FALSE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER,
1976 tune_params::PREF_NEON_STRINGOPS_FALSE,
1977 tune_params::FUSE_NOTHING,
1978 tune_params::SCHED_AUTOPREF_OFF
1981 const struct tune_params arm_marvell_pj4_tune =
1983 &generic_extra_costs, /* Insn extra costs. */
1984 &generic_addr_mode_costs, /* Addressing mode costs. */
1985 NULL, /* Sched adj cost. */
1986 arm_default_branch_cost,
1987 &arm_default_vec_cost,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 2, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 tune_params::PREF_CONST_POOL_TRUE,
1994 tune_params::PREF_LDRD_FALSE,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER,
1998 tune_params::PREF_NEON_STRINGOPS_FALSE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2003 const struct tune_params arm_v6t2_tune =
2005 &generic_extra_costs, /* Insn extra costs. */
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 1, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_STRINGOPS_FALSE,
2021 tune_params::FUSE_NOTHING,
2022 tune_params::SCHED_AUTOPREF_OFF
2026 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2027 const struct tune_params arm_cortex_tune =
2029 &generic_extra_costs,
2030 &generic_addr_mode_costs, /* Addressing mode costs. */
2031 NULL, /* Sched adj cost. */
2032 arm_default_branch_cost,
2033 &arm_default_vec_cost,
2034 1, /* Constant limit. */
2035 5, /* Max cond insns. */
2036 8, /* Memset max inline. */
2037 2, /* Issue rate. */
2038 ARM_PREFETCH_NOT_BENEFICIAL,
2039 tune_params::PREF_CONST_POOL_FALSE,
2040 tune_params::PREF_LDRD_FALSE,
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2043 tune_params::DISPARAGE_FLAGS_NEITHER,
2044 tune_params::PREF_NEON_STRINGOPS_FALSE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_OFF
2049 const struct tune_params arm_cortex_a8_tune =
2051 &cortexa8_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 2, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_STRINGOPS_TRUE,
2067 tune_params::FUSE_NOTHING,
2068 tune_params::SCHED_AUTOPREF_OFF
2071 const struct tune_params arm_cortex_a7_tune =
2073 &cortexa7_extra_costs,
2074 &generic_addr_mode_costs, /* Addressing mode costs. */
2075 NULL, /* Sched adj cost. */
2076 arm_default_branch_cost,
2077 &arm_default_vec_cost,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 8, /* Memset max inline. */
2081 2, /* Issue rate. */
2082 ARM_PREFETCH_NOT_BENEFICIAL,
2083 tune_params::PREF_CONST_POOL_FALSE,
2084 tune_params::PREF_LDRD_FALSE,
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2087 tune_params::DISPARAGE_FLAGS_NEITHER,
2088 tune_params::PREF_NEON_STRINGOPS_TRUE,
2089 tune_params::FUSE_NOTHING,
2090 tune_params::SCHED_AUTOPREF_OFF
2093 const struct tune_params arm_cortex_a15_tune =
2095 &cortexa15_extra_costs,
2096 &generic_addr_mode_costs, /* Addressing mode costs. */
2097 NULL, /* Sched adj cost. */
2098 arm_default_branch_cost,
2099 &arm_default_vec_cost,
2100 1, /* Constant limit. */
2101 2, /* Max cond insns. */
2102 8, /* Memset max inline. */
2103 3, /* Issue rate. */
2104 ARM_PREFETCH_NOT_BENEFICIAL,
2105 tune_params::PREF_CONST_POOL_FALSE,
2106 tune_params::PREF_LDRD_TRUE,
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2108 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2109 tune_params::DISPARAGE_FLAGS_ALL,
2110 tune_params::PREF_NEON_STRINGOPS_TRUE,
2111 tune_params::FUSE_NOTHING,
2112 tune_params::SCHED_AUTOPREF_FULL
2115 const struct tune_params arm_cortex_a35_tune =
2117 &cortexa53_extra_costs,
2118 &generic_addr_mode_costs, /* Addressing mode costs. */
2119 NULL, /* Sched adj cost. */
2120 arm_default_branch_cost,
2121 &arm_default_vec_cost,
2122 1, /* Constant limit. */
2123 5, /* Max cond insns. */
2124 8, /* Memset max inline. */
2125 1, /* Issue rate. */
2126 ARM_PREFETCH_NOT_BENEFICIAL,
2127 tune_params::PREF_CONST_POOL_FALSE,
2128 tune_params::PREF_LDRD_FALSE,
2129 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2131 tune_params::DISPARAGE_FLAGS_NEITHER,
2132 tune_params::PREF_NEON_STRINGOPS_TRUE,
2133 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2134 tune_params::SCHED_AUTOPREF_OFF
2137 const struct tune_params arm_cortex_a53_tune =
2139 &cortexa53_extra_costs,
2140 &generic_addr_mode_costs, /* Addressing mode costs. */
2141 NULL, /* Sched adj cost. */
2142 arm_default_branch_cost,
2143 &arm_default_vec_cost,
2144 1, /* Constant limit. */
2145 5, /* Max cond insns. */
2146 8, /* Memset max inline. */
2147 2, /* Issue rate. */
2148 ARM_PREFETCH_NOT_BENEFICIAL,
2149 tune_params::PREF_CONST_POOL_FALSE,
2150 tune_params::PREF_LDRD_FALSE,
2151 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2153 tune_params::DISPARAGE_FLAGS_NEITHER,
2154 tune_params::PREF_NEON_STRINGOPS_TRUE,
2155 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2156 tune_params::SCHED_AUTOPREF_OFF
2159 const struct tune_params arm_cortex_a57_tune =
2161 &cortexa57_extra_costs,
2162 &generic_addr_mode_costs, /* Addressing mode costs. */
2163 NULL, /* Sched adj cost. */
2164 arm_default_branch_cost,
2165 &arm_default_vec_cost,
2166 1, /* Constant limit. */
2167 2, /* Max cond insns. */
2168 8, /* Memset max inline. */
2169 3, /* Issue rate. */
2170 ARM_PREFETCH_NOT_BENEFICIAL,
2171 tune_params::PREF_CONST_POOL_FALSE,
2172 tune_params::PREF_LDRD_TRUE,
2173 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2175 tune_params::DISPARAGE_FLAGS_ALL,
2176 tune_params::PREF_NEON_STRINGOPS_TRUE,
2177 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2178 tune_params::SCHED_AUTOPREF_FULL
2181 const struct tune_params arm_exynosm1_tune =
2183 &exynosm1_extra_costs,
2184 &generic_addr_mode_costs, /* Addressing mode costs. */
2185 NULL, /* Sched adj cost. */
2186 arm_default_branch_cost,
2187 &arm_default_vec_cost,
2188 1, /* Constant limit. */
2189 2, /* Max cond insns. */
2190 8, /* Memset max inline. */
2191 3, /* Issue rate. */
2192 ARM_PREFETCH_NOT_BENEFICIAL,
2193 tune_params::PREF_CONST_POOL_FALSE,
2194 tune_params::PREF_LDRD_TRUE,
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2197 tune_params::DISPARAGE_FLAGS_ALL,
2198 tune_params::PREF_NEON_STRINGOPS_TRUE,
2199 tune_params::FUSE_NOTHING,
2200 tune_params::SCHED_AUTOPREF_OFF
2203 const struct tune_params arm_xgene1_tune =
2205 &xgene1_extra_costs,
2206 &generic_addr_mode_costs, /* Addressing mode costs. */
2207 NULL, /* Sched adj cost. */
2208 arm_default_branch_cost,
2209 &arm_default_vec_cost,
2210 1, /* Constant limit. */
2211 2, /* Max cond insns. */
2212 32, /* Memset max inline. */
2213 4, /* Issue rate. */
2214 ARM_PREFETCH_NOT_BENEFICIAL,
2215 tune_params::PREF_CONST_POOL_FALSE,
2216 tune_params::PREF_LDRD_TRUE,
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2218 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2219 tune_params::DISPARAGE_FLAGS_ALL,
2220 tune_params::PREF_NEON_STRINGOPS_FALSE,
2221 tune_params::FUSE_NOTHING,
2222 tune_params::SCHED_AUTOPREF_OFF
2225 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2226 less appealing. Set max_insns_skipped to a low value. */
2228 const struct tune_params arm_cortex_a5_tune =
2230 &cortexa5_extra_costs,
2231 &generic_addr_mode_costs, /* Addressing mode costs. */
2232 NULL, /* Sched adj cost. */
2233 arm_cortex_a5_branch_cost,
2234 &arm_default_vec_cost,
2235 1, /* Constant limit. */
2236 1, /* Max cond insns. */
2237 8, /* Memset max inline. */
2238 2, /* Issue rate. */
2239 ARM_PREFETCH_NOT_BENEFICIAL,
2240 tune_params::PREF_CONST_POOL_FALSE,
2241 tune_params::PREF_LDRD_FALSE,
2242 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2243 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2244 tune_params::DISPARAGE_FLAGS_NEITHER,
2245 tune_params::PREF_NEON_STRINGOPS_TRUE,
2246 tune_params::FUSE_NOTHING,
2247 tune_params::SCHED_AUTOPREF_OFF
2250 const struct tune_params arm_cortex_a9_tune =
2252 &cortexa9_extra_costs,
2253 &generic_addr_mode_costs, /* Addressing mode costs. */
2254 cortex_a9_sched_adjust_cost,
2255 arm_default_branch_cost,
2256 &arm_default_vec_cost,
2257 1, /* Constant limit. */
2258 5, /* Max cond insns. */
2259 8, /* Memset max inline. */
2260 2, /* Issue rate. */
2261 ARM_PREFETCH_BENEFICIAL(4,32,32),
2262 tune_params::PREF_CONST_POOL_FALSE,
2263 tune_params::PREF_LDRD_FALSE,
2264 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2265 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2266 tune_params::DISPARAGE_FLAGS_NEITHER,
2267 tune_params::PREF_NEON_STRINGOPS_FALSE,
2268 tune_params::FUSE_NOTHING,
2269 tune_params::SCHED_AUTOPREF_OFF
2272 const struct tune_params arm_cortex_a12_tune =
2274 &cortexa12_extra_costs,
2275 &generic_addr_mode_costs, /* Addressing mode costs. */
2276 NULL, /* Sched adj cost. */
2277 arm_default_branch_cost,
2278 &arm_default_vec_cost, /* Vectorizer costs. */
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 2, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_FALSE,
2285 tune_params::PREF_LDRD_TRUE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_ALL,
2289 tune_params::PREF_NEON_STRINGOPS_TRUE,
2290 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2291 tune_params::SCHED_AUTOPREF_OFF
2294 const struct tune_params arm_cortex_a73_tune =
2296 &cortexa57_extra_costs,
2297 &generic_addr_mode_costs, /* Addressing mode costs. */
2298 NULL, /* Sched adj cost. */
2299 arm_default_branch_cost,
2300 &arm_default_vec_cost, /* Vectorizer costs. */
2301 1, /* Constant limit. */
2302 2, /* Max cond insns. */
2303 8, /* Memset max inline. */
2304 2, /* Issue rate. */
2305 ARM_PREFETCH_NOT_BENEFICIAL,
2306 tune_params::PREF_CONST_POOL_FALSE,
2307 tune_params::PREF_LDRD_TRUE,
2308 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2309 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2310 tune_params::DISPARAGE_FLAGS_ALL,
2311 tune_params::PREF_NEON_STRINGOPS_TRUE,
2312 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2313 tune_params::SCHED_AUTOPREF_FULL
2316 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2317 cycle to execute each. An LDR from the constant pool also takes two cycles
2318 to execute, but mildly increases pipelining opportunity (consecutive
2319 loads/stores can be pipelined together, saving one cycle), and may also
2320 improve icache utilisation. Hence we prefer the constant pool for such
2321 processors. */
2323 const struct tune_params arm_v7m_tune =
2325 &v7m_extra_costs,
2326 &generic_addr_mode_costs, /* Addressing mode costs. */
2327 NULL, /* Sched adj cost. */
2328 arm_cortex_m_branch_cost,
2329 &arm_default_vec_cost,
2330 1, /* Constant limit. */
2331 2, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL,
2335 tune_params::PREF_CONST_POOL_TRUE,
2336 tune_params::PREF_LDRD_FALSE,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER,
2340 tune_params::PREF_NEON_STRINGOPS_FALSE,
2341 tune_params::FUSE_NOTHING,
2342 tune_params::SCHED_AUTOPREF_OFF
2345 /* Cortex-M7 tuning. */
2347 const struct tune_params arm_cortex_m7_tune =
2349 &v7m_extra_costs,
2350 &generic_addr_mode_costs, /* Addressing mode costs. */
2351 NULL, /* Sched adj cost. */
2352 arm_cortex_m7_branch_cost,
2353 &arm_default_vec_cost,
2354 0, /* Constant limit. */
2355 1, /* Max cond insns. */
2356 8, /* Memset max inline. */
2357 2, /* Issue rate. */
2358 ARM_PREFETCH_NOT_BENEFICIAL,
2359 tune_params::PREF_CONST_POOL_TRUE,
2360 tune_params::PREF_LDRD_FALSE,
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2362 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2363 tune_params::DISPARAGE_FLAGS_NEITHER,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE,
2365 tune_params::FUSE_NOTHING,
2366 tune_params::SCHED_AUTOPREF_OFF
2369 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2370 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2371 cortex-m23. */
2372 const struct tune_params arm_v6m_tune =
2374 &generic_extra_costs, /* Insn extra costs. */
2375 &generic_addr_mode_costs, /* Addressing mode costs. */
2376 NULL, /* Sched adj cost. */
2377 arm_default_branch_cost,
2378 &arm_default_vec_cost, /* Vectorizer costs. */
2379 1, /* Constant limit. */
2380 5, /* Max cond insns. */
2381 8, /* Memset max inline. */
2382 1, /* Issue rate. */
2383 ARM_PREFETCH_NOT_BENEFICIAL,
2384 tune_params::PREF_CONST_POOL_FALSE,
2385 tune_params::PREF_LDRD_FALSE,
2386 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2387 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2388 tune_params::DISPARAGE_FLAGS_NEITHER,
2389 tune_params::PREF_NEON_STRINGOPS_FALSE,
2390 tune_params::FUSE_NOTHING,
2391 tune_params::SCHED_AUTOPREF_OFF
2394 const struct tune_params arm_fa726te_tune =
2396 &generic_extra_costs, /* Insn extra costs. */
2397 &generic_addr_mode_costs, /* Addressing mode costs. */
2398 fa726te_sched_adjust_cost,
2399 arm_default_branch_cost,
2400 &arm_default_vec_cost,
2401 1, /* Constant limit. */
2402 5, /* Max cond insns. */
2403 8, /* Memset max inline. */
2404 2, /* Issue rate. */
2405 ARM_PREFETCH_NOT_BENEFICIAL,
2406 tune_params::PREF_CONST_POOL_TRUE,
2407 tune_params::PREF_LDRD_FALSE,
2408 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2409 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2410 tune_params::DISPARAGE_FLAGS_NEITHER,
2411 tune_params::PREF_NEON_STRINGOPS_FALSE,
2412 tune_params::FUSE_NOTHING,
2413 tune_params::SCHED_AUTOPREF_OFF
2416 /* Auto-generated CPU, FPU and architecture tables. */
2417 #include "arm-cpu-data.h"
2419 /* The name of the preprocessor macro to define for this architecture. PROFILE
2420 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2421 is thus chosen to be big enough to hold the longest architecture name. */
2423 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2425 /* Supported TLS relocations. */
2427 enum tls_reloc {
2428 TLS_GD32,
2429 TLS_GD32_FDPIC,
2430 TLS_LDM32,
2431 TLS_LDM32_FDPIC,
2432 TLS_LDO32,
2433 TLS_IE32,
2434 TLS_IE32_FDPIC,
2435 TLS_LE32,
2436 TLS_DESCSEQ /* GNU scheme */
2439 /* The maximum number of insns to be used when loading a constant. */
2440 inline static int
2441 arm_constant_limit (bool size_p)
2443 return size_p ? 1 : current_tune->constant_limit;
2446 /* Emit an insn that's a simple single-set. Both the operands must be known
2447 to be valid. */
2448 inline static rtx_insn *
2449 emit_set_insn (rtx x, rtx y)
2451 return emit_insn (gen_rtx_SET (x, y));
2454 /* Return the number of bits set in VALUE. */
2455 static unsigned
2456 bit_count (unsigned long value)
2458 unsigned long count = 0;
2460 while (value)
2462 count++;
2463 value &= value - 1; /* Clear the least-significant set bit. */
2466 return count;
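/* Worked example of the loop above (Kernighan's trick): for
   value = 0b101100 the successive "value &= value - 1" steps give
   0b101000, then 0b100000, then 0, so the loop body runs three times
   and bit_count returns 3 -- one iteration per set bit rather than one
   per bit position.  */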
2469 /* Return the number of bits set in BMAP. */
2470 static unsigned
2471 bitmap_popcount (const sbitmap bmap)
2473 unsigned int count = 0;
2474 unsigned int n = 0;
2475 sbitmap_iterator sbi;
2477 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2478 count++;
2479 return count;
2482 typedef struct
2484 machine_mode mode;
2485 const char *name;
2486 } arm_fixed_mode_set;
2488 /* A small helper for setting fixed-point libfuncs. */
2490 static void
2491 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2492 const char *funcname, const char *modename,
2493 int num_suffix)
2495 char buffer[50];
2497 if (num_suffix == 0)
2498 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2499 else
2500 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2502 set_optab_libfunc (optable, mode, buffer);
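/* For example, the call

     arm_set_fixed_optab_libfunc (ssadd_optab, E_SQmode, "ssadd", "sq", 3);

   made from arm_init_libfuncs below formats the name "__gnu_ssaddsq3"
   and registers it as the saturating-add libcall for SQmode.  */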
2505 static void
2506 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2507 machine_mode from, const char *funcname,
2508 const char *toname, const char *fromname)
2510 char buffer[50];
2511 const char *maybe_suffix_2 = "";
2513 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2514 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2515 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2516 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2517 maybe_suffix_2 = "2";
2519 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2520 maybe_suffix_2);
2522 set_conv_libfunc (optable, to, from, buffer);
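/* For example, a conversion from SAmode to DAmode (both signed accum
   modes, so the fixed-bit.h rule above applies) is registered as
   "__gnu_fractsada2", while a conversion from SQmode to SImode (the
   destination is not a fixed-point mode) gets no suffix and becomes
   "__gnu_fractsqsi".  */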
2525 static GTY(()) rtx speculation_barrier_libfunc;
2527 /* Record that we have no arithmetic or comparison libfuncs for
2528 machine mode MODE. */
2530 static void
2531 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2533 /* Arithmetic. */
2534 set_optab_libfunc (add_optab, mode, NULL);
2535 set_optab_libfunc (sdiv_optab, mode, NULL);
2536 set_optab_libfunc (smul_optab, mode, NULL);
2537 set_optab_libfunc (neg_optab, mode, NULL);
2538 set_optab_libfunc (sub_optab, mode, NULL);
2540 /* Comparisons. */
2541 set_optab_libfunc (eq_optab, mode, NULL);
2542 set_optab_libfunc (ne_optab, mode, NULL);
2543 set_optab_libfunc (lt_optab, mode, NULL);
2544 set_optab_libfunc (le_optab, mode, NULL);
2545 set_optab_libfunc (ge_optab, mode, NULL);
2546 set_optab_libfunc (gt_optab, mode, NULL);
2547 set_optab_libfunc (unord_optab, mode, NULL);
2550 /* Set up library functions unique to ARM. */
2551 static void
2552 arm_init_libfuncs (void)
2554 machine_mode mode_iter;
2556 /* For Linux, we have access to kernel support for atomic operations. */
2557 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2558 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2560 /* There are no special library functions unless we are using the
2561 ARM BPABI. */
2562 if (!TARGET_BPABI)
2563 return;
2565 /* The functions below are described in Section 4 of the "Run-Time
2566 ABI for the ARM architecture", Version 1.0. */
2568 /* Double-precision floating-point arithmetic. Table 2. */
2569 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2570 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2571 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2572 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2573 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2575 /* Double-precision comparisons. Table 3. */
2576 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2577 set_optab_libfunc (ne_optab, DFmode, NULL);
2578 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2579 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2580 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2581 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2582 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2584 /* Single-precision floating-point arithmetic. Table 4. */
2585 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2586 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2587 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2588 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2589 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2591 /* Single-precision comparisons. Table 5. */
2592 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2593 set_optab_libfunc (ne_optab, SFmode, NULL);
2594 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2595 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2596 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2597 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2598 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2600 /* Floating-point to integer conversions. Table 6. */
2601 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2602 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2603 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2604 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2605 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2606 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2607 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2608 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2610 /* Conversions between floating types. Table 7. */
2611 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2612 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2614 /* Integer to floating-point conversions. Table 8. */
2615 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2616 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2617 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2618 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2619 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2620 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2621 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2622 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2624 /* Long long. Table 9. */
2625 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2626 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2627 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2628 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2629 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2630 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2631 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2632 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2634 /* Integer (32/32->32) division. \S 4.3.1. */
2635 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2636 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2638 /* The divmod functions are designed so that they can be used for
2639 plain division, even though they return both the quotient and the
2640 remainder. The quotient is returned in the usual location (i.e.,
2641 r0 for SImode, {r0, r1} for DImode), just as would be expected
2642 for an ordinary division routine. Because the AAPCS calling
2643 conventions specify that all of { r0, r1, r2, r3 } are
2644 call-clobbered registers, there is no need to tell the compiler
2645 explicitly that those registers are clobbered by these
2646 routines. */
2647 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2648 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
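/* So a plain C division such as

     long long quot = num / den;

   is typically compiled into a call to __aeabi_ldivmod: the quotient
   comes back in {r0, r1} as usual, and the remainder (also returned by
   the routine, in {r2, r3} per the run-time ABI) is simply ignored.  */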
2650 /* For SImode division the ABI provides div-without-mod routines,
2651 which are faster. */
2652 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2653 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2655 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2656 divmod libcalls instead. */
2657 set_optab_libfunc (smod_optab, DImode, NULL);
2658 set_optab_libfunc (umod_optab, DImode, NULL);
2659 set_optab_libfunc (smod_optab, SImode, NULL);
2660 set_optab_libfunc (umod_optab, SImode, NULL);
2662 /* Half-precision float operations. The compiler handles all operations
2663 with NULL libfuncs by converting to SFmode. */
2664 switch (arm_fp16_format)
2666 case ARM_FP16_FORMAT_IEEE:
2667 case ARM_FP16_FORMAT_ALTERNATIVE:
2669 /* Conversions. */
2670 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2671 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2672 ? "__gnu_f2h_ieee"
2673 : "__gnu_f2h_alternative"));
2674 set_conv_libfunc (sext_optab, SFmode, HFmode,
2675 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2676 ? "__gnu_h2f_ieee"
2677 : "__gnu_h2f_alternative"));
2679 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2680 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2681 ? "__gnu_d2h_ieee"
2682 : "__gnu_d2h_alternative"));
2684 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2685 break;
2687 default:
2688 break;
2691 /* For all possible libcalls in BFmode, record NULL. */
2692 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2694 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2695 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2696 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2697 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2699 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2701 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2703 const arm_fixed_mode_set fixed_arith_modes[] =
2705 { E_QQmode, "qq" },
2706 { E_UQQmode, "uqq" },
2707 { E_HQmode, "hq" },
2708 { E_UHQmode, "uhq" },
2709 { E_SQmode, "sq" },
2710 { E_USQmode, "usq" },
2711 { E_DQmode, "dq" },
2712 { E_UDQmode, "udq" },
2713 { E_TQmode, "tq" },
2714 { E_UTQmode, "utq" },
2715 { E_HAmode, "ha" },
2716 { E_UHAmode, "uha" },
2717 { E_SAmode, "sa" },
2718 { E_USAmode, "usa" },
2719 { E_DAmode, "da" },
2720 { E_UDAmode, "uda" },
2721 { E_TAmode, "ta" },
2722 { E_UTAmode, "uta" }
2724 const arm_fixed_mode_set fixed_conv_modes[] =
2726 { E_QQmode, "qq" },
2727 { E_UQQmode, "uqq" },
2728 { E_HQmode, "hq" },
2729 { E_UHQmode, "uhq" },
2730 { E_SQmode, "sq" },
2731 { E_USQmode, "usq" },
2732 { E_DQmode, "dq" },
2733 { E_UDQmode, "udq" },
2734 { E_TQmode, "tq" },
2735 { E_UTQmode, "utq" },
2736 { E_HAmode, "ha" },
2737 { E_UHAmode, "uha" },
2738 { E_SAmode, "sa" },
2739 { E_USAmode, "usa" },
2740 { E_DAmode, "da" },
2741 { E_UDAmode, "uda" },
2742 { E_TAmode, "ta" },
2743 { E_UTAmode, "uta" },
2744 { E_QImode, "qi" },
2745 { E_HImode, "hi" },
2746 { E_SImode, "si" },
2747 { E_DImode, "di" },
2748 { E_TImode, "ti" },
2749 { E_SFmode, "sf" },
2750 { E_DFmode, "df" }
2752 unsigned int i, j;
2754 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2756 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2757 "add", fixed_arith_modes[i].name, 3);
2758 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2759 "ssadd", fixed_arith_modes[i].name, 3);
2760 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2761 "usadd", fixed_arith_modes[i].name, 3);
2762 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2763 "sub", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2765 "sssub", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2767 "ussub", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2769 "mul", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2771 "ssmul", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2773 "usmul", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2775 "div", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2777 "udiv", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2779 "ssdiv", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2781 "usdiv", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2783 "neg", fixed_arith_modes[i].name, 2);
2784 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2785 "ssneg", fixed_arith_modes[i].name, 2);
2786 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2787 "usneg", fixed_arith_modes[i].name, 2);
2788 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2789 "ashl", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2791 "ashr", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2793 "lshr", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2795 "ssashl", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2797 "usashl", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2799 "cmp", fixed_arith_modes[i].name, 2);
2802 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2803 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2805 if (i == j
2806 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2807 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2808 continue;
2810 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2811 fixed_conv_modes[j].mode, "fract",
2812 fixed_conv_modes[i].name,
2813 fixed_conv_modes[j].name);
2814 arm_set_fixed_conv_libfunc (satfract_optab,
2815 fixed_conv_modes[i].mode,
2816 fixed_conv_modes[j].mode, "satfract",
2817 fixed_conv_modes[i].name,
2818 fixed_conv_modes[j].name);
2819 arm_set_fixed_conv_libfunc (fractuns_optab,
2820 fixed_conv_modes[i].mode,
2821 fixed_conv_modes[j].mode, "fractuns",
2822 fixed_conv_modes[i].name,
2823 fixed_conv_modes[j].name);
2824 arm_set_fixed_conv_libfunc (satfractuns_optab,
2825 fixed_conv_modes[i].mode,
2826 fixed_conv_modes[j].mode, "satfractuns",
2827 fixed_conv_modes[i].name,
2828 fixed_conv_modes[j].name);
2832 if (TARGET_AAPCS_BASED)
2833 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2835 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2838 /* On AAPCS systems, this is the "struct __va_list". */
2839 static GTY(()) tree va_list_type;
2841 /* Return the type to use as __builtin_va_list. */
2842 static tree
2843 arm_build_builtin_va_list (void)
2845 tree va_list_name;
2846 tree ap_field;
2848 if (!TARGET_AAPCS_BASED)
2849 return std_build_builtin_va_list ();
2851 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2852 defined as:
2854 struct __va_list
2855 {
2856 void *__ap;
2857 };
2859 The C Library ABI further reinforces this definition in \S
2860 4.1.
2862 We must follow this definition exactly. The structure tag
2863 name is visible in C++ mangled names, and thus forms a part
2864 of the ABI. The field name may be used by people who
2865 #include <stdarg.h>. */
2866 /* Create the type. */
2867 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2868 /* Give it the required name. */
2869 va_list_name = build_decl (BUILTINS_LOCATION,
2870 TYPE_DECL,
2871 get_identifier ("__va_list"),
2872 va_list_type);
2873 DECL_ARTIFICIAL (va_list_name) = 1;
2874 TYPE_NAME (va_list_type) = va_list_name;
2875 TYPE_STUB_DECL (va_list_type) = va_list_name;
2876 /* Create the __ap field. */
2877 ap_field = build_decl (BUILTINS_LOCATION,
2878 FIELD_DECL,
2879 get_identifier ("__ap"),
2880 ptr_type_node);
2881 DECL_ARTIFICIAL (ap_field) = 1;
2882 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2883 TYPE_FIELDS (va_list_type) = ap_field;
2884 /* Compute its layout. */
2885 layout_type (va_list_type);
2887 return va_list_type;
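/* On AAPCS targets the type built here is therefore equivalent to the
   user-visible

     typedef struct __va_list { void *__ap; } va_list;

   while non-AAPCS targets fall back to the generic va_list type.  */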
2890 /* Return an expression of type "void *" pointing to the next
2891 available argument in a variable-argument list. VALIST is the
2892 user-level va_list object, of type __builtin_va_list. */
2893 static tree
2894 arm_extract_valist_ptr (tree valist)
2896 if (TREE_TYPE (valist) == error_mark_node)
2897 return error_mark_node;
2899 /* On an AAPCS target, the pointer is stored within "struct
2900 __va_list". */
2901 if (TARGET_AAPCS_BASED)
2903 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2904 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2905 valist, ap_field, NULL_TREE);
2908 return valist;
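/* In C terms: given "va_list ap", the tree returned on an AAPCS target
   denotes the member access "ap.__ap", so the later va_arg expansion
   works on the raw pointer rather than on the wrapper struct.  */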
2911 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2912 static void
2913 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2915 valist = arm_extract_valist_ptr (valist);
2916 std_expand_builtin_va_start (valist, nextarg);
2919 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2920 static tree
2921 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2922 gimple_seq *post_p)
2924 valist = arm_extract_valist_ptr (valist);
2925 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2928 /* Check any incompatible options that the user has specified. */
2929 static void
2930 arm_option_check_internal (struct gcc_options *opts)
2932 int flags = opts->x_target_flags;
2934 /* iWMMXt and NEON are incompatible. */
2935 if (TARGET_IWMMXT
2936 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2937 error ("iWMMXt and NEON are incompatible");
2939 /* Make sure that the processor choice does not conflict with any of the
2940 other command line choices. */
2941 if (TARGET_ARM_P (flags)
2942 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2943 error ("target CPU does not support ARM mode");
2945 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2946 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2947 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2949 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2950 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2952 /* If this target is normally configured to use APCS frames, warn if they
2953 are turned off and debugging is turned on. */
2954 if (TARGET_ARM_P (flags)
2955 && write_symbols != NO_DEBUG
2956 && !TARGET_APCS_FRAME
2957 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2958 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2959 "debugging");
2961 /* iWMMXt unsupported under Thumb mode. */
2962 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2963 error ("iWMMXt unsupported under Thumb mode");
2965 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2966 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2968 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2970 error ("RTP PIC is incompatible with Thumb");
2971 flag_pic = 0;
2974 if (target_pure_code || target_slow_flash_data)
2976 const char *flag = (target_pure_code ? "-mpure-code" :
2977 "-mslow-flash-data");
2978 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2980 /* We only support -mslow-flash-data on M-profile targets with
2981 MOVT. */
2982 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2983 error ("%s only supports non-pic code on M-profile targets with the "
2984 "MOVT instruction", flag);
2986 /* We only support -mpure-code on M-profile targets. */
2987 if (target_pure_code && common_unsupported_modes)
2988 error ("%s only supports non-pic code on M-profile targets", flag);
2990 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2991 -mword-relocations forbids relocation of MOVT/MOVW. */
2992 if (target_word_relocations)
2993 error ("%s incompatible with %<-mword-relocations%>", flag);
2997 /* Recompute the global settings depending on target attribute options. */
2999 static void
3000 arm_option_params_internal (void)
3002 /* If we are not using the default (ARM mode) section anchor offset
3003 ranges, then set the correct ranges now. */
3004 if (TARGET_THUMB1)
3006 /* Thumb-1 LDR instructions cannot have negative offsets.
3007 Permissible positive offset ranges are 5-bit (for byte loads),
3008 6-bit (for halfword loads), or 7-bit (for word loads).
3009 Empirical results suggest a 7-bit anchor range gives the best
3010 overall code size. */
3011 targetm.min_anchor_offset = 0;
3012 targetm.max_anchor_offset = 127;
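/* (The "7-bit" figure above is the byte-offset range: the Thumb-1 LDR
   immediate field is 5 bits scaled by the access size, so word loads
   reach byte offsets 0..124, hence the 0..127 anchor range chosen
   here.)  */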
3014 else if (TARGET_THUMB2)
3016 /* The minimum is set such that the total size of the block
3017 for a particular anchor is 248 + 1 + 4095 bytes, which is
3018 divisible by eight, ensuring natural spacing of anchors. */
3019 targetm.min_anchor_offset = -248;
3020 targetm.max_anchor_offset = 4095;
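/* Check: 248 + 1 + 4095 = 4344 = 543 * 8, so each anchor block is
   indeed a whole number of eight-byte units, as the comment above
   requires.  */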
3022 else
3024 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3025 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3028 /* Increase the number of conditional instructions with -Os. */
3029 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3031 /* For THUMB2, we limit the conditional sequence to one IT block. */
3032 if (TARGET_THUMB2)
3033 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
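/* Example: with the Cortex-A15 tuning (max cond insns = 2 above), a
   Thumb-2 compile without -Os ends up with
   MIN (2, MAX_INSN_PER_IT_BLOCK) = 2, while -Os raises the starting
   value to 4 and the IT-block limit then caps the final figure at
   MAX_INSN_PER_IT_BLOCK.  */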
3035 if (TARGET_THUMB1)
3036 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3037 else
3038 targetm.md_asm_adjust = arm_md_asm_adjust;
3041 /* True if -mflip-thumb should next add an attribute for the default
3042 mode, false if it should next add an attribute for the opposite mode. */
3043 static GTY(()) bool thumb_flipper;
3045 /* Options after initial target override. */
3046 static GTY(()) tree init_optimize;
3048 static void
3049 arm_override_options_after_change_1 (struct gcc_options *opts,
3050 struct gcc_options *opts_set)
3052 /* -falign-functions without argument: supply one. */
3053 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3054 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3055 && opts->x_optimize_size ? "2" : "4";
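/* E.g. compiling with "-Os -mthumb -falign-functions" (no explicit
   value) behaves like -falign-functions=2, whereas an ARM-state or
   non-size-optimised compile defaults to -falign-functions=4.  */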
3058 /* Implement targetm.override_options_after_change. */
3060 static void
3061 arm_override_options_after_change (void)
3063 arm_override_options_after_change_1 (&global_options, &global_options_set);
3066 /* Implement TARGET_OPTION_RESTORE. */
3067 static void
3068 arm_option_restore (struct gcc_options */* opts */,
3069 struct gcc_options */* opts_set */,
3070 struct cl_target_option *ptr)
3072 arm_configure_build_target (&arm_active_target, ptr, false);
3073 arm_option_reconfigure_globals ();
3076 /* Reset options between modes that the user has specified. */
3077 static void
3078 arm_option_override_internal (struct gcc_options *opts,
3079 struct gcc_options *opts_set)
3081 arm_override_options_after_change_1 (opts, opts_set);
3083 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3085 /* The default is to enable interworking, so this warning message would
3086 be confusing to users who have just compiled with
3087 e.g., -march=armv4. */
3088 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3089 opts->x_target_flags &= ~MASK_INTERWORK;
3092 if (TARGET_THUMB_P (opts->x_target_flags)
3093 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3095 warning (0, "target CPU does not support THUMB instructions");
3096 opts->x_target_flags &= ~MASK_THUMB;
3099 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3101 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3102 opts->x_target_flags &= ~MASK_APCS_FRAME;
3105 /* Callee super interworking implies thumb interworking. Adding
3106 this to the flags here simplifies the logic elsewhere. */
3107 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3108 opts->x_target_flags |= MASK_INTERWORK;
3110 /* Need to remember initial values so combinations of options like
3111 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3112 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3114 if (! opts_set->x_arm_restrict_it)
3115 opts->x_arm_restrict_it = arm_arch8;
3117 /* ARM execution state and M profile don't have [restrict] IT. */
3118 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3119 opts->x_arm_restrict_it = 0;
3121 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3122 if (!opts_set->x_arm_restrict_it
3123 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3124 opts->x_arm_restrict_it = 0;
3126 /* Enable -munaligned-access by default for
3127 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3128 i.e. Thumb2 and ARM state only.
3129 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3130 - ARMv8 architecture-based processors.
3132 Disable -munaligned-access by default for
3133 - all pre-ARMv6 architecture-based processors
3134 - ARMv6-M architecture-based processors
3135 - ARMv8-M Baseline processors. */
3137 if (! opts_set->x_unaligned_access)
3139 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3140 && arm_arch6 && (arm_arch_notm || arm_arch7));
3142 else if (opts->x_unaligned_access == 1
3143 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3145 warning (0, "target CPU does not support unaligned accesses");
3146 opts->x_unaligned_access = 0;
3149 /* Don't warn since it's on by default in -O2. */
3150 if (TARGET_THUMB1_P (opts->x_target_flags))
3151 opts->x_flag_schedule_insns = 0;
3152 else
3153 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3155 /* Disable shrink-wrap when optimizing function for size, since it tends to
3156 generate additional returns. */
3157 if (optimize_function_for_size_p (cfun)
3158 && TARGET_THUMB2_P (opts->x_target_flags))
3159 opts->x_flag_shrink_wrap = false;
3160 else
3161 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3163 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3164 - epilogue_insns - does not accurately model the corresponding insns
3165 emitted in the asm file. In particular, see the comment in thumb_exit
3166 'Find out how many of the (return) argument registers we can corrupt'.
3167 As a consequence, the epilogue may clobber registers without fipa-ra
3168 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3169 TODO: Accurately model clobbers for epilogue_insns and reenable
3170 fipa-ra. */
3171 if (TARGET_THUMB1_P (opts->x_target_flags))
3172 opts->x_flag_ipa_ra = 0;
3173 else
3174 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3176 /* Thumb2 inline assembly code should always use unified syntax.
3177 This will apply to ARM and Thumb1 eventually. */
3178 if (TARGET_THUMB2_P (opts->x_target_flags))
3179 opts->x_inline_asm_unified = true;
3181 if (arm_stack_protector_guard == SSP_GLOBAL
3182 && opts->x_arm_stack_protector_guard_offset_str)
3184 error ("incompatible options %<-mstack-protector-guard=global%> and "
3185 "%<-mstack-protector-guard-offset=%s%>",
3186 arm_stack_protector_guard_offset_str);
3189 if (opts->x_arm_stack_protector_guard_offset_str)
3191 char *end;
3192 const char *str = arm_stack_protector_guard_offset_str;
3193 errno = 0;
3194 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3195 if (!*str || *end || errno)
3196 error ("%qs is not a valid offset in %qs", str,
3197 "-mstack-protector-guard-offset=");
3198 arm_stack_protector_guard_offset = offs;
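/* For instance, -mstack-protector-guard-offset=1296 stores 1296 here,
   while a malformed value such as "12z" fails the *end check above and
   is reported as an invalid offset.  */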
3201 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3202 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3203 #endif
3206 static sbitmap isa_all_fpubits_internal;
3207 static sbitmap isa_all_fpbits;
3208 static sbitmap isa_quirkbits;
3210 /* Configure a build target TARGET from the user-specified options OPTS and
3211 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3212 architecture have been specified, but the two are not identical. */
3213 void
3214 arm_configure_build_target (struct arm_build_target *target,
3215 struct cl_target_option *opts,
3216 bool warn_compatible)
3218 const cpu_option *arm_selected_tune = NULL;
3219 const arch_option *arm_selected_arch = NULL;
3220 const cpu_option *arm_selected_cpu = NULL;
3221 const arm_fpu_desc *arm_selected_fpu = NULL;
3222 const char *tune_opts = NULL;
3223 const char *arch_opts = NULL;
3224 const char *cpu_opts = NULL;
3226 bitmap_clear (target->isa);
3227 target->core_name = NULL;
3228 target->arch_name = NULL;
3230 if (opts->x_arm_arch_string)
3232 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3233 "-march",
3234 opts->x_arm_arch_string);
3235 arch_opts = strchr (opts->x_arm_arch_string, '+');
3238 if (opts->x_arm_cpu_string)
3240 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3241 opts->x_arm_cpu_string);
3242 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3243 arm_selected_tune = arm_selected_cpu;
3244 /* If taking the tuning from -mcpu, we don't need to rescan the
3245 options for tuning. */
3248 if (opts->x_arm_tune_string)
3250 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3251 opts->x_arm_tune_string);
3252 tune_opts = strchr (opts->x_arm_tune_string, '+');
3255 if (arm_selected_arch)
3257 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3258 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3259 arch_opts);
3261 if (arm_selected_cpu)
3263 auto_sbitmap cpu_isa (isa_num_bits);
3264 auto_sbitmap isa_delta (isa_num_bits);
3266 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 bitmap_xor (isa_delta, cpu_isa, target->isa);
3270 /* Ignore any bits that are quirk bits. */
3271 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3272 /* If the user (or the default configuration) has specified a
3273 specific FPU, then ignore any bits that depend on the FPU
3274 configuration. Do similarly if using the soft-float
3275 ABI. */
3276 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3277 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3278 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3280 if (!bitmap_empty_p (isa_delta))
3282 if (warn_compatible)
3283 warning (0, "switch %<-mcpu=%s%> conflicts "
3284 "with switch %<-march=%s%>",
3285 opts->x_arm_cpu_string,
3286 opts->x_arm_arch_string);
3288 /* -march wins for code generation.
3289 -mcpu wins for default tuning. */
3290 if (!arm_selected_tune)
3291 arm_selected_tune = arm_selected_cpu;
3293 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3294 target->arch_name = arm_selected_arch->common.name;
3296 else
3298 /* Architecture and CPU are essentially the same.
3299 Prefer the CPU setting. */
3300 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3301 target->core_name = arm_selected_cpu->common.name;
3302 /* Copy the CPU's capabilities, so that we inherit the
3303 appropriate extensions and quirks. */
3304 bitmap_copy (target->isa, cpu_isa);
3307 else
3309 /* Pick a CPU based on the architecture. */
3310 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3311 target->arch_name = arm_selected_arch->common.name;
3312 /* Note: target->core_name is left unset in this path. */
3315 else if (arm_selected_cpu)
3317 target->core_name = arm_selected_cpu->common.name;
3318 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3319 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3320 cpu_opts);
3321 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3323 /* If the user did not specify a processor or architecture, choose
3324 one for them. */
3325 else
3327 const cpu_option *sel;
3328 auto_sbitmap sought_isa (isa_num_bits);
3329 bitmap_clear (sought_isa);
3330 auto_sbitmap default_isa (isa_num_bits);
3332 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3333 TARGET_CPU_DEFAULT);
3334 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3335 gcc_assert (arm_selected_cpu->common.name);
3337 /* RWE: All of the selection logic below (to the end of this
3338 'if' clause) looks somewhat suspect. It appears to be mostly
3339 there to support forcing thumb support when the default CPU
3340 does not have thumb (somewhat dubious in terms of what the
3341 user might be expecting). I think it should be removed once
3342 support for the pre-thumb era cores is removed. */
3343 sel = arm_selected_cpu;
3344 arm_initialize_isa (default_isa, sel->common.isa_bits);
3345 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3346 cpu_opts);
3348 /* Now check to see if the user has specified any command line
3349 switches that require certain abilities from the cpu. */
3351 if (TARGET_INTERWORK || TARGET_THUMB)
3352 bitmap_set_bit (sought_isa, isa_bit_thumb);
3354 /* If there are such requirements and the default CPU does not
3355 satisfy them, we need to run over the complete list of
3356 cores looking for one that is satisfactory. */
3357 if (!bitmap_empty_p (sought_isa)
3358 && !bitmap_subset_p (sought_isa, default_isa))
3360 auto_sbitmap candidate_isa (isa_num_bits);
3361 /* We're only interested in a CPU with at least the
3362 capabilities of the default CPU and the required
3363 additional features. */
3364 bitmap_ior (default_isa, default_isa, sought_isa);
3366 /* Try to locate a CPU type that supports all of the abilities
3367 of the default CPU, plus the extra abilities requested by
3368 the user. */
3369 for (sel = all_cores; sel->common.name != NULL; sel++)
3371 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3372 /* An exact match? */
3373 if (bitmap_equal_p (default_isa, candidate_isa))
3374 break;
3377 if (sel->common.name == NULL)
3379 unsigned current_bit_count = isa_num_bits;
3380 const cpu_option *best_fit = NULL;
3382 /* Ideally we would like to issue an error message here
3383 saying that it was not possible to find a CPU compatible
3384 with the default CPU, but which also supports the command
3385 line options specified by the programmer, and so they
3386 ought to use the -mcpu=<name> command line option to
3387 override the default CPU type.
3389 If we cannot find a CPU that has exactly the
3390 characteristics of the default CPU and the given
3391 command line options we scan the array again looking
3392 for a best match. The best match must have at least
3393 the capabilities of the perfect match. */
3394 for (sel = all_cores; sel->common.name != NULL; sel++)
3396 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3398 if (bitmap_subset_p (default_isa, candidate_isa))
3400 unsigned count;
3402 bitmap_and_compl (candidate_isa, candidate_isa,
3403 default_isa);
3404 count = bitmap_popcount (candidate_isa);
3406 if (count < current_bit_count)
3408 best_fit = sel;
3409 current_bit_count = count;
3413 gcc_assert (best_fit);
3414 sel = best_fit;
3417 arm_selected_cpu = sel;
3420 /* Now we know the CPU, we can finally initialize the target
3421 structure. */
3422 target->core_name = arm_selected_cpu->common.name;
3423 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3424 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3425 cpu_opts);
3426 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3429 gcc_assert (arm_selected_cpu);
3430 gcc_assert (arm_selected_arch);
3432 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3434 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3435 auto_sbitmap fpu_bits (isa_num_bits);
3437 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3438 /* This should clear out ALL bits relating to the FPU/simd
3439 extensions, to avoid potentially invalid combinations later on
3440 that we can't match. At present we only clear out those bits
3441 that can be set by -mfpu. This should be fixed in GCC-12. */
3442 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3443 bitmap_ior (target->isa, target->isa, fpu_bits);
3446 /* If we have the soft-float ABI, clear any feature bits relating to use of
3447 floating-point operations. They'll just confuse things later on. */
3448 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3449 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3451 /* There may be implied bits which we still need to enable. These are
3452 non-named features which are needed to complete other sets of features,
3453 but cannot be enabled from arm-cpus.in due to being shared between
3454 multiple fgroups. Each entry in all_implied_fbits is of the form
3455 ante -> cons, meaning that if the feature "ante" is enabled, we should
3456 implicitly enable "cons". */
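   /* A hedged illustration (the concrete pairs live in the generated
      all_implied_fbits table, not in this file): when an "ante" bit such as
      a floating-point SIMD feature is enabled, its unnamed "cons" base bit
      is switched on here as well, so the later FPU checks see a complete
      feature set.  */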
3457 const struct fbit_implication *impl = all_implied_fbits;
3458 while (impl->ante)
3460 if (bitmap_bit_p (target->isa, impl->ante))
3461 bitmap_set_bit (target->isa, impl->cons);
3462 impl++;
3465 if (!arm_selected_tune)
3466 arm_selected_tune = arm_selected_cpu;
3467 else /* Validate the features passed to -mtune. */
3468 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3470 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3472 /* Finish initializing the target structure. */
3473 if (!target->arch_name)
3474 target->arch_name = arm_selected_arch->common.name;
3475 target->arch_pp_name = arm_selected_arch->arch;
3476 target->base_arch = arm_selected_arch->base_arch;
3477 target->profile = arm_selected_arch->profile;
3479 target->tune_flags = tune_data->tune_flags;
3480 target->tune = tune_data->tune;
3481 target->tune_core = tune_data->scheduler;
3484 /* Fix up any incompatible options that the user has specified. */
3485 static void
3486 arm_option_override (void)
3488 static const enum isa_feature fpu_bitlist_internal[]
3489 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3490 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3491 static const enum isa_feature fp_bitlist[]
3492 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3493 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3494 cl_target_option opts;
3496 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3497 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3499 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3500 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3501 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3502 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3504 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3506 if (!OPTION_SET_P (arm_fpu_index))
3508 bool ok;
3509 int fpu_index;
3511 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3512 CL_TARGET);
3513 gcc_assert (ok);
3514 arm_fpu_index = (enum fpu_type) fpu_index;
3517 cl_target_option_save (&opts, &global_options, &global_options_set);
3518 arm_configure_build_target (&arm_active_target, &opts, true);
3520 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3521 SUBTARGET_OVERRIDE_OPTIONS;
3522 #endif
3524 /* Initialize boolean versions of the architectural flags, for use
3525 in the arm.md file and for enabling feature flags. */
3526 arm_option_reconfigure_globals ();
3528 arm_tune = arm_active_target.tune_core;
3529 tune_flags = arm_active_target.tune_flags;
3530 current_tune = arm_active_target.tune;
3532 /* TBD: Dwarf info for apcs frame is not handled yet. */
3533 if (TARGET_APCS_FRAME)
3534 flag_shrink_wrap = false;
3536 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3538 warning (0, "%<-mapcs-stack-check%> incompatible with "
3539 "%<-mno-apcs-frame%>");
3540 target_flags |= MASK_APCS_FRAME;
3543 if (TARGET_POKE_FUNCTION_NAME)
3544 target_flags |= MASK_APCS_FRAME;
3546 if (TARGET_APCS_REENT && flag_pic)
3547 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3549 if (TARGET_APCS_REENT)
3550 warning (0, "APCS reentrant code not supported. Ignored");
3552 /* Set up some tuning parameters. */
3553 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3554 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3555 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3556 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3557 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3558 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3560 /* For arm2/3 there is no need to do any scheduling if we are doing
3561 software floating-point. */
3562 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3563 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3565 /* Override the default structure alignment for AAPCS ABI. */
3566 if (!OPTION_SET_P (arm_structure_size_boundary))
3568 if (TARGET_AAPCS_BASED)
3569 arm_structure_size_boundary = 8;
3571 else
3573 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3575 if (arm_structure_size_boundary != 8
3576 && arm_structure_size_boundary != 32
3577 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3579 if (ARM_DOUBLEWORD_ALIGN)
3580 warning (0,
3581 "structure size boundary can only be set to 8, 32 or 64");
3582 else
3583 warning (0, "structure size boundary can only be set to 8 or 32");
3584 arm_structure_size_boundary
3585 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3589 if (TARGET_VXWORKS_RTP)
3591 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3592 arm_pic_data_is_text_relative = 0;
3594 else if (flag_pic
3595 && !arm_pic_data_is_text_relative
3596 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3597 /* When text & data segments don't have a fixed displacement, the
3598 intended use is with a single, read only, pic base register.
3599 Unless the user explicitly requested not to do that, set
3600 it. */
3601 target_flags |= MASK_SINGLE_PIC_BASE;
3603 /* If stack checking is disabled, we can use r10 as the PIC register,
3604 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3605 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3607 if (TARGET_VXWORKS_RTP)
3608 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3609 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3612 if (flag_pic && TARGET_VXWORKS_RTP)
3613 arm_pic_register = 9;
3615 /* If in FDPIC mode then force arm_pic_register to be r9. */
3616 if (TARGET_FDPIC)
3618 arm_pic_register = FDPIC_REGNUM;
3619 if (TARGET_THUMB1)
3620 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3623 if (arm_pic_register_string != NULL)
3625 int pic_register = decode_reg_name (arm_pic_register_string);
3627 if (!flag_pic)
3628 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3630 /* Prevent the user from choosing an obviously stupid PIC register. */
3631 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3632 || pic_register == HARD_FRAME_POINTER_REGNUM
3633 || pic_register == STACK_POINTER_REGNUM
3634 || pic_register >= PC_REGNUM
3635 || (TARGET_VXWORKS_RTP
3636 && (unsigned int) pic_register != arm_pic_register))
3637 error ("unable to use %qs for PIC register", arm_pic_register_string);
3638 else
3639 arm_pic_register = pic_register;
3642 if (flag_pic)
3643 target_word_relocations = 1;
3645 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
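   /* A value of 2 is the option's initial value, i.e. the user passed
      neither -mfix-cortex-m3-ldrd nor its negative form (assumption based on
      the arm.opt defaults); the same convention applies to fix_vlldm and
      fix_aes_erratum_1742098 below.  */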
3646 if (fix_cm3_ldrd == 2)
3648 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3649 fix_cm3_ldrd = 1;
3650 else
3651 fix_cm3_ldrd = 0;
3654 /* Enable fix_vlldm by default if required. */
3655 if (fix_vlldm == 2)
3657 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3658 fix_vlldm = 1;
3659 else
3660 fix_vlldm = 0;
3663 /* Enable fix_aes by default if required. */
3664 if (fix_aes_erratum_1742098 == 2)
3666 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3667 fix_aes_erratum_1742098 = 1;
3668 else
3669 fix_aes_erratum_1742098 = 0;
3672 /* Hot/Cold partitioning is not currently supported, since we can't
3673 handle literal pool placement in that case. */
3674 if (flag_reorder_blocks_and_partition)
3676 inform (input_location,
3677 "%<-freorder-blocks-and-partition%> not supported "
3678 "on this architecture");
3679 flag_reorder_blocks_and_partition = 0;
3680 flag_reorder_blocks = 1;
3683 if (flag_pic)
3684 /* Hoisting PIC address calculations more aggressively provides a small,
3685 but measurable, size reduction for PIC code. Therefore, we decrease
3686 the bar for unrestricted expression hoisting to the cost of PIC address
3687 calculation, which is 2 instructions. */
3688 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3689 param_gcse_unrestricted_cost, 2);
3691 /* ARM EABI defaults to strict volatile bitfields. */
3692 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3693 && abi_version_at_least(2))
3694 flag_strict_volatile_bitfields = 1;
3696 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3697 have deemed it beneficial (signified by setting
3698 prefetch.num_slots to 1 or more). */
3699 if (flag_prefetch_loop_arrays < 0
3700 && HAVE_prefetch
3701 && optimize >= 3
3702 && current_tune->prefetch.num_slots > 0)
3703 flag_prefetch_loop_arrays = 1;
3705 /* Set up parameters to be used in prefetching algorithm. Do not
3706 override the defaults unless we are tuning for a core we have
3707 researched values for. */
3708 if (current_tune->prefetch.num_slots > 0)
3709 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3710 param_simultaneous_prefetches,
3711 current_tune->prefetch.num_slots);
3712 if (current_tune->prefetch.l1_cache_line_size >= 0)
3713 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3714 param_l1_cache_line_size,
3715 current_tune->prefetch.l1_cache_line_size);
3716 if (current_tune->prefetch.l1_cache_line_size >= 0)
3718 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3719 param_destruct_interfere_size,
3720 current_tune->prefetch.l1_cache_line_size);
3721 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3722 param_construct_interfere_size,
3723 current_tune->prefetch.l1_cache_line_size);
3725 else
3727 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3728 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3729 constructive? */
3730 /* More recent Cortex chips have a 64-byte cache line, but are marked
3731 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3732 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3733 param_destruct_interfere_size, 64);
3734 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3735 param_construct_interfere_size, 64);
3738 if (current_tune->prefetch.l1_cache_size >= 0)
3739 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3740 param_l1_cache_size,
3741 current_tune->prefetch.l1_cache_size);
3743 /* Look through ready list and all of queue for instructions
3744 relevant for L2 auto-prefetcher. */
3745 int sched_autopref_queue_depth;
3747 switch (current_tune->sched_autopref)
3749 case tune_params::SCHED_AUTOPREF_OFF:
3750 sched_autopref_queue_depth = -1;
3751 break;
3753 case tune_params::SCHED_AUTOPREF_RANK:
3754 sched_autopref_queue_depth = 0;
3755 break;
3757 case tune_params::SCHED_AUTOPREF_FULL:
3758 sched_autopref_queue_depth = max_insn_queue_index + 1;
3759 break;
3761 default:
3762 gcc_unreachable ();
3765 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3766 param_sched_autopref_queue_depth,
3767 sched_autopref_queue_depth);
3769 /* Currently, for slow flash data, we just disable literal pools. We also
3770 disable it for pure-code. */
3771 if (target_slow_flash_data || target_pure_code)
3772 arm_disable_literal_pool = true;
3774 /* Disable scheduling fusion by default if it's not armv7 processor
3775 or doesn't prefer ldrd/strd. */
3776 if (flag_schedule_fusion == 2
3777 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3778 flag_schedule_fusion = 0;
3780 /* Need to remember initial options before they are overriden. */
3781 init_optimize = build_optimization_node (&global_options,
3782 &global_options_set);
3784 arm_options_perform_arch_sanity_checks ();
3785 arm_option_override_internal (&global_options, &global_options_set);
3786 arm_option_check_internal (&global_options);
3787 arm_option_params_internal ();
3789 /* Create the default target_options structure. */
3790 target_option_default_node = target_option_current_node
3791 = build_target_option_node (&global_options, &global_options_set);
3793 /* Register global variables with the garbage collector. */
3794 arm_add_gc_roots ();
3796 /* Init initial mode for testing. */
3797 thumb_flipper = TARGET_THUMB;
3801 /* Reconfigure global status flags from the active_target.isa. */
3802 void
3803 arm_option_reconfigure_globals (void)
3805 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
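  /* For example (illustrative), -march=armv7-a has the printable name "7A",
     so arm_arch_name becomes "__ARM_ARCH_7A__", the macro later defined for
     the preprocessor.  */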
3806 arm_base_arch = arm_active_target.base_arch;
3808 /* Initialize boolean versions of the architectural flags, for use
3809 in the arm.md file. */
3810 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3811 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3812 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3813 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3814 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3815 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3816 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3817 arm_arch6m = arm_arch6 && !arm_arch_notm;
3818 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3819 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3820 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3821 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3822 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3823 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3824 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3825 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3826 isa_bit_armv8_1m_main);
3827 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3828 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3829 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3830 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3831 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3832 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3833 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3834 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3835 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3836 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3837 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3838 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3840 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3841 if (arm_fp16_inst)
3843 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3844 error ("selected fp16 options are incompatible");
3845 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3848 arm_arch_cde = 0;
3849 arm_arch_cde_coproc = 0;
3850 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3851 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3852 isa_bit_cdecp6, isa_bit_cdecp7};
3853 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3855 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3856 if (cde_bit)
3858 arm_arch_cde |= cde_bit;
3859 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3863 /* And finally, set up some quirks. */
3864 arm_arch_no_volatile_ce
3865 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3866 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3867 isa_bit_quirk_armv6kz);
3869 /* Use the cp15 method if it is available. */
3870 if (target_thread_pointer == TP_AUTO)
3872 if (arm_arch6k && !TARGET_THUMB1)
3873 target_thread_pointer = TP_CP15;
3874 else
3875 target_thread_pointer = TP_SOFT;
3878 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3879 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3882 /* Perform some validation between the desired architecture and the rest of the
3883 options. */
3884 void
3885 arm_options_perform_arch_sanity_checks (void)
3887 /* V5T code we generate is completely interworking capable, so we turn off
3888 TARGET_INTERWORK here to avoid many tests later on. */
3890 /* XXX However, we must pass the right pre-processor defines to CPP
3891 or GLD can get confused. This is a hack. */
3892 if (TARGET_INTERWORK)
3893 arm_cpp_interwork = 1;
3895 if (arm_arch5t)
3896 target_flags &= ~MASK_INTERWORK;
3898 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3899 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3901 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3902 error ("iwmmxt abi requires an iwmmxt capable cpu");
3904 /* BPABI targets use linker tricks to allow interworking on cores
3905 without thumb support. */
3906 if (TARGET_INTERWORK
3907 && !TARGET_BPABI
3908 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3910 warning (0, "target CPU does not support interworking");
3911 target_flags &= ~MASK_INTERWORK;
3914 /* If soft-float is specified then don't use FPU. */
3915 if (TARGET_SOFT_FLOAT)
3916 arm_fpu_attr = FPU_NONE;
3917 else
3918 arm_fpu_attr = FPU_VFP;
3920 if (TARGET_AAPCS_BASED)
3922 if (TARGET_CALLER_INTERWORKING)
3923 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3924 else
3925 if (TARGET_CALLEE_INTERWORKING)
3926 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3929 /* __fp16 support currently assumes the core has ldrh. */
3930 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3931 sorry ("%<__fp16%> and no ldrh");
3933 if (use_cmse && !arm_arch_cmse)
3934 error ("target CPU does not support ARMv8-M Security Extensions");
3936 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3937 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3938 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3939 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3942 if (TARGET_AAPCS_BASED)
3944 if (arm_abi == ARM_ABI_IWMMXT)
3945 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3946 else if (TARGET_HARD_FLOAT_ABI)
3948 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3949 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3950 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3951 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3953 else
3954 arm_pcs_default = ARM_PCS_AAPCS;
3956 else
3958 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3959 sorry ("%<-mfloat-abi=hard%> and VFP");
3961 if (arm_abi == ARM_ABI_APCS)
3962 arm_pcs_default = ARM_PCS_APCS;
3963 else
3964 arm_pcs_default = ARM_PCS_ATPCS;
3968 /* Test whether a local function descriptor is canonical, i.e.,
3969 whether we can use GOTOFFFUNCDESC to compute the address of the
3970 function. */
3971 static bool
3972 arm_fdpic_local_funcdesc_p (rtx fnx)
3974 tree fn;
3975 enum symbol_visibility vis;
3976 bool ret;
3978 if (!TARGET_FDPIC)
3979 return true;
3981 if (! SYMBOL_REF_LOCAL_P (fnx))
3982 return false;
3984 fn = SYMBOL_REF_DECL (fnx);
3986 if (! fn)
3987 return false;
3989 vis = DECL_VISIBILITY (fn);
3991 if (vis == VISIBILITY_PROTECTED)
3992 /* Private function descriptors for protected functions are not
3993 canonical. Temporarily change the visibility to global so that
3994 we can ensure uniqueness of funcdesc pointers. */
3995 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3997 ret = default_binds_local_p_1 (fn, flag_pic);
3999 DECL_VISIBILITY (fn) = vis;
4001 return ret;
4004 static void
4005 arm_add_gc_roots (void)
4007 gcc_obstack_init(&minipool_obstack);
4008 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4011 /* A table of known ARM exception types.
4012 For use with the interrupt function attribute. */
4014 typedef struct
4016 const char *const arg;
4017 const unsigned long return_value;
4019 isr_attribute_arg;
4021 static const isr_attribute_arg isr_attribute_args [] =
4023 { "IRQ", ARM_FT_ISR },
4024 { "irq", ARM_FT_ISR },
4025 { "FIQ", ARM_FT_FIQ },
4026 { "fiq", ARM_FT_FIQ },
4027 { "ABORT", ARM_FT_ISR },
4028 { "abort", ARM_FT_ISR },
4029 { "UNDEF", ARM_FT_EXCEPTION },
4030 { "undef", ARM_FT_EXCEPTION },
4031 { "SWI", ARM_FT_EXCEPTION },
4032 { "swi", ARM_FT_EXCEPTION },
4033 { NULL, ARM_FT_NORMAL }
4036 /* Returns the (interrupt) function type of the current
4037 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4039 static unsigned long
4040 arm_isr_value (tree argument)
4042 const isr_attribute_arg * ptr;
4043 const char * arg;
4045 if (!arm_arch_notm)
4046 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4048 /* No argument - default to IRQ. */
4049 if (argument == NULL_TREE)
4050 return ARM_FT_ISR;
4052 /* Get the value of the argument. */
4053 if (TREE_VALUE (argument) == NULL_TREE
4054 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4055 return ARM_FT_UNKNOWN;
4057 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4059 /* Check it against the list of known arguments. */
4060 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4061 if (streq (arg, ptr->arg))
4062 return ptr->return_value;
4064 /* An unrecognized interrupt type. */
4065 return ARM_FT_UNKNOWN;
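/* Illustrative use (not from the original source): a handler declared as
     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);
   reaches arm_isr_value with ARGUMENT holding the string "FIQ" and is
   classified as ARM_FT_FIQ by the table above.  */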
4068 /* Computes the type of the current function. */
4070 static unsigned long
4071 arm_compute_func_type (void)
4073 unsigned long type = ARM_FT_UNKNOWN;
4074 tree a;
4075 tree attr;
4077 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4079 /* Decide if the current function is volatile. Such functions
4080 never return, and many memory cycles can be saved by not storing
4081 register values that will never be needed again. This optimization
4082 was added to speed up context switching in a kernel application. */
4083 if (optimize > 0
4084 && (TREE_NOTHROW (current_function_decl)
4085 || !(flag_unwind_tables
4086 || (flag_exceptions
4087 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4088 && TREE_THIS_VOLATILE (current_function_decl))
4089 type |= ARM_FT_VOLATILE;
4091 if (cfun->static_chain_decl != NULL)
4092 type |= ARM_FT_NESTED;
4094 attr = DECL_ATTRIBUTES (current_function_decl);
4096 a = lookup_attribute ("naked", attr);
4097 if (a != NULL_TREE)
4098 type |= ARM_FT_NAKED;
4100 a = lookup_attribute ("isr", attr);
4101 if (a == NULL_TREE)
4102 a = lookup_attribute ("interrupt", attr);
4104 if (a == NULL_TREE)
4105 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4106 else
4107 type |= arm_isr_value (TREE_VALUE (a));
4109 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4110 type |= ARM_FT_CMSE_ENTRY;
4112 return type;
4115 /* Returns the type of the current function. */
4117 unsigned long
4118 arm_current_func_type (void)
4120 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4121 cfun->machine->func_type = arm_compute_func_type ();
4123 return cfun->machine->func_type;
4126 bool
4127 arm_allocate_stack_slots_for_args (void)
4129 /* Naked functions should not allocate stack slots for arguments. */
4130 return !IS_NAKED (arm_current_func_type ());
4133 static bool
4134 arm_warn_func_return (tree decl)
4136 /* Naked functions are implemented entirely in assembly, including the
4137 return sequence, so suppress warnings about this. */
4138 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4142 /* Output assembler code for a block containing the constant parts
4143 of a trampoline, leaving space for the variable parts.
4145 On the ARM, (if r8 is the static chain regnum, and remembering that
4146 referencing pc adds an offset of 8) the trampoline looks like:
4147 ldr r8, [pc, #0]
4148 ldr pc, [pc]
4149 .word static chain value
4150 .word function's address
4151 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4153 In FDPIC mode, the trampoline looks like:
4154 .word trampoline address
4155 .word trampoline GOT address
4156 ldr r12, [pc, #8] ; #4 for Arm mode
4157 ldr r9, [pc, #8] ; #4 for Arm mode
4158 ldr pc, [pc, #8] ; #4 for Arm mode
4159 .word static chain value
4160 .word GOT address
4161 .word function's address
4164 static void
4165 arm_asm_trampoline_template (FILE *f)
4167 fprintf (f, "\t.syntax unified\n");
4169 if (TARGET_FDPIC)
4171 /* The first two words are a function descriptor pointing to the
4172 trampoline code just below. */
4173 if (TARGET_ARM)
4174 fprintf (f, "\t.arm\n");
4175 else if (TARGET_THUMB2)
4176 fprintf (f, "\t.thumb\n");
4177 else
4178 /* Only ARM and Thumb-2 are supported. */
4179 gcc_unreachable ();
4181 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4182 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4183 /* Trampoline code which sets the static chain register but also
4184 PIC register before jumping into real code. */
4185 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4186 STATIC_CHAIN_REGNUM, PC_REGNUM,
4187 TARGET_THUMB2 ? 8 : 4);
4188 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4189 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4190 TARGET_THUMB2 ? 8 : 4);
4191 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4192 PC_REGNUM, PC_REGNUM,
4193 TARGET_THUMB2 ? 8 : 4);
4194 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4196 else if (TARGET_ARM)
4198 fprintf (f, "\t.arm\n");
4199 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4200 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4202 else if (TARGET_THUMB2)
4204 fprintf (f, "\t.thumb\n");
4205 /* The Thumb-2 trampoline is similar to the arm implementation.
4206 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4207 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4208 STATIC_CHAIN_REGNUM, PC_REGNUM);
4209 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4211 else
4213 ASM_OUTPUT_ALIGN (f, 2);
4214 fprintf (f, "\t.code\t16\n");
4215 fprintf (f, ".Ltrampoline_start:\n");
4216 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4217 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4218 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4219 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4220 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4221 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4223 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4224 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4227 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4229 static void
4230 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4232 rtx fnaddr, mem, a_tramp;
4234 emit_block_move (m_tramp, assemble_trampoline_template (),
4235 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4237 if (TARGET_FDPIC)
4239 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4240 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4241 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4242 /* The function start address is at offset 8, but in Thumb mode
4243 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4244 below. */
4245 rtx trampoline_code_start
4246 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4248 /* Write initial funcdesc which points to the trampoline. */
4249 mem = adjust_address (m_tramp, SImode, 0);
4250 emit_move_insn (mem, trampoline_code_start);
4251 mem = adjust_address (m_tramp, SImode, 4);
4252 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4253 /* Setup static chain. */
4254 mem = adjust_address (m_tramp, SImode, 20);
4255 emit_move_insn (mem, chain_value);
4256 /* GOT + real function entry point. */
4257 mem = adjust_address (m_tramp, SImode, 24);
4258 emit_move_insn (mem, gotaddr);
4259 mem = adjust_address (m_tramp, SImode, 28);
4260 emit_move_insn (mem, fnaddr);
4262 else
4264 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4265 emit_move_insn (mem, chain_value);
4267 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4268 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4269 emit_move_insn (mem, fnaddr);
4272 a_tramp = XEXP (m_tramp, 0);
4273 maybe_emit_call_builtin___clear_cache (a_tramp,
4274 plus_constant (ptr_mode,
4275 a_tramp,
4276 TRAMPOLINE_SIZE));
4279 /* Thumb trampolines should be entered in thumb mode, so set
4280 the bottom bit of the address. */
4282 static rtx
4283 arm_trampoline_adjust_address (rtx addr)
4285 /* For FDPIC don't fix trampoline address since it's a function
4286 descriptor and not a function address. */
4287 if (TARGET_THUMB && !TARGET_FDPIC)
4288 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4289 NULL, 0, OPTAB_LIB_WIDEN);
4290 return addr;
4293 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4294 includes call-clobbered registers too. If this is a leaf function
4295 we can just examine the registers used by the RTL, but otherwise we
4296 have to assume that whatever function is called might clobber
4297 anything, and so we have to save all the call-clobbered registers
4298 as well. */
4299 static inline bool reg_needs_saving_p (unsigned reg)
4301 unsigned long func_type = arm_current_func_type ();
4303 if (IS_INTERRUPT (func_type))
4304 if (df_regs_ever_live_p (reg)
4305 /* Save call-clobbered core registers. */
4306 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4307 return true;
4308 else
4309 return false;
4310 else
4311 if (!df_regs_ever_live_p (reg)
4312 || call_used_or_fixed_reg_p (reg))
4313 return false;
4314 else
4315 return true;
4318 /* Return 1 if it is possible to return using a single instruction.
4319 If SIBLING is non-null, this is a test for a return before a sibling
4320 call. SIBLING is the call insn, so we can examine its register usage. */
4323 use_return_insn (int iscond, rtx sibling)
4325 int regno;
4326 unsigned int func_type;
4327 unsigned long saved_int_regs;
4328 unsigned HOST_WIDE_INT stack_adjust;
4329 arm_stack_offsets *offsets;
4331 /* Never use a return instruction before reload has run. */
4332 if (!reload_completed)
4333 return 0;
4335 func_type = arm_current_func_type ();
4337 /* Naked, volatile and stack alignment functions need special
4338 consideration. */
4339 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4340 return 0;
4342 /* So do interrupt functions that use the frame pointer and Thumb
4343 interrupt functions. */
4344 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4345 return 0;
4347 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4348 && !optimize_function_for_size_p (cfun))
4349 return 0;
4351 offsets = arm_get_frame_offsets ();
4352 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4354 /* As do variadic functions. */
4355 if (crtl->args.pretend_args_size
4356 || cfun->machine->uses_anonymous_args
4357 /* Or if the function calls __builtin_eh_return () */
4358 || crtl->calls_eh_return
4359 /* Or if the function calls alloca */
4360 || cfun->calls_alloca
4361 /* Or if there is a stack adjustment. However, if the stack pointer
4362 is saved on the stack, we can use a pre-incrementing stack load. */
4363 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4364 && stack_adjust == 4))
4365 /* Or if the static chain register was saved above the frame, under the
4366 assumption that the stack pointer isn't saved on the stack. */
4367 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4368 && arm_compute_static_chain_stack_bytes() != 0))
4369 return 0;
4371 saved_int_regs = offsets->saved_regs_mask;
4373 /* Unfortunately, the insn
4375 ldmib sp, {..., sp, ...}
4377 triggers a bug on most SA-110 based devices, such that the stack
4378 pointer won't be correctly restored if the instruction takes a
4379 page fault. We work around this problem by popping r3 along with
4380 the other registers, since that is never slower than executing
4381 another instruction.
4383 We test for !arm_arch5t here, because code for any architecture
4384 less than this could potentially be run on one of the buggy
4385 chips. */
4386 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4388 /* Validate that r3 is a call-clobbered register (always true in
4389 the default abi) ... */
4390 if (!call_used_or_fixed_reg_p (3))
4391 return 0;
4393 /* ... that it isn't being used for a return value ... */
4394 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4395 return 0;
4397 /* ... or for a tail-call argument ... */
4398 if (sibling)
4400 gcc_assert (CALL_P (sibling));
4402 if (find_regno_fusage (sibling, USE, 3))
4403 return 0;
4406 /* ... and that there are no call-saved registers in r0-r2
4407 (always true in the default ABI). */
4408 if (saved_int_regs & 0x7)
4409 return 0;
4412 /* Can't be done if interworking with Thumb, and any registers have been
4413 stacked. */
4414 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4415 return 0;
4417 /* On StrongARM, conditional returns are expensive if they aren't
4418 taken and multiple registers have been stacked. */
4419 if (iscond && arm_tune_strongarm)
4421 /* Conditional return when just the LR is stored is a simple
4422 conditional-load instruction, that's not expensive. */
4423 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4424 return 0;
4426 if (flag_pic
4427 && arm_pic_register != INVALID_REGNUM
4428 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4429 return 0;
4432 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4433 several instructions if anything needs to be popped. Armv8.1-M Mainline
4434 also needs several instructions to save and restore FP context. */
4435 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4436 return 0;
4438 /* If there are saved registers but the LR isn't saved, then we need
4439 two instructions for the return. */
4440 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4441 return 0;
4443 /* Can't be done if any of the VFP regs are pushed,
4444 since this also requires an insn. */
4445 if (TARGET_VFP_BASE)
4446 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4447 if (reg_needs_saving_p (regno))
4448 return 0;
4450 if (TARGET_REALLY_IWMMXT)
4451 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4452 if (reg_needs_saving_p (regno))
4453 return 0;
4455 return 1;
4458 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4459 shrink-wrapping if possible. This is the case if we need to emit a
4460 prologue, which we can test by looking at the offsets. */
4461 bool
4462 use_simple_return_p (void)
4464 arm_stack_offsets *offsets;
4466 /* Note this function can be called before or after reload. */
4467 if (!reload_completed)
4468 arm_compute_frame_layout ();
4470 offsets = arm_get_frame_offsets ();
4471 return offsets->outgoing_args != 0;
4474 /* Return TRUE if int I is a valid immediate ARM constant. */
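/* Worked examples (illustrative): 0xFF000000 is valid in ARM state (0xFF
   rotated right by 8), while 0x1FE (0xFF << 1) is valid only in Thumb-2
   state, because ARM data-processing immediates are an 8-bit value rotated
   right by an even amount, whereas Thumb-2 also allows arbitrary shifts and
   the replicated byte patterns handled below.  */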
4477 const_ok_for_arm (HOST_WIDE_INT i)
4479 int lowbit;
4481 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4482 be all zero, or all one. */
4483 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4484 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4485 != ((~(unsigned HOST_WIDE_INT) 0)
4486 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4487 return FALSE;
4489 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4491 /* Fast return for 0 and small values. We must do this for zero, since
4492 the code below can't handle that one case. */
4493 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4494 return TRUE;
4496 /* Get the number of trailing zeros. */
4497 lowbit = ffs((int) i) - 1;
4499 /* Only even shifts are allowed in ARM mode so round down to the
4500 nearest even number. */
4501 if (TARGET_ARM)
4502 lowbit &= ~1;
4504 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4505 return TRUE;
4507 if (TARGET_ARM)
4509 /* Allow rotated constants in ARM mode. */
4510 if (lowbit <= 4
4511 && ((i & ~0xc000003f) == 0
4512 || (i & ~0xf000000f) == 0
4513 || (i & ~0xfc000003) == 0))
4514 return TRUE;
4516 else if (TARGET_THUMB2)
4518 HOST_WIDE_INT v;
4520 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4521 v = i & 0xff;
4522 v |= v << 16;
4523 if (i == v || i == (v | (v << 8)))
4524 return TRUE;
4526 /* Allow repeated pattern 0xXY00XY00. */
4527 v = i & 0xff00;
4528 v |= v << 16;
4529 if (i == v)
4530 return TRUE;
4532 else if (TARGET_HAVE_MOVT)
4534 /* Thumb-1 Targets with MOVT. */
4535 if (i > 0xffff)
4536 return FALSE;
4537 else
4538 return TRUE;
4541 return FALSE;
4544 /* Return true if I is a valid constant for the operation CODE. */
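/* For instance (illustrative): an AND with 0xFFFFFF00 is accepted because
   its complement 0xFF is a valid immediate (the operation can be emitted as
   a BIC), and on Thumb-2 a PLUS of any value up to 4095 in magnitude is
   accepted because addw/subw take a 12-bit immediate.  */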
4546 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4548 if (const_ok_for_arm (i))
4549 return 1;
4551 switch (code)
4553 case SET:
4554 /* See if we can use movw. */
4555 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4556 return 1;
4557 else
4558 /* Otherwise, try mvn. */
4559 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4561 case PLUS:
4562 /* See if we can use addw or subw. */
4563 if (TARGET_THUMB2
4564 && ((i & 0xfffff000) == 0
4565 || ((-i) & 0xfffff000) == 0))
4566 return 1;
4567 /* Fall through. */
4568 case COMPARE:
4569 case EQ:
4570 case NE:
4571 case GT:
4572 case LE:
4573 case LT:
4574 case GE:
4575 case GEU:
4576 case LTU:
4577 case GTU:
4578 case LEU:
4579 case UNORDERED:
4580 case ORDERED:
4581 case UNEQ:
4582 case UNGE:
4583 case UNLT:
4584 case UNGT:
4585 case UNLE:
4586 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4588 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4589 case XOR:
4590 return 0;
4592 case IOR:
4593 if (TARGET_THUMB2)
4594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4595 return 0;
4597 case AND:
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4600 default:
4601 gcc_unreachable ();
4605 /* Return true if I is a valid DImode constant for the operation CODE. */
4607 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4609 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4610 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4611 rtx hi = GEN_INT (hi_val);
4612 rtx lo = GEN_INT (lo_val);
4614 if (TARGET_THUMB1)
4615 return 0;
4617 switch (code)
4619 case AND:
4620 case IOR:
4621 case XOR:
4622 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4623 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4624 case PLUS:
4625 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4627 default:
4628 return 0;
4632 /* Emit a sequence of insns to handle a large constant.
4633 CODE is the code of the operation required, it can be any of SET, PLUS,
4634 IOR, AND, XOR, MINUS;
4635 MODE is the mode in which the operation is being performed;
4636 VAL is the integer to operate on;
4637 SOURCE is the other operand (a register, or a null-pointer for SET);
4638 SUBTARGETS means it is safe to create scratch registers if that will
4639 either produce a simpler sequence, or we will want to cse the values.
4640 Return value is the number of insns emitted. */
4642 /* ??? Tweak this for thumb2. */
4644 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4645 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4647 rtx cond;
4649 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4650 cond = COND_EXEC_TEST (PATTERN (insn));
4651 else
4652 cond = NULL_RTX;
4654 if (subtargets || code == SET
4655 || (REG_P (target) && REG_P (source)
4656 && REGNO (target) != REGNO (source)))
4658 /* After arm_reorg has been called, we can't fix up expensive
4659 constants by pushing them into memory so we must synthesize
4660 them in-line, regardless of the cost. This is only likely to
4661 be more costly on chips that have load delay slots and we are
4662 compiling without running the scheduler (so no splitting
4663 occurred before the final instruction emission).
4665 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4667 if (!cfun->machine->after_arm_reorg
4668 && !cond
4669 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4670 1, 0)
4671 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4672 + (code != SET))))
4674 if (code == SET)
4676 /* Currently SET is the only monadic value for CODE, all
4677 the rest are dyadic. */
4678 if (TARGET_USE_MOVT)
4679 arm_emit_movpair (target, GEN_INT (val));
4680 else
4681 emit_set_insn (target, GEN_INT (val));
4683 return 1;
4685 else
4687 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4689 if (TARGET_USE_MOVT)
4690 arm_emit_movpair (temp, GEN_INT (val));
4691 else
4692 emit_set_insn (temp, GEN_INT (val));
4694 /* For MINUS, the value is subtracted from, since we never
4695 have subtraction of a constant. */
4696 if (code == MINUS)
4697 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4698 else
4699 emit_set_insn (target,
4700 gen_rtx_fmt_ee (code, mode, source, temp));
4701 return 2;
4706 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4710 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4711 ARM/THUMB2 immediates and add up to VAL.
4712 The function's return value gives the number of insns required. */
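/* Illustrative example: in ARM state VAL = 0x00FF00FF is not itself a valid
   immediate, but it can be covered by two valid immediates such as
   0x00FF0000 and 0x000000FF, so a two-element sequence is returned; in
   Thumb-2 state the same value is a single replicated constant and is
   normally handled before any splitting is attempted.  */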
4713 static int
4714 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4715 struct four_ints *return_sequence)
4717 int best_consecutive_zeros = 0;
4718 int i;
4719 int best_start = 0;
4720 int insns1, insns2;
4721 struct four_ints tmp_sequence;
4723 /* If we aren't targeting ARM, the best place to start is always at
4724 the bottom, otherwise look more closely. */
4725 if (TARGET_ARM)
4727 for (i = 0; i < 32; i += 2)
4729 int consecutive_zeros = 0;
4731 if (!(val & (3 << i)))
4733 while ((i < 32) && !(val & (3 << i)))
4735 consecutive_zeros += 2;
4736 i += 2;
4738 if (consecutive_zeros > best_consecutive_zeros)
4740 best_consecutive_zeros = consecutive_zeros;
4741 best_start = i - consecutive_zeros;
4743 i -= 2;
4748 /* So long as it won't require any more insns to do so, it's
4749 desirable to emit a small constant (in bits 0...9) in the last
4750 insn. This way there is more chance that it can be combined with
4751 a later addressing insn to form a pre-indexed load or store
4752 operation. Consider:
4754 *((volatile int *)0xe0000100) = 1;
4755 *((volatile int *)0xe0000110) = 2;
4757 We want this to wind up as:
4759 mov rA, #0xe0000000
4760 mov rB, #1
4761 str rB, [rA, #0x100]
4762 mov rB, #2
4763 str rB, [rA, #0x110]
4765 rather than having to synthesize both large constants from scratch.
4767 Therefore, we calculate how many insns would be required to emit
4768 the constant starting from `best_start', and also starting from
4769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4770 yield a shorter sequence, we may as well use zero. */
4771 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4772 if (best_start != 0
4773 && ((HOST_WIDE_INT_1U << best_start) < val))
4775 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4776 if (insns2 <= insns1)
4778 *return_sequence = tmp_sequence;
4779 insns1 = insns2;
4783 return insns1;
4786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4787 static int
4788 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4789 struct four_ints *return_sequence, int i)
4791 int remainder = val & 0xffffffff;
4792 int insns = 0;
4794 /* Try and find a way of doing the job in either two or three
4795 instructions.
4797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4798 location. We start at position I. This may be the MSB, or
4799 optimal_immediate_sequence may have positioned it at the largest block
4800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4801 wrapping around to the top of the word when we drop off the bottom.
4802 In the worst case this code should produce no more than four insns.
4804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4805 constants, shifted to any arbitrary location. We should always start
4806 at the MSB. */
4809 int end;
4810 unsigned int b1, b2, b3, b4;
4811 unsigned HOST_WIDE_INT result;
4812 int loc;
4814 gcc_assert (insns < 4);
4816 if (i <= 0)
4817 i += 32;
4819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4820 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4822 loc = i;
4823 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4824 /* We can use addw/subw for the last 12 bits. */
4825 result = remainder;
4826 else
4828 /* Use an 8-bit shifted/rotated immediate. */
4829 end = i - 8;
4830 if (end < 0)
4831 end += 32;
4832 result = remainder & ((0x0ff << end)
4833 | ((i < end) ? (0xff >> (32 - end))
4834 : 0));
4835 i -= 8;
4838 else
4840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4841 arbitrary shifts. */
4842 i -= TARGET_ARM ? 2 : 1;
4843 continue;
4846 /* Next, see if we can do a better job with a thumb2 replicated
4847 constant.
4849 We do it this way around to catch the cases like 0x01F001E0 where
4850 two 8-bit immediates would work, but a replicated constant would
4851 make it worse.
4853 TODO: 16-bit constants that don't clear all the bits, but still win.
4854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4855 if (TARGET_THUMB2)
4857 b1 = (remainder & 0xff000000) >> 24;
4858 b2 = (remainder & 0x00ff0000) >> 16;
4859 b3 = (remainder & 0x0000ff00) >> 8;
4860 b4 = remainder & 0xff;
4862 if (loc > 24)
4864 /* The 8-bit immediate already found clears b1 (and maybe b2),
4865 but must leave b3 and b4 alone. */
4867 /* First try to find a 32-bit replicated constant that clears
4868 almost everything. We can assume that we can't do it in one,
4869 or else we wouldn't be here. */
4870 unsigned int tmp = b1 & b2 & b3 & b4;
4871 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4872 + (tmp << 24);
4873 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4874 + (tmp == b3) + (tmp == b4);
4875 if (tmp
4876 && (matching_bytes >= 3
4877 || (matching_bytes == 2
4878 && const_ok_for_op (remainder & ~tmp2, code))))
4880 /* At least 3 of the bytes match, and the fourth has at
4881 least as many bits set, or two of the bytes match
4882 and it will only require one more insn to finish. */
4883 result = tmp2;
4884 i = tmp != b1 ? 32
4885 : tmp != b2 ? 24
4886 : tmp != b3 ? 16
4887 : 8;
4890 /* Second, try to find a 16-bit replicated constant that can
4891 leave three of the bytes clear. If b2 or b4 is already
4892 zero, then we can. If the 8-bit from above would not
4893 clear b2 anyway, then we still win. */
4894 else if (b1 == b3 && (!b2 || !b4
4895 || (remainder & 0x00ff0000 & ~result)))
4897 result = remainder & 0xff00ff00;
4898 i = 24;
4901 else if (loc > 16)
4903 /* The 8-bit immediate already found clears b2 (and maybe b3)
4904 and we don't get here unless b1 is already clear, but it will
4905 leave b4 unchanged. */
4907 /* If we can clear b2 and b4 at once, then we win, since the
4908 8-bits couldn't possibly reach that far. */
4909 if (b2 == b4)
4911 result = remainder & 0x00ff00ff;
4912 i = 16;
4917 return_sequence->i[insns++] = result;
4918 remainder &= ~result;
4920 if (code == SET || code == MINUS)
4921 code = PLUS;
4923 while (remainder);
4925 return insns;
4928 /* Emit an instruction with the indicated PATTERN. If COND is
4929 non-NULL, conditionalize the execution of the instruction on COND
4930 being true. */
4932 static void
4933 emit_constant_insn (rtx cond, rtx pattern)
4935 if (cond)
4936 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4937 emit_insn (pattern);
4940 /* As above, but extra parameter GENERATE which, if clear, suppresses
4941 RTL generation. */
4943 static int
4944 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4945 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4946 int subtargets, int generate)
4948 int can_invert = 0;
4949 int can_negate = 0;
4950 int final_invert = 0;
4951 int i;
4952 int set_sign_bit_copies = 0;
4953 int clear_sign_bit_copies = 0;
4954 int clear_zero_bit_copies = 0;
4955 int set_zero_bit_copies = 0;
4956 int insns = 0, neg_insns, inv_insns;
4957 unsigned HOST_WIDE_INT temp1, temp2;
4958 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4959 struct four_ints *immediates;
4960 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4962 /* Find out which operations are safe for a given CODE. Also do a quick
4963 check for degenerate cases; these can occur when DImode operations
4964 are split. */
4965 switch (code)
4967 case SET:
4968 can_invert = 1;
4969 break;
4971 case PLUS:
4972 can_negate = 1;
4973 break;
4975 case IOR:
4976 if (remainder == 0xffffffff)
4978 if (generate)
4979 emit_constant_insn (cond,
4980 gen_rtx_SET (target,
4981 GEN_INT (ARM_SIGN_EXTEND (val))));
4982 return 1;
4985 if (remainder == 0)
4987 if (reload_completed && rtx_equal_p (target, source))
4988 return 0;
4990 if (generate)
4991 emit_constant_insn (cond, gen_rtx_SET (target, source));
4992 return 1;
4994 break;
4996 case AND:
4997 if (remainder == 0)
4999 if (generate)
5000 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5001 return 1;
5003 if (remainder == 0xffffffff)
5005 if (reload_completed && rtx_equal_p (target, source))
5006 return 0;
5007 if (generate)
5008 emit_constant_insn (cond, gen_rtx_SET (target, source));
5009 return 1;
5011 can_invert = 1;
5012 break;
5014 case XOR:
5015 if (remainder == 0)
5017 if (reload_completed && rtx_equal_p (target, source))
5018 return 0;
5019 if (generate)
5020 emit_constant_insn (cond, gen_rtx_SET (target, source));
5021 return 1;
5024 if (remainder == 0xffffffff)
5026 if (generate)
5027 emit_constant_insn (cond,
5028 gen_rtx_SET (target,
5029 gen_rtx_NOT (mode, source)));
5030 return 1;
5032 final_invert = 1;
5033 break;
5035 case MINUS:
5036 /* We treat MINUS as (val - source), since (source - val) is always
5037 passed as (source + (-val)). */
5038 if (remainder == 0)
5040 if (generate)
5041 emit_constant_insn (cond,
5042 gen_rtx_SET (target,
5043 gen_rtx_NEG (mode, source)));
5044 return 1;
5046 if (const_ok_for_arm (val))
5048 if (generate)
5049 emit_constant_insn (cond,
5050 gen_rtx_SET (target,
5051 gen_rtx_MINUS (mode, GEN_INT (val),
5052 source)));
5053 return 1;
5056 break;
5058 default:
5059 gcc_unreachable ();
5062 /* If we can do it in one insn get out quickly. */
5063 if (const_ok_for_op (val, code))
5065 if (generate)
5066 emit_constant_insn (cond,
5067 gen_rtx_SET (target,
5068 (source
5069 ? gen_rtx_fmt_ee (code, mode, source,
5070 GEN_INT (val))
5071 : GEN_INT (val))));
5072 return 1;
5075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5076 insn. */
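  /* E.g. (illustrative) an AND with 0xFFFF becomes a single uxth, and on
     Thumb-2 an AND with 0x1FFFFF (2^21 - 1) becomes a single ubfx extracting
     the low 21 bits.  */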
5077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5080 if (generate)
5082 if (mode == SImode && i == 16)
5083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5084 smaller insn. */
5085 emit_constant_insn (cond,
5086 gen_zero_extendhisi2
5087 (target, gen_lowpart (HImode, source)));
5088 else
5089 /* Extz only supports SImode, but we can coerce the operands
5090 into that mode. */
5091 emit_constant_insn (cond,
5092 gen_extzv_t2 (gen_lowpart (SImode, target),
5093 gen_lowpart (SImode, source),
5094 GEN_INT (i), const0_rtx));
5097 return 1;
5100 /* Calculate a few attributes that may be useful for specific
5101 optimizations. */
5102 /* Count number of leading zeros. */
5103 for (i = 31; i >= 0; i--)
5105 if ((remainder & (1 << i)) == 0)
5106 clear_sign_bit_copies++;
5107 else
5108 break;
5111 /* Count number of leading 1's. */
5112 for (i = 31; i >= 0; i--)
5114 if ((remainder & (1 << i)) != 0)
5115 set_sign_bit_copies++;
5116 else
5117 break;
5120 /* Count number of trailing zeros. */
5121 for (i = 0; i <= 31; i++)
5123 if ((remainder & (1 << i)) == 0)
5124 clear_zero_bit_copies++;
5125 else
5126 break;
5129 /* Count number of trailing 1's. */
5130 for (i = 0; i <= 31; i++)
5132 if ((remainder & (1 << i)) != 0)
5133 set_zero_bit_copies++;
5134 else
5135 break;
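/* Worked example (illustrative, not in the original source): for
   remainder == 0xfff00000 the four loops above yield
   clear_sign_bit_copies == 0, set_sign_bit_copies == 12,
   clear_zero_bit_copies == 20 and set_zero_bit_copies == 0.  */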
5138 switch (code)
5140 case SET:
5141 /* See if we can do this by sign_extending a constant that is known
5142 to be negative. This is a good way of doing it, since the shift
5143 may well merge into a subsequent insn. */
5144 if (set_sign_bit_copies > 1)
5146 if (const_ok_for_arm
5147 (temp1 = ARM_SIGN_EXTEND (remainder
5148 << (set_sign_bit_copies - 1))))
5150 if (generate)
5152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5153 emit_constant_insn (cond,
5154 gen_rtx_SET (new_src, GEN_INT (temp1)));
5155 emit_constant_insn (cond,
5156 gen_ashrsi3 (target, new_src,
5157 GEN_INT (set_sign_bit_copies - 1)));
5159 return 2;
5161 /* For an inverted constant, we will need to set the low bits;
5162 these will be shifted out of harm's way. */
5163 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5164 if (const_ok_for_arm (~temp1))
5166 if (generate)
5168 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5169 emit_constant_insn (cond,
5170 gen_rtx_SET (new_src, GEN_INT (temp1)));
5171 emit_constant_insn (cond,
5172 gen_ashrsi3 (target, new_src,
5173 GEN_INT (set_sign_bit_copies - 1)));
5175 return 2;
5179 /* See if we can calculate the value as the difference between two
5180 valid immediates. */
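/* Worked example (illustrative, not part of the original comment):
   0x00fffff0 is not a valid ARM immediate, but it equals
   0x01000000 - 0x10, so the code below would load 0x01000000 and
   then subtract 16 -- two instructions in total.  */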
5181 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5183 int topshift = clear_sign_bit_copies & ~1;
5185 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5186 & (0xff000000 >> topshift));
5188 /* If temp1 is zero, then that means the 9 most significant
5189 bits of remainder were 1 and we've caused it to overflow.
5190 When topshift is 0 we don't need to do anything since we
5191 can borrow from 'bit 32'. */
5192 if (temp1 == 0 && topshift != 0)
5193 temp1 = 0x80000000 >> (topshift - 1);
5195 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5197 if (const_ok_for_arm (temp2))
5199 if (generate)
5201 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5202 emit_constant_insn (cond,
5203 gen_rtx_SET (new_src, GEN_INT (temp1)));
5204 emit_constant_insn (cond,
5205 gen_addsi3 (target, new_src,
5206 GEN_INT (-temp2)));
5209 return 2;
5213 /* See if we can generate this by setting the bottom (or the top)
5214 16 bits, and then shifting these into the other half of the
5215 word. We only look for the simplest cases, to do more would cost
5216 too much. Be careful, however, not to generate this when the
5217 alternative would take fewer insns. */
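/* Illustrative example (assumption, not in the original comment): on a
   core without MOVW, 0x01010101 is built this way in three
   instructions -- two to load 0x0101, then an ORR of that value with
   itself shifted left by 16 -- instead of the four a plain immediate
   sequence would need.  */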
5218 if (val & 0xffff0000)
5220 temp1 = remainder & 0xffff0000;
5221 temp2 = remainder & 0x0000ffff;
5223 /* Overlaps outside this range are best done using other methods. */
5224 for (i = 9; i < 24; i++)
5226 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5227 && !const_ok_for_arm (temp2))
5229 rtx new_src = (subtargets
5230 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5231 : target);
5232 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5233 source, subtargets, generate);
5234 source = new_src;
5235 if (generate)
5236 emit_constant_insn
5237 (cond,
5238 gen_rtx_SET
5239 (target,
5240 gen_rtx_IOR (mode,
5241 gen_rtx_ASHIFT (mode, source,
5242 GEN_INT (i)),
5243 source)));
5244 return insns + 1;
5248 /* Don't duplicate cases already considered. */
5249 for (i = 17; i < 24; i++)
5251 if (((temp1 | (temp1 >> i)) == remainder)
5252 && !const_ok_for_arm (temp1))
5254 rtx new_src = (subtargets
5255 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5256 : target);
5257 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5258 source, subtargets, generate);
5259 source = new_src;
5260 if (generate)
5261 emit_constant_insn
5262 (cond,
5263 gen_rtx_SET (target,
5264 gen_rtx_IOR
5265 (mode,
5266 gen_rtx_LSHIFTRT (mode, source,
5267 GEN_INT (i)),
5268 source)));
5269 return insns + 1;
5273 break;
5275 case IOR:
5276 case XOR:
5277 /* If we have IOR or XOR, and the constant can be loaded in a
5278 single instruction, and we can find a temporary to put it in,
5279 then this can be done in two instructions instead of 3-4. */
5280 if (subtargets
5281 /* TARGET can't be NULL if SUBTARGETS is 0. */
5282 || (reload_completed && !reg_mentioned_p (target, source)))
5284 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5286 if (generate)
5288 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5290 emit_constant_insn (cond,
5291 gen_rtx_SET (sub, GEN_INT (val)));
5292 emit_constant_insn (cond,
5293 gen_rtx_SET (target,
5294 gen_rtx_fmt_ee (code, mode,
5295 source, sub)));
5297 return 2;
5301 if (code == XOR)
5302 break;
5304 /* Convert
5305 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5306 and the remainder 0s, e.g. 0xfff00000) into
5307 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5309 This can be done in 2 instructions by using shifts with mov or mvn.
5310 e.g. for
5311 x = x | 0xfff00000;
5312 we generate:
5313 mvn r0, r0, asl #12
5314 mvn r0, r0, lsr #12 */
5315 if (set_sign_bit_copies > 8
5316 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5318 if (generate)
5320 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5321 rtx shift = GEN_INT (set_sign_bit_copies);
5323 emit_constant_insn
5324 (cond,
5325 gen_rtx_SET (sub,
5326 gen_rtx_NOT (mode,
5327 gen_rtx_ASHIFT (mode,
5328 source,
5329 shift))));
5330 emit_constant_insn
5331 (cond,
5332 gen_rtx_SET (target,
5333 gen_rtx_NOT (mode,
5334 gen_rtx_LSHIFTRT (mode, sub,
5335 shift))));
5337 return 2;
5340 /* Convert
5341 x = y | constant (which has set_zero_bit_copies number of trailing ones) into
5343 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5345 E.g. for r0 = r0 | 0xfff we generate
5346 mvn r0, r0, lsr #12
5347 mvn r0, r0, asl #12 */
5350 if (set_zero_bit_copies > 8
5351 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5353 if (generate)
5355 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5356 rtx shift = GEN_INT (set_zero_bit_copies);
5358 emit_constant_insn
5359 (cond,
5360 gen_rtx_SET (sub,
5361 gen_rtx_NOT (mode,
5362 gen_rtx_LSHIFTRT (mode,
5363 source,
5364 shift))));
5365 emit_constant_insn
5366 (cond,
5367 gen_rtx_SET (target,
5368 gen_rtx_NOT (mode,
5369 gen_rtx_ASHIFT (mode, sub,
5370 shift))));
5372 return 2;
5375 /* This will never be reached for Thumb2 because orn is a valid
5376 instruction. This is for Thumb1 and the ARM 32 bit cases.
5378 x = y | constant (such that ~constant is a valid constant)
5379 Transform this to
5380 x = ~(~y & ~constant). */
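/* Illustrative example (assumption, not in the original comment):
   with no spare register, x = y | 0xffff00ff uses the fact that
   ~0xffff00ff == 0x0000ff00 is a valid immediate, giving
   mvn t, y;  and t, t, #0xff00;  mvn x, t  -- the three
   instructions counted below.  */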
5382 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5384 if (generate)
5386 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5387 emit_constant_insn (cond,
5388 gen_rtx_SET (sub,
5389 gen_rtx_NOT (mode, source)));
5390 source = sub;
5391 if (subtargets)
5392 sub = gen_reg_rtx (mode);
5393 emit_constant_insn (cond,
5394 gen_rtx_SET (sub,
5395 gen_rtx_AND (mode, source,
5396 GEN_INT (temp1))));
5397 emit_constant_insn (cond,
5398 gen_rtx_SET (target,
5399 gen_rtx_NOT (mode, sub)));
5401 return 3;
5403 break;
5405 case AND:
5406 /* See if two shifts will do 2 or more insn's worth of work. */
5407 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5409 HOST_WIDE_INT shift_mask = ((0xffffffff
5410 << (32 - clear_sign_bit_copies))
5411 & 0xffffffff);
5413 if ((remainder | shift_mask) != 0xffffffff)
5415 HOST_WIDE_INT new_val
5416 = ARM_SIGN_EXTEND (remainder | shift_mask);
5418 if (generate)
5420 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5421 insns = arm_gen_constant (AND, SImode, cond, new_val,
5422 new_src, source, subtargets, 1);
5423 source = new_src;
5425 else
5427 rtx targ = subtargets ? NULL_RTX : target;
5428 insns = arm_gen_constant (AND, mode, cond, new_val,
5429 targ, source, subtargets, 0);
5433 if (generate)
5435 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5436 rtx shift = GEN_INT (clear_sign_bit_copies);
5438 emit_insn (gen_ashlsi3 (new_src, source, shift));
5439 emit_insn (gen_lshrsi3 (target, new_src, shift));
5442 return insns + 2;
5445 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5447 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5449 if ((remainder | shift_mask) != 0xffffffff)
5451 HOST_WIDE_INT new_val
5452 = ARM_SIGN_EXTEND (remainder | shift_mask);
5453 if (generate)
5455 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5457 insns = arm_gen_constant (AND, mode, cond, new_val,
5458 new_src, source, subtargets, 1);
5459 source = new_src;
5461 else
5463 rtx targ = subtargets ? NULL_RTX : target;
5465 insns = arm_gen_constant (AND, mode, cond, new_val,
5466 targ, source, subtargets, 0);
5470 if (generate)
5472 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5473 rtx shift = GEN_INT (clear_zero_bit_copies);
5475 emit_insn (gen_lshrsi3 (new_src, source, shift));
5476 emit_insn (gen_ashlsi3 (target, new_src, shift));
5479 return insns + 2;
5482 break;
5484 default:
5485 break;
5488 /* Calculate what the instruction sequences would be if we generated it
5489 normally, negated, or inverted. */
5490 if (code == AND)
5491 /* AND cannot be split into multiple insns, so invert and use BIC. */
5492 insns = 99;
5493 else
5494 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5496 if (can_negate)
5497 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5498 &neg_immediates);
5499 else
5500 neg_insns = 99;
5502 if (can_invert || final_invert)
5503 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5504 &inv_immediates);
5505 else
5506 inv_insns = 99;
5508 immediates = &pos_immediates;
5510 /* Is the negated immediate sequence more efficient? */
5511 if (neg_insns < insns && neg_insns <= inv_insns)
5513 insns = neg_insns;
5514 immediates = &neg_immediates;
5516 else
5517 can_negate = 0;
5519 /* Is the inverted immediate sequence more efficient?
5520 We must allow for an extra NOT instruction for XOR operations, although
5521 there is some chance that the final 'mvn' will get optimized later. */
5522 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5524 insns = inv_insns;
5525 immediates = &inv_immediates;
5527 else
5529 can_invert = 0;
5530 final_invert = 0;
5533 /* Now output the chosen sequence as instructions. */
5534 if (generate)
5536 for (i = 0; i < insns; i++)
5538 rtx new_src, temp1_rtx;
5540 temp1 = immediates->i[i];
5542 if (code == SET || code == MINUS)
5543 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5544 else if ((final_invert || i < (insns - 1)) && subtargets)
5545 new_src = gen_reg_rtx (mode);
5546 else
5547 new_src = target;
5549 if (can_invert)
5550 temp1 = ~temp1;
5551 else if (can_negate)
5552 temp1 = -temp1;
5554 temp1 = trunc_int_for_mode (temp1, mode);
5555 temp1_rtx = GEN_INT (temp1);
5557 if (code == SET)
5559 else if (code == MINUS)
5560 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5561 else
5562 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5564 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5565 source = new_src;
5567 if (code == SET)
5569 can_negate = can_invert;
5570 can_invert = 0;
5571 code = PLUS;
5573 else if (code == MINUS)
5574 code = PLUS;
5578 if (final_invert)
5580 if (generate)
5581 emit_constant_insn (cond, gen_rtx_SET (target,
5582 gen_rtx_NOT (mode, source)));
5583 insns++;
5586 return insns;
5589 /* Return TRUE if op is a constant where both the low and high words are
5590 suitable for RSB/RSC instructions. This is never true for Thumb, since
5591 we do not have RSC in that case. */
5592 static bool
5593 arm_const_double_prefer_rsbs_rsc (rtx op)
5595 /* Thumb lacks RSC, so we never prefer that sequence. */
5596 if (TARGET_THUMB || !CONST_INT_P (op))
5597 return false;
5598 HOST_WIDE_INT hi, lo;
5599 lo = UINTVAL (op) & 0xffffffffULL;
5600 hi = UINTVAL (op) >> 32;
5601 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5604 /* Canonicalize a comparison so that we are more likely to recognize it.
5605 This can be done for a few constant compares, where we can make the
5606 immediate value easier to load. */
5608 static void
5609 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5610 bool op0_preserve_value)
5612 machine_mode mode;
5613 unsigned HOST_WIDE_INT i, maxval;
5615 mode = GET_MODE (*op0);
5616 if (mode == VOIDmode)
5617 mode = GET_MODE (*op1);
5619 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5621 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5622 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5623 either reversed or (for constant OP1) adjusted to GE/LT.
5624 Similarly for GTU/LEU in Thumb mode. */
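/* Illustrative example (not from the original source): a DImode
   x > 5  has no direct GT sequence here, so it is rewritten below as
   x >= 6  (GE), which the cmp/sbc sequence can handle.  */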
5625 if (mode == DImode)
5628 if (*code == GT || *code == LE
5629 || *code == GTU || *code == LEU)
5631 /* Missing comparison. First try to use an available
5632 comparison. */
5633 if (CONST_INT_P (*op1))
5635 i = INTVAL (*op1);
5636 switch (*code)
5638 case GT:
5639 case LE:
5640 if (i != maxval)
5642 /* Try to convert to GE/LT, unless that would be more
5643 expensive. */
5644 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5645 && arm_const_double_prefer_rsbs_rsc (*op1))
5646 return;
5647 *op1 = GEN_INT (i + 1);
5648 *code = *code == GT ? GE : LT;
5650 else
5652 /* GT maxval is always false, LE maxval is always true.
5653 We can't fold that away here as we must make a
5654 comparison, but we can fold them to comparisons
5655 with the same result that can be handled:
5656 op0 GT maxval -> op0 LT minval
5657 op0 LE maxval -> op0 GE minval
5658 where minval = (-maxval - 1). */
5659 *op1 = GEN_INT (-maxval - 1);
5660 *code = *code == GT ? LT : GE;
5662 return;
5664 case GTU:
5665 case LEU:
5666 if (i != ~((unsigned HOST_WIDE_INT) 0))
5668 /* Try to convert to GEU/LTU, unless that would
5669 be more expensive. */
5670 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5671 && arm_const_double_prefer_rsbs_rsc (*op1))
5672 return;
5673 *op1 = GEN_INT (i + 1);
5674 *code = *code == GTU ? GEU : LTU;
5676 else
5678 /* GTU ~0 is always false, LEU ~0 is always true.
5679 We can't fold that away here as we must make a
5680 comparison, but we can fold them to comparisons
5681 with the same result that can be handled:
5682 op0 GTU ~0 -> op0 LTU 0
5683 op0 LEU ~0 -> op0 GEU 0. */
5684 *op1 = const0_rtx;
5685 *code = *code == GTU ? LTU : GEU;
5687 return;
5689 default:
5690 gcc_unreachable ();
5694 if (!op0_preserve_value)
5696 std::swap (*op0, *op1);
5697 *code = (int)swap_condition ((enum rtx_code)*code);
5700 return;
5703 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5704 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5705 to facilitate possible combining with a cmp into 'ands'. */
5706 if (mode == SImode
5707 && GET_CODE (*op0) == ZERO_EXTEND
5708 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5709 && GET_MODE (XEXP (*op0, 0)) == QImode
5710 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5711 && subreg_lowpart_p (XEXP (*op0, 0))
5712 && *op1 == const0_rtx)
5713 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5714 GEN_INT (255));
5716 /* Comparisons smaller than DImode. Only adjust comparisons against
5717 an out-of-range constant. */
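/* Illustrative example (not from the original source): for SImode,
   x <= 0x3ff  cannot encode #0x3ff in a CMP, but the rewrite below
   turns it into  x < 0x400, whose constant is encodable.  */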
5718 if (!CONST_INT_P (*op1)
5719 || const_ok_for_arm (INTVAL (*op1))
5720 || const_ok_for_arm (- INTVAL (*op1)))
5721 return;
5723 i = INTVAL (*op1);
5725 switch (*code)
5727 case EQ:
5728 case NE:
5729 return;
5731 case GT:
5732 case LE:
5733 if (i != maxval
5734 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5736 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5737 *code = *code == GT ? GE : LT;
5738 return;
5740 break;
5742 case GE:
5743 case LT:
5744 if (i != ~maxval
5745 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5747 *op1 = GEN_INT (i - 1);
5748 *code = *code == GE ? GT : LE;
5749 return;
5751 break;
5753 case GTU:
5754 case LEU:
5755 if (i != ~((unsigned HOST_WIDE_INT) 0)
5756 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5758 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5759 *code = *code == GTU ? GEU : LTU;
5760 return;
5762 break;
5764 case GEU:
5765 case LTU:
5766 if (i != 0
5767 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5769 *op1 = GEN_INT (i - 1);
5770 *code = *code == GEU ? GTU : LEU;
5771 return;
5773 break;
5775 default:
5776 gcc_unreachable ();
5781 /* Define how to find the value returned by a function. */
5783 static rtx
5784 arm_function_value(const_tree type, const_tree func,
5785 bool outgoing ATTRIBUTE_UNUSED)
5787 machine_mode mode;
5788 int unsignedp ATTRIBUTE_UNUSED;
5789 rtx r ATTRIBUTE_UNUSED;
5791 mode = TYPE_MODE (type);
5793 if (TARGET_AAPCS_BASED)
5794 return aapcs_allocate_return_reg (mode, type, func);
5796 /* Promote integer types. */
5797 if (INTEGRAL_TYPE_P (type))
5798 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5800 /* Promotes small structs returned in a register to full-word size
5801 for big-endian AAPCS. */
5802 if (arm_return_in_msb (type))
5804 HOST_WIDE_INT size = int_size_in_bytes (type);
5805 if (size % UNITS_PER_WORD != 0)
5807 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5808 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5812 return arm_libcall_value_1 (mode);
5815 /* libcall hashtable helpers. */
5817 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5819 static inline hashval_t hash (const rtx_def *);
5820 static inline bool equal (const rtx_def *, const rtx_def *);
5821 static inline void remove (rtx_def *);
5824 inline bool
5825 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5827 return rtx_equal_p (p1, p2);
5830 inline hashval_t
5831 libcall_hasher::hash (const rtx_def *p1)
5833 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5836 typedef hash_table<libcall_hasher> libcall_table_type;
5838 static void
5839 add_libcall (libcall_table_type *htab, rtx libcall)
5841 *htab->find_slot (libcall, INSERT) = libcall;
5844 static bool
5845 arm_libcall_uses_aapcs_base (const_rtx libcall)
5847 static bool init_done = false;
5848 static libcall_table_type *libcall_htab = NULL;
5850 if (!init_done)
5852 init_done = true;
5854 libcall_htab = new libcall_table_type (31);
5855 add_libcall (libcall_htab,
5856 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5857 add_libcall (libcall_htab,
5858 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5859 add_libcall (libcall_htab,
5860 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5861 add_libcall (libcall_htab,
5862 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5864 add_libcall (libcall_htab,
5865 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5866 add_libcall (libcall_htab,
5867 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5868 add_libcall (libcall_htab,
5869 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5870 add_libcall (libcall_htab,
5871 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5873 add_libcall (libcall_htab,
5874 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5875 add_libcall (libcall_htab,
5876 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5877 add_libcall (libcall_htab,
5878 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5879 add_libcall (libcall_htab,
5880 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5881 add_libcall (libcall_htab,
5882 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5883 add_libcall (libcall_htab,
5884 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5885 add_libcall (libcall_htab,
5886 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5887 add_libcall (libcall_htab,
5888 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5889 add_libcall (libcall_htab,
5890 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5891 add_libcall (libcall_htab,
5892 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5894 /* Values from double-precision helper functions are returned in core
5895 registers if the selected core only supports single-precision
5896 arithmetic, even if we are using the hard-float ABI. The same is
5897 true for single-precision helpers except in case of MVE, because in
5898 MVE we will be using the hard-float ABI on a CPU which doesn't support
5899 single-precision operations in hardware. In MVE the following check
5900 enables use of emulation for the single-precision arithmetic
5901 operations. */
5902 if (TARGET_HAVE_MVE)
5904 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5905 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5906 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5907 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5908 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5909 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5910 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5911 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5912 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5913 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5914 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5916 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5917 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5918 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5919 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5920 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5921 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5922 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5923 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5924 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5925 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5926 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5927 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5928 SFmode));
5929 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5930 DFmode));
5931 add_libcall (libcall_htab,
5932 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5935 return libcall && libcall_htab->find (libcall) != NULL;
5938 static rtx
5939 arm_libcall_value_1 (machine_mode mode)
5941 if (TARGET_AAPCS_BASED)
5942 return aapcs_libcall_value (mode);
5943 else if (TARGET_IWMMXT_ABI
5944 && arm_vector_mode_supported_p (mode))
5945 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5946 else
5947 return gen_rtx_REG (mode, ARG_REGISTER (1));
5950 /* Define how to find the value returned by a library function
5951 assuming the value has mode MODE. */
5953 static rtx
5954 arm_libcall_value (machine_mode mode, const_rtx libcall)
5956 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5957 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5959 /* The following libcalls return their result in integer registers,
5960 even though they return a floating point value. */
5961 if (arm_libcall_uses_aapcs_base (libcall))
5962 return gen_rtx_REG (mode, ARG_REGISTER(1));
5966 return arm_libcall_value_1 (mode);
5969 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5971 static bool
5972 arm_function_value_regno_p (const unsigned int regno)
5974 if (regno == ARG_REGISTER (1)
5975 || (TARGET_32BIT
5976 && TARGET_AAPCS_BASED
5977 && TARGET_HARD_FLOAT
5978 && regno == FIRST_VFP_REGNUM)
5979 || (TARGET_IWMMXT_ABI
5980 && regno == FIRST_IWMMXT_REGNUM))
5981 return true;
5983 return false;
5986 /* Determine the amount of memory needed to store the possible return
5987 registers of an untyped call. */
5989 arm_apply_result_size (void)
5991 int size = 16;
5993 if (TARGET_32BIT)
5995 if (TARGET_HARD_FLOAT_ABI)
5996 size += 32;
5997 if (TARGET_IWMMXT_ABI)
5998 size += 8;
6001 return size;
6004 /* Decide whether TYPE should be returned in memory (true)
6005 or in a register (false). FNTYPE is the type of the function making
6006 the call. */
6007 static bool
6008 arm_return_in_memory (const_tree type, const_tree fntype)
6010 HOST_WIDE_INT size;
6012 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6014 if (TARGET_AAPCS_BASED)
6016 /* Simple, non-aggregate types (i.e. not including vectors and
6017 complex) are always returned in a register (or registers).
6018 We don't care about which register here, so we can short-cut
6019 some of the detail. */
6020 if (!AGGREGATE_TYPE_P (type)
6021 && TREE_CODE (type) != VECTOR_TYPE
6022 && TREE_CODE (type) != COMPLEX_TYPE)
6023 return false;
6025 /* Any return value that is no larger than one word can be
6026 returned in r0. */
6027 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6028 return false;
6030 /* Check any available co-processors to see if they accept the
6031 type as a register candidate (VFP, for example, can return
6032 some aggregates in consecutive registers). These aren't
6033 available if the call is variadic. */
6034 if (aapcs_select_return_coproc (type, fntype) >= 0)
6035 return false;
6037 /* Vector values should be returned using ARM registers, not
6038 memory (unless they're over 16 bytes, which will break since
6039 we only have four call-clobbered registers to play with). */
6040 if (TREE_CODE (type) == VECTOR_TYPE)
6041 return (size < 0 || size > (4 * UNITS_PER_WORD));
6043 /* The rest go in memory. */
6044 return true;
6047 if (TREE_CODE (type) == VECTOR_TYPE)
6048 return (size < 0 || size > (4 * UNITS_PER_WORD));
6050 if (!AGGREGATE_TYPE_P (type) &&
6051 (TREE_CODE (type) != VECTOR_TYPE))
6052 /* All simple types are returned in registers. */
6053 return false;
6055 if (arm_abi != ARM_ABI_APCS)
6057 /* ATPCS and later return aggregate types in memory only if they are
6058 larger than a word (or are variable size). */
6059 return (size < 0 || size > UNITS_PER_WORD);
6062 /* For the arm-wince targets we choose to be compatible with Microsoft's
6063 ARM and Thumb compilers, which always return aggregates in memory. */
6064 #ifndef ARM_WINCE
6065 /* All structures/unions bigger than one word are returned in memory.
6066 Also catch the case where int_size_in_bytes returns -1. In this case
6067 the aggregate is either huge or of variable size, and in either case
6068 we will want to return it via memory and not in a register. */
6069 if (size < 0 || size > UNITS_PER_WORD)
6070 return true;
6072 if (TREE_CODE (type) == RECORD_TYPE)
6074 tree field;
6076 /* For a struct the APCS says that we only return in a register
6077 if the type is 'integer like' and every addressable element
6078 has an offset of zero. For practical purposes this means
6079 that the structure can have at most one non bit-field element
6080 and that this element must be the first one in the structure. */
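/* Illustration (assumption, not part of the original comment): under
   these rules  struct { char c; int pad : 24; }  can be returned in
   r0, while  struct { char a; char b; }  must go in memory because
   its second field is an addressable non-bitfield.  */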
6082 /* Find the first field, ignoring non FIELD_DECL things which will
6083 have been created by C++. */
6084 /* NOTE: This code is deprecated and has not been updated to handle
6085 DECL_FIELD_ABI_IGNORED. */
6086 for (field = TYPE_FIELDS (type);
6087 field && TREE_CODE (field) != FIELD_DECL;
6088 field = DECL_CHAIN (field))
6089 continue;
6091 if (field == NULL)
6092 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6094 /* Check that the first field is valid for returning in a register. */
6096 /* ... Floats are not allowed */
6097 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6098 return true;
6100 /* ... Aggregates that are not themselves valid for returning in
6101 a register are not allowed. */
6102 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6103 return true;
6105 /* Now check the remaining fields, if any. Only bitfields are allowed,
6106 since they are not addressable. */
6107 for (field = DECL_CHAIN (field);
6108 field;
6109 field = DECL_CHAIN (field))
6111 if (TREE_CODE (field) != FIELD_DECL)
6112 continue;
6114 if (!DECL_BIT_FIELD_TYPE (field))
6115 return true;
6118 return false;
6121 if (TREE_CODE (type) == UNION_TYPE)
6123 tree field;
6125 /* Unions can be returned in registers if every element is
6126 integral, or can be returned in an integer register. */
6127 for (field = TYPE_FIELDS (type);
6128 field;
6129 field = DECL_CHAIN (field))
6131 if (TREE_CODE (field) != FIELD_DECL)
6132 continue;
6134 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6135 return true;
6137 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6138 return true;
6141 return false;
6143 #endif /* not ARM_WINCE */
6145 /* Return all other types in memory. */
6146 return true;
6149 const struct pcs_attribute_arg
6151 const char *arg;
6152 enum arm_pcs value;
6153 } pcs_attribute_args[] =
6155 {"aapcs", ARM_PCS_AAPCS},
6156 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6157 #if 0
6158 /* We could recognize these, but changes would be needed elsewhere
6159 * to implement them. */
6160 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6161 {"atpcs", ARM_PCS_ATPCS},
6162 {"apcs", ARM_PCS_APCS},
6163 #endif
6164 {NULL, ARM_PCS_UNKNOWN}
6167 static enum arm_pcs
6168 arm_pcs_from_attribute (tree attr)
6170 const struct pcs_attribute_arg *ptr;
6171 const char *arg;
6173 /* Get the value of the argument. */
6174 if (TREE_VALUE (attr) == NULL_TREE
6175 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6176 return ARM_PCS_UNKNOWN;
6178 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6180 /* Check it against the list of known arguments. */
6181 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6182 if (streq (arg, ptr->arg))
6183 return ptr->value;
6185 /* An unrecognized PCS variant. */
6186 return ARM_PCS_UNKNOWN;
6189 /* Get the PCS variant to use for this call. TYPE is the function's type
6190 specification, DECL is the specific declaration. DECL may be null if
6191 the call could be indirect or if this is a library call. */
6192 static enum arm_pcs
6193 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6195 bool user_convention = false;
6196 enum arm_pcs user_pcs = arm_pcs_default;
6197 tree attr;
6199 gcc_assert (type);
6201 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6202 if (attr)
6204 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6205 user_convention = true;
6208 if (TARGET_AAPCS_BASED)
6210 /* Detect varargs functions. These always use the base rules
6211 (no argument is ever a candidate for a co-processor
6212 register). */
6213 bool base_rules = stdarg_p (type);
6215 if (user_convention)
6217 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6218 sorry ("non-AAPCS derived PCS variant");
6219 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6220 error ("variadic functions must use the base AAPCS variant");
6223 if (base_rules)
6224 return ARM_PCS_AAPCS;
6225 else if (user_convention)
6226 return user_pcs;
6227 #if 0
6228 /* Unfortunately, this is not safe and can lead to wrong code
6229 being generated (PR96882). Not all calls into the back-end
6230 pass the DECL, so it is unsafe to make any PCS-changing
6231 decisions based on it. In particular the RETURN_IN_MEMORY
6232 hook is only ever passed a TYPE. This needs revisiting to
6233 see if there are any partial improvements that can be
6234 re-enabled. */
6235 else if (decl && flag_unit_at_a_time)
6237 /* Local functions never leak outside this compilation unit,
6238 so we are free to use whatever conventions are
6239 appropriate. */
6240 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6241 cgraph_node *local_info_node
6242 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6243 if (local_info_node && local_info_node->local)
6244 return ARM_PCS_AAPCS_LOCAL;
6246 #endif
6248 else if (user_convention && user_pcs != arm_pcs_default)
6249 sorry ("PCS variant");
6251 /* For everything else we use the target's default. */
6252 return arm_pcs_default;
6256 static void
6257 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6258 const_tree fntype ATTRIBUTE_UNUSED,
6259 rtx libcall ATTRIBUTE_UNUSED,
6260 const_tree fndecl ATTRIBUTE_UNUSED)
6262 /* Record the unallocated VFP registers. */
6263 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6264 pcum->aapcs_vfp_reg_alloc = 0;
6267 /* Bitmasks that indicate whether earlier versions of GCC would have
6268 taken a different path through the ABI logic. This should result in
6269 a -Wpsabi warning if the earlier path led to a different ABI decision.
6271 WARN_PSABI_EMPTY_CXX17_BASE
6272 Indicates that the type includes an artificial empty C++17 base field
6273 that, prior to GCC 10.1, would prevent the type from being treated as
6274 a HFA or HVA. See PR94711 for details.
6276 WARN_PSABI_NO_UNIQUE_ADDRESS
6277 Indicates that the type includes an empty [[no_unique_address]] field
6278 that, prior to GCC 10.1, would prevent the type from being treated as
6279 a HFA or HVA. */
6280 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6281 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6282 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6284 /* Walk down the type tree of TYPE counting consecutive base elements.
6285 If *MODEP is VOIDmode, then set it to the first valid floating point
6286 type. If a non-floating point type is found, or if a floating point
6287 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6288 otherwise return the count in the sub-tree.
6290 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6291 function has changed its behavior relative to earlier versions of GCC.
6292 Normally the argument should be nonnull and point to a zero-initialized
6293 variable. The function then records whether the ABI decision might
6294 be affected by a known fix to the ABI logic, setting the associated
6295 WARN_PSABI_* bits if so.
6297 When the argument is instead a null pointer, the function tries to
6298 simulate the behavior of GCC before all such ABI fixes were made.
6299 This is useful to check whether the function returns something
6300 different after the ABI fixes. */
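/* Worked example (illustrative, not in the original source): for
   struct { float x; float y[2]; }  the recursion below leaves *MODEP
   == SFmode and returns 3, i.e. a candidate homogeneous aggregate of
   three single-precision floats.  */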
6301 static int
6302 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6303 unsigned int *warn_psabi_flags)
6305 machine_mode mode;
6306 HOST_WIDE_INT size;
6308 switch (TREE_CODE (type))
6310 case REAL_TYPE:
6311 mode = TYPE_MODE (type);
6312 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6313 return -1;
6315 if (*modep == VOIDmode)
6316 *modep = mode;
6318 if (*modep == mode)
6319 return 1;
6321 break;
6323 case COMPLEX_TYPE:
6324 mode = TYPE_MODE (TREE_TYPE (type));
6325 if (mode != DFmode && mode != SFmode)
6326 return -1;
6328 if (*modep == VOIDmode)
6329 *modep = mode;
6331 if (*modep == mode)
6332 return 2;
6334 break;
6336 case VECTOR_TYPE:
6337 /* Use V2SImode and V4SImode as representatives of all 64-bit
6338 and 128-bit vector types, whether or not those modes are
6339 supported with the present options. */
6340 size = int_size_in_bytes (type);
6341 switch (size)
6343 case 8:
6344 mode = V2SImode;
6345 break;
6346 case 16:
6347 mode = V4SImode;
6348 break;
6349 default:
6350 return -1;
6353 if (*modep == VOIDmode)
6354 *modep = mode;
6356 /* Vector modes are considered to be opaque: two vectors are
6357 equivalent for the purposes of being homogeneous aggregates
6358 if they are the same size. */
6359 if (*modep == mode)
6360 return 1;
6362 break;
6364 case ARRAY_TYPE:
6366 int count;
6367 tree index = TYPE_DOMAIN (type);
6369 /* Can't handle incomplete types nor sizes that are not
6370 fixed. */
6371 if (!COMPLETE_TYPE_P (type)
6372 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6373 return -1;
6375 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6376 warn_psabi_flags);
6377 if (count == -1
6378 || !index
6379 || !TYPE_MAX_VALUE (index)
6380 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6381 || !TYPE_MIN_VALUE (index)
6382 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6383 || count < 0)
6384 return -1;
6386 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6387 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6389 /* There must be no padding. */
6390 if (wi::to_wide (TYPE_SIZE (type))
6391 != count * GET_MODE_BITSIZE (*modep))
6392 return -1;
6394 return count;
6397 case RECORD_TYPE:
6399 int count = 0;
6400 int sub_count;
6401 tree field;
6403 /* Can't handle incomplete types nor sizes that are not
6404 fixed. */
6405 if (!COMPLETE_TYPE_P (type)
6406 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6407 return -1;
6409 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6411 if (TREE_CODE (field) != FIELD_DECL)
6412 continue;
6414 if (DECL_FIELD_ABI_IGNORED (field))
6416 /* See whether this is something that earlier versions of
6417 GCC failed to ignore. */
6418 unsigned int flag;
6419 if (lookup_attribute ("no_unique_address",
6420 DECL_ATTRIBUTES (field)))
6421 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6422 else if (cxx17_empty_base_field_p (field))
6423 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6424 else
6425 /* No compatibility problem. */
6426 continue;
6428 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6429 if (warn_psabi_flags)
6431 *warn_psabi_flags |= flag;
6432 continue;
6435 /* A zero-width bitfield may affect layout in some
6436 circumstances, but adds no members. The determination
6437 of whether or not a type is an HFA is performed after
6438 layout is complete, so if the type still looks like an
6439 HFA afterwards, it is still classed as one. This is
6440 potentially an ABI break for the hard-float ABI. */
6441 else if (DECL_BIT_FIELD (field)
6442 && integer_zerop (DECL_SIZE (field)))
6444 /* Prior to GCC-12 these fields were stripped early,
6445 hiding them from the back-end entirely and
6446 resulting in the correct behaviour for argument
6447 passing. Simulate that old behaviour without
6448 generating a warning. */
6449 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6450 continue;
6451 if (warn_psabi_flags)
6453 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6454 continue;
6458 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6459 warn_psabi_flags);
6460 if (sub_count < 0)
6461 return -1;
6462 count += sub_count;
6465 /* There must be no padding. */
6466 if (wi::to_wide (TYPE_SIZE (type))
6467 != count * GET_MODE_BITSIZE (*modep))
6468 return -1;
6470 return count;
6473 case UNION_TYPE:
6474 case QUAL_UNION_TYPE:
6476 /* These aren't very interesting except in a degenerate case. */
6477 int count = 0;
6478 int sub_count;
6479 tree field;
6481 /* Can't handle incomplete types nor sizes that are not
6482 fixed. */
6483 if (!COMPLETE_TYPE_P (type)
6484 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6485 return -1;
6487 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6489 if (TREE_CODE (field) != FIELD_DECL)
6490 continue;
6492 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6493 warn_psabi_flags);
6494 if (sub_count < 0)
6495 return -1;
6496 count = count > sub_count ? count : sub_count;
6499 /* There must be no padding. */
6500 if (wi::to_wide (TYPE_SIZE (type))
6501 != count * GET_MODE_BITSIZE (*modep))
6502 return -1;
6504 return count;
6507 default:
6508 break;
6511 return -1;
6514 /* Return true if PCS_VARIANT should use VFP registers. */
6515 static bool
6516 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6518 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6520 static bool seen_thumb1_vfp = false;
6522 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6524 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6525 /* sorry() is not immediately fatal, so only display this once. */
6526 seen_thumb1_vfp = true;
6529 return true;
6532 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6533 return false;
6535 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6536 (TARGET_VFP_DOUBLE || !is_double));
6539 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6540 suitable for passing or returning in VFP registers for the PCS
6541 variant selected. If it is, then *BASE_MODE is updated to contain
6542 a machine mode describing each element of the argument's type and
6543 *COUNT to hold the number of such elements. */
6544 static bool
6545 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6546 machine_mode mode, const_tree type,
6547 machine_mode *base_mode, int *count)
6549 machine_mode new_mode = VOIDmode;
6551 /* If we have the type information, prefer that to working things
6552 out from the mode. */
6553 if (type)
6555 unsigned int warn_psabi_flags = 0;
6556 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6557 &warn_psabi_flags);
6558 if (ag_count > 0 && ag_count <= 4)
6560 static unsigned last_reported_type_uid;
6561 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6562 int alt;
6563 if (warn_psabi
6564 && warn_psabi_flags
6565 && uid != last_reported_type_uid
6566 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6567 != ag_count))
6569 const char *url10
6570 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6571 const char *url12
6572 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6573 gcc_assert (alt == -1);
6574 last_reported_type_uid = uid;
6575 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6576 qualification. */
6577 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6578 inform (input_location, "parameter passing for argument of "
6579 "type %qT with %<[[no_unique_address]]%> members "
6580 "changed %{in GCC 10.1%}",
6581 TYPE_MAIN_VARIANT (type), url10);
6582 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6583 inform (input_location, "parameter passing for argument of "
6584 "type %qT when C++17 is enabled changed to match "
6585 "C++14 %{in GCC 10.1%}",
6586 TYPE_MAIN_VARIANT (type), url10);
6587 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6588 inform (input_location, "parameter passing for argument of "
6589 "type %qT changed %{in GCC 12.1%}",
6590 TYPE_MAIN_VARIANT (type), url12);
6592 *count = ag_count;
6594 else
6595 return false;
6597 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6598 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6599 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6601 *count = 1;
6602 new_mode = mode;
6604 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6606 *count = 2;
6607 new_mode = (mode == DCmode ? DFmode : SFmode);
6609 else
6610 return false;
6613 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6614 return false;
6616 *base_mode = new_mode;
6618 if (TARGET_GENERAL_REGS_ONLY)
6619 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6620 type);
6622 return true;
6625 static bool
6626 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6627 machine_mode mode, const_tree type)
6629 int count ATTRIBUTE_UNUSED;
6630 machine_mode ag_mode ATTRIBUTE_UNUSED;
6632 if (!use_vfp_abi (pcs_variant, false))
6633 return false;
6634 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6635 &ag_mode, &count);
6638 static bool
6639 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6640 const_tree type)
6642 if (!use_vfp_abi (pcum->pcs_variant, false))
6643 return false;
6645 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6646 &pcum->aapcs_vfp_rmode,
6647 &pcum->aapcs_vfp_rcount);
6650 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6651 for the behaviour of this function. */
6653 static bool
6654 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6655 const_tree type ATTRIBUTE_UNUSED)
6657 int rmode_size
6658 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6659 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6660 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6661 int regno;
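/* Worked example (illustrative): for an argument that is an HFA of
   two doubles, aapcs_vfp_rmode is DFmode and aapcs_vfp_rcount is 2,
   so rmode_size == 8, shift == 2 and mask == 0xf: the loop below
   looks for four consecutive free single-precision registers
   starting at an even register number.  */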
6663 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6664 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6666 pcum->aapcs_vfp_reg_alloc = mask << regno;
6667 if (mode == BLKmode
6668 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6669 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6671 int i;
6672 int rcount = pcum->aapcs_vfp_rcount;
6673 int rshift = shift;
6674 machine_mode rmode = pcum->aapcs_vfp_rmode;
6675 rtx par;
6676 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6678 /* Avoid using unsupported vector modes. */
6679 if (rmode == V2SImode)
6680 rmode = DImode;
6681 else if (rmode == V4SImode)
6683 rmode = DImode;
6684 rcount *= 2;
6685 rshift /= 2;
6688 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6689 for (i = 0; i < rcount; i++)
6691 rtx tmp = gen_rtx_REG (rmode,
6692 FIRST_VFP_REGNUM + regno + i * rshift);
6693 tmp = gen_rtx_EXPR_LIST
6694 (VOIDmode, tmp,
6695 GEN_INT (i * GET_MODE_SIZE (rmode)));
6696 XVECEXP (par, 0, i) = tmp;
6699 pcum->aapcs_reg = par;
6701 else
6702 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6703 return true;
6705 return false;
6708 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6709 comment there for the behaviour of this function. */
6711 static rtx
6712 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6713 machine_mode mode,
6714 const_tree type ATTRIBUTE_UNUSED)
6716 if (!use_vfp_abi (pcs_variant, false))
6717 return NULL;
6719 if (mode == BLKmode
6720 || (GET_MODE_CLASS (mode) == MODE_INT
6721 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6722 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6724 int count;
6725 machine_mode ag_mode;
6726 int i;
6727 rtx par;
6728 int shift;
6730 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6731 &ag_mode, &count);
6733 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6735 if (ag_mode == V2SImode)
6736 ag_mode = DImode;
6737 else if (ag_mode == V4SImode)
6739 ag_mode = DImode;
6740 count *= 2;
6743 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6744 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6745 for (i = 0; i < count; i++)
6747 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6748 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6749 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6750 XVECEXP (par, 0, i) = tmp;
6753 return par;
6756 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6759 static void
6760 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6761 machine_mode mode ATTRIBUTE_UNUSED,
6762 const_tree type ATTRIBUTE_UNUSED)
6764 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6765 pcum->aapcs_vfp_reg_alloc = 0;
6766 return;
6769 #define AAPCS_CP(X) \
6771 aapcs_ ## X ## _cum_init, \
6772 aapcs_ ## X ## _is_call_candidate, \
6773 aapcs_ ## X ## _allocate, \
6774 aapcs_ ## X ## _is_return_candidate, \
6775 aapcs_ ## X ## _allocate_return_reg, \
6776 aapcs_ ## X ## _advance \
6779 /* Table of co-processors that can be used to pass arguments in
6780 registers. Ideally no argument should be a candidate for more than
6781 one co-processor table entry, but the table is processed in order
6782 and stops after the first match. If that entry then fails to put
6783 the argument into a co-processor register, the argument will go on
6784 the stack. */
6785 static struct
6787 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6788 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6790 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6791 BLKmode) is a candidate for this co-processor's registers; this
6792 function should ignore any position-dependent state in
6793 CUMULATIVE_ARGS and only use call-type dependent information. */
6794 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6796 /* Return true if the argument does get a co-processor register; it
6797 should set aapcs_reg to an RTX of the register allocated as is
6798 required for a return from FUNCTION_ARG. */
6799 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6801 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6802 be returned in this co-processor's registers. */
6803 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6805 /* Allocate and return an RTX element to hold the return type of a call. This
6806 routine must not fail and will only be called if is_return_candidate
6807 returned true with the same parameters. */
6808 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6810 /* Finish processing this argument and prepare to start processing
6811 the next one. */
6812 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6813 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6815 AAPCS_CP(vfp)
6818 #undef AAPCS_CP
6820 static int
6821 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6822 const_tree type)
6824 int i;
6826 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6827 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6828 return i;
6830 return -1;
6833 static int
6834 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6836 /* We aren't passed a decl, so we can't check that a call is local.
6837 However, it isn't clear that that would be a win anyway, since it
6838 might limit some tail-calling opportunities. */
6839 enum arm_pcs pcs_variant;
6841 if (fntype)
6843 const_tree fndecl = NULL_TREE;
6845 if (TREE_CODE (fntype) == FUNCTION_DECL)
6847 fndecl = fntype;
6848 fntype = TREE_TYPE (fntype);
6851 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6853 else
6854 pcs_variant = arm_pcs_default;
6856 if (pcs_variant != ARM_PCS_AAPCS)
6858 int i;
6860 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6861 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6862 TYPE_MODE (type),
6863 type))
6864 return i;
6866 return -1;
6869 static rtx
6870 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6871 const_tree fntype)
6873 /* We aren't passed a decl, so we can't check that a call is local.
6874 However, it isn't clear that that would be a win anyway, since it
6875 might limit some tail-calling opportunities. */
6876 enum arm_pcs pcs_variant;
6877 int unsignedp ATTRIBUTE_UNUSED;
6879 if (fntype)
6881 const_tree fndecl = NULL_TREE;
6883 if (TREE_CODE (fntype) == FUNCTION_DECL)
6885 fndecl = fntype;
6886 fntype = TREE_TYPE (fntype);
6889 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6891 else
6892 pcs_variant = arm_pcs_default;
6894 /* Promote integer types. */
6895 if (type && INTEGRAL_TYPE_P (type))
6896 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6898 if (pcs_variant != ARM_PCS_AAPCS)
6900 int i;
6902 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6903 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6904 type))
6905 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6906 mode, type);
6909 /* Promotes small structs returned in a register to full-word size
6910 for big-endian AAPCS. */
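/* E.g. (illustration, not in the original comment): on a big-endian
   AAPCS target a 6-byte struct has SIZE rounded up to 8 here and is
   returned in DImode, i.e. in r0 and r1.  */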
6911 if (type && arm_return_in_msb (type))
6913 HOST_WIDE_INT size = int_size_in_bytes (type);
6914 if (size % UNITS_PER_WORD != 0)
6916 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6917 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6921 return gen_rtx_REG (mode, R0_REGNUM);
6924 static rtx
6925 aapcs_libcall_value (machine_mode mode)
6927 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6928 && GET_MODE_SIZE (mode) <= 4)
6929 mode = SImode;
6931 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6934 /* Lay out a function argument using the AAPCS rules. The rule
6935 numbers referred to here are those in the AAPCS. */
6936 static void
6937 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6938 const_tree type, bool named)
6940 int nregs, nregs2;
6941 int ncrn;
6943 /* We only need to do this once per argument. */
6944 if (pcum->aapcs_arg_processed)
6945 return;
6947 pcum->aapcs_arg_processed = true;
6949 /* Special case: if named is false then we are handling an incoming
6950 anonymous argument which is on the stack. */
6951 if (!named)
6952 return;
6954 /* Is this a potential co-processor register candidate? */
6955 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6957 int slot = aapcs_select_call_coproc (pcum, mode, type);
6958 pcum->aapcs_cprc_slot = slot;
6960 /* We don't have to apply any of the rules from part B of the
6961 preparation phase, these are handled elsewhere in the
6962 compiler. */
6964 if (slot >= 0)
6966 /* A Co-processor register candidate goes either in its own
6967 class of registers or on the stack. */
6968 if (!pcum->aapcs_cprc_failed[slot])
6970 /* C1.cp - Try to allocate the argument to co-processor
6971 registers. */
6972 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6973 return;
6975 /* C2.cp - Put the argument on the stack and note that we
6976 can't assign any more candidates in this slot. We also
6977 need to note that we have allocated stack space, so that
6978 we won't later try to split a non-cprc candidate between
6979 core registers and the stack. */
6980 pcum->aapcs_cprc_failed[slot] = true;
6981 pcum->can_split = false;
6984 /* We didn't get a register, so this argument goes on the
6985 stack. */
6986 gcc_assert (pcum->can_split == false);
6987 return;
6991 /* C3 - For double-word aligned arguments, round the NCRN up to the
6992 next even number. */
6993 ncrn = pcum->aapcs_ncrn;
6994 if (ncrn & 1)
6996 int res = arm_needs_doubleword_align (mode, type);
6997 /* Only warn during RTL expansion of call stmts, otherwise we would
6998 warn e.g. during gimplification even on functions that will be
6999 always inlined, and we'd warn multiple times. Don't warn when
7000 called in expand_function_start either, as we warn instead in
7001 arm_function_arg_boundary in that case. */
7002 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7003 inform (input_location, "parameter passing for argument of type "
7004 "%qT changed in GCC 7.1", type);
7005 else if (res > 0)
7006 ncrn++;
7009 nregs = ARM_NUM_REGS2(mode, type);
7011 /* Sigh, this test should really assert that nregs > 0, but a GCC
7012 extension allows empty structs and then gives them empty size; it
7013 then allows such a structure to be passed by value. For some of
7014 the code below we have to pretend that such an argument has
7015 non-zero size so that we 'locate' it correctly either in
7016 registers or on the stack. */
7017 gcc_assert (nregs >= 0);
7019 nregs2 = nregs ? nregs : 1;
7021 /* C4 - Argument fits entirely in core registers. */
7022 if (ncrn + nregs2 <= NUM_ARG_REGS)
7024 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7025 pcum->aapcs_next_ncrn = ncrn + nregs;
7026 return;
7029 /* C5 - Some core registers left and there are no arguments already
7030 on the stack: split this argument between the remaining core
7031 registers and the stack. */
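/* Illustrative case (assumption, not in the original comment): a
   12-byte struct arriving with NCRN == 2 gets r2 as its register
   here, aapcs_partial becomes 8 (the bytes that travel in r2/r3),
   and the final word is placed on the stack.  */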
7032 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7034 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7035 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7036 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7037 return;
7040 /* C6 - NCRN is set to 4. */
7041 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7043 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7044 return;
7047 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7048 for a call to a function whose data type is FNTYPE.
7049 For a library call, FNTYPE is NULL. */
7050 void
7051 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7052 rtx libname,
7053 tree fndecl ATTRIBUTE_UNUSED)
7055 /* Long call handling. */
7056 if (fntype)
7057 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7058 else
7059 pcum->pcs_variant = arm_pcs_default;
7061 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7063 if (arm_libcall_uses_aapcs_base (libname))
7064 pcum->pcs_variant = ARM_PCS_AAPCS;
7066 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7067 pcum->aapcs_reg = NULL_RTX;
7068 pcum->aapcs_partial = 0;
7069 pcum->aapcs_arg_processed = false;
7070 pcum->aapcs_cprc_slot = -1;
7071 pcum->can_split = true;
7073 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7075 int i;
7077 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7079 pcum->aapcs_cprc_failed[i] = false;
7080 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7083 return;
7086 /* Legacy ABIs */
7088 /* On the ARM, the offset starts at 0. */
7089 pcum->nregs = 0;
7090 pcum->iwmmxt_nregs = 0;
7091 pcum->can_split = true;
7093 /* Varargs vectors are treated the same as long long.
7094 named_count avoids having to change the way arm handles 'named' */
7095 pcum->named_count = 0;
7096 pcum->nargs = 0;
7098 if (TARGET_REALLY_IWMMXT && fntype)
7100 tree fn_arg;
7102 for (fn_arg = TYPE_ARG_TYPES (fntype);
7103 fn_arg;
7104 fn_arg = TREE_CHAIN (fn_arg))
7105 pcum->named_count += 1;
7107 if (! pcum->named_count)
7108 pcum->named_count = INT_MAX;
7112 /* Return 2 if double word alignment is required for argument passing,
7113 but wasn't required before the fix for PR88469.
7114 Return 1 if double word alignment is required for argument passing.
7115 Return -1 if double word alignment used to be required for argument
7116 passing before PR77728 ABI fix, but is not required anymore.
7117 Return 0 if double word alignment is not required and wasn't required
7118 before either. */
7119 static int
7120 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7122 if (!type)
7123 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7125 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7126 if (!AGGREGATE_TYPE_P (type))
7127 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7129 /* Array types: Use member alignment of element type. */
7130 if (TREE_CODE (type) == ARRAY_TYPE)
7131 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7133 int ret = 0;
7134 int ret2 = 0;
7135 /* Record/aggregate types: Use greatest member alignment of any member.
7137 Note that we explicitly consider zero-sized fields here, even though
7138 they don't map to AAPCS machine types. For example, in:
7140 struct __attribute__((aligned(8))) empty {};
7142 struct s {
7143 [[no_unique_address]] empty e;
7144 int x;
7147 "s" contains only one Fundamental Data Type (the int field)
7148 but gains 8-byte alignment and size thanks to "e". */
7149 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7150 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7152 if (TREE_CODE (field) == FIELD_DECL)
7153 return 1;
7154 else
7155 /* Before PR77728 fix, we were incorrectly considering also
7156 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7157 Make sure we can warn about that with -Wpsabi. */
7158 ret = -1;
7160 else if (TREE_CODE (field) == FIELD_DECL
7161 && DECL_BIT_FIELD_TYPE (field)
7162 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7163 ret2 = 1;
7165 if (ret2)
7166 return 2;
7168 return ret;
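/* Editorial illustration (not part of the original source) of the return
   values described above, assuming a 32-bit PARM_BOUNDARY:

     long long                          -> 1  (natural alignment of 64 bits)
     struct s1 { long long x; };        -> 1  (over-aligned FIELD_DECL)
     struct s2 { long long x : 8; };    -> 2  (over-aligned bit-field type,
                                               the PR88469 / GCC 9.1 change)
     a C++ aggregate whose only over-aligned member is not a FIELD_DECL
     (e.g. a static data member)        -> -1 (required only before the
                                               PR77728 / GCC 7.1 fix)
     int, float, struct s3 { int x; };  -> 0  */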
7172 /* Determine where to put an argument to a function.
7173 Value is zero to push the argument on the stack,
7174 or a hard register in which to store the argument.
7176 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7177 the preceding args and about the function being called.
7178 ARG is a description of the argument.
7180 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7181 other arguments are passed on the stack. If (NAMED == 0) (which happens
7182 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7183 defined), say it is passed in the stack (function_prologue will
7184 indeed make it pass in the stack if necessary). */
7186 static rtx
7187 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7189 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7190 int nregs;
7192 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7193 a call insn (op3 of a call_value insn). */
7194 if (arg.end_marker_p ())
7195 return const0_rtx;
7197 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7199 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7200 return pcum->aapcs_reg;
7203 /* Varargs vectors are treated the same as long long.
7204 named_count avoids having to change the way arm handles 'named' */
7205 if (TARGET_IWMMXT_ABI
7206 && arm_vector_mode_supported_p (arg.mode)
7207 && pcum->named_count > pcum->nargs + 1)
7209 if (pcum->iwmmxt_nregs <= 9)
7210 return gen_rtx_REG (arg.mode,
7211 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7212 else
7214 pcum->can_split = false;
7215 return NULL_RTX;
7219 /* Put doubleword aligned quantities in even register pairs. */
7220 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7222 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7223 if (res < 0 && warn_psabi)
7224 inform (input_location, "parameter passing for argument of type "
7225 "%qT changed in GCC 7.1", arg.type);
7226 else if (res > 0)
7228 pcum->nregs++;
7229 if (res > 1 && warn_psabi)
7230 inform (input_location, "parameter passing for argument of type "
7231 "%qT changed in GCC 9.1", arg.type);
7235 /* Only allow splitting an arg between regs and memory if all preceding
7236 args were allocated to regs. For args passed by reference we only count
7237 the reference pointer. */
7238 if (pcum->can_split)
7239 nregs = 1;
7240 else
7241 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7243 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7244 return NULL_RTX;
7246 return gen_rtx_REG (arg.mode, pcum->nregs);
7249 static unsigned int
7250 arm_function_arg_boundary (machine_mode mode, const_tree type)
7252 if (!ARM_DOUBLEWORD_ALIGN)
7253 return PARM_BOUNDARY;
7255 int res = arm_needs_doubleword_align (mode, type);
7256 if (res < 0 && warn_psabi)
7257 inform (input_location, "parameter passing for argument of type %qT "
7258 "changed in GCC 7.1", type);
7259 if (res > 1 && warn_psabi)
7260 inform (input_location, "parameter passing for argument of type "
7261 "%qT changed in GCC 9.1", type);
7263 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7266 static int
7267 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7269 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7270 int nregs = pcum->nregs;
7272 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7274 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7275 return pcum->aapcs_partial;
7278 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7279 return 0;
7281 if (NUM_ARG_REGS > nregs
7282 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7283 && pcum->can_split)
7284 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7286 return 0;
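/* Editorial illustration (not part of the original source): for the
   legacy-ABI path above, a call such as

     struct two_words { int x[2]; };
     void f (int a, int b, int c, struct two_words s);

   reaches this hook with pcum->nregs == 3 when laying out S, so NUM_ARG_REGS
   (4) is greater than nregs but smaller than nregs + ARM_NUM_REGS2 (2): the
   hook reports (4 - 3) * UNITS_PER_WORD = 4 bytes passed in r3 and the rest
   on the stack.  The AAPCS path delegates the same decision to
   aapcs_layout_arg via pcum->aapcs_partial.  */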
7289 /* Update the data in PCUM to advance over argument ARG. */
7291 static void
7292 arm_function_arg_advance (cumulative_args_t pcum_v,
7293 const function_arg_info &arg)
7295 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7297 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7299 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7301 if (pcum->aapcs_cprc_slot >= 0)
7303 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7304 arg.type);
7305 pcum->aapcs_cprc_slot = -1;
7308 /* Generic stuff. */
7309 pcum->aapcs_arg_processed = false;
7310 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7311 pcum->aapcs_reg = NULL_RTX;
7312 pcum->aapcs_partial = 0;
7314 else
7316 pcum->nargs += 1;
7317 if (arm_vector_mode_supported_p (arg.mode)
7318 && pcum->named_count > pcum->nargs
7319 && TARGET_IWMMXT_ABI)
7320 pcum->iwmmxt_nregs += 1;
7321 else
7322 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7326 /* Variable sized types are passed by reference. This is a GCC
7327 extension to the ARM ABI. */
7329 static bool
7330 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7332 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7335 /* Encode the current state of the #pragma [no_]long_calls. */
7336 typedef enum
7338 OFF, /* No #pragma [no_]long_calls is in effect. */
7339 LONG, /* #pragma long_calls is in effect. */
7340 SHORT /* #pragma no_long_calls is in effect. */
7341 } arm_pragma_enum;
7343 static arm_pragma_enum arm_pragma_long_calls = OFF;
7345 void
7346 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7348 arm_pragma_long_calls = LONG;
7351 void
7352 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7354 arm_pragma_long_calls = SHORT;
7357 void
7358 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7360 arm_pragma_long_calls = OFF;
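/* Editorial sketch (not part of the original source) of how the pragma
   callbacks above are typically driven from user code:

     #pragma long_calls
     void far_helper (void);      /* gets an implicit long_call attribute  */
     #pragma no_long_calls
     void near_helper (void);     /* gets an implicit short_call attribute  */
     #pragma long_calls_off
     void default_helper (void);  /* back to the command-line behaviour  */

   The attribute itself is attached later, in
   arm_set_default_type_attributes below.  */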
7363 /* Handle an attribute requiring a FUNCTION_DECL;
7364 arguments as in struct attribute_spec.handler. */
7365 static tree
7366 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7367 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7369 if (TREE_CODE (*node) != FUNCTION_DECL)
7371 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7372 name);
7373 *no_add_attrs = true;
7376 return NULL_TREE;
7379 /* Handle an "interrupt" or "isr" attribute;
7380 arguments as in struct attribute_spec.handler. */
7381 static tree
7382 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7383 bool *no_add_attrs)
7385 if (DECL_P (*node))
7387 if (TREE_CODE (*node) != FUNCTION_DECL)
7389 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7390 name);
7391 *no_add_attrs = true;
7393 else if (TARGET_VFP_BASE)
7395 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7396 name);
7398 /* FIXME: the argument if any is checked for type attributes;
7399 should it be checked for decl ones? */
7401 else
7403 if (TREE_CODE (*node) == FUNCTION_TYPE
7404 || TREE_CODE (*node) == METHOD_TYPE)
7406 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7408 warning (OPT_Wattributes, "%qE attribute ignored",
7409 name);
7410 *no_add_attrs = true;
7413 else if (TREE_CODE (*node) == POINTER_TYPE
7414 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7415 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7416 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7418 *node = build_variant_type_copy (*node);
7419 TREE_TYPE (*node) = build_type_attribute_variant
7420 (TREE_TYPE (*node),
7421 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7422 *no_add_attrs = true;
7424 else
7426 /* Possibly pass this attribute on from the type to a decl. */
7427 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7428 | (int) ATTR_FLAG_FUNCTION_NEXT
7429 | (int) ATTR_FLAG_ARRAY_NEXT))
7431 *no_add_attrs = true;
7432 return tree_cons (name, args, NULL_TREE);
7434 else
7436 warning (OPT_Wattributes, "%qE attribute ignored",
7437 name);
7442 return NULL_TREE;
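/* Editorial illustration (not part of the original source): declarations the
   handler above is meant to accept or reject, assuming an ARM target:

     void uart_isr (void) __attribute__ ((interrupt ("IRQ")));   /* accepted  */
     void timer_isr (void) __attribute__ ((isr));                /* accepted  */
     int bad_data __attribute__ ((interrupt ("IRQ")));           /* warned:
                                        only applies to functions  */

   An unrecognised kind string makes arm_isr_value return ARM_FT_UNKNOWN and
   the attribute is dropped with a warning.  */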
7445 /* Handle a "pcs" attribute; arguments as in struct
7446 attribute_spec.handler. */
7447 static tree
7448 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7449 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7451 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7453 warning (OPT_Wattributes, "%qE attribute ignored", name);
7454 *no_add_attrs = true;
7456 return NULL_TREE;
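/* Editorial illustration (not part of the original source): the "pcs"
   attribute accepts the variants understood by arm_pcs_from_attribute, e.g.

     double fma_vfp (double, double, double)
       __attribute__ ((pcs ("aapcs-vfp")));
     double fma_base (double, double, double)
       __attribute__ ((pcs ("aapcs")));

   Anything else (a misspelling, an unsupported variant, ...) is rejected
   here with a warning and the attribute is not added.  */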
7459 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7460 /* Handle the "notshared" attribute. This attribute is another way of
7461 requesting hidden visibility. ARM's compiler supports
7462 "__declspec(notshared)"; we support the same thing via an
7463 attribute. */
7465 static tree
7466 arm_handle_notshared_attribute (tree *node,
7467 tree name ATTRIBUTE_UNUSED,
7468 tree args ATTRIBUTE_UNUSED,
7469 int flags ATTRIBUTE_UNUSED,
7470 bool *no_add_attrs)
7472 tree decl = TYPE_NAME (*node);
7474 if (decl)
7476 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7477 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7478 *no_add_attrs = false;
7480 return NULL_TREE;
7482 #endif
7484 /* This function returns true if a function with declaration FNDECL and type
7485 FNTYPE uses the stack to pass arguments or return variables and false
7486 otherwise. This is used for functions with the attributes
7487 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7488 diagnostic messages if the stack is used. NAME is the name of the attribute
7489 used. */
7491 static bool
7492 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7494 function_args_iterator args_iter;
7495 CUMULATIVE_ARGS args_so_far_v;
7496 cumulative_args_t args_so_far;
7497 bool first_param = true;
7498 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7500 /* Error out if any argument is passed on the stack. */
7501 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7502 args_so_far = pack_cumulative_args (&args_so_far_v);
7503 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7505 rtx arg_rtx;
7507 prev_arg_type = arg_type;
7508 if (VOID_TYPE_P (arg_type))
7509 continue;
7511 function_arg_info arg (arg_type, /*named=*/true);
7512 if (!first_param)
7513 /* ??? We should advance after processing the argument and pass
7514 the argument we're advancing past. */
7515 arm_function_arg_advance (args_so_far, arg);
7516 arg_rtx = arm_function_arg (args_so_far, arg);
7517 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7519 error ("%qE attribute not available to functions with arguments "
7520 "passed on the stack", name);
7521 return true;
7523 first_param = false;
7526 /* Error out for variadic functions since we cannot control how many
7527 arguments will be passed and thus stack could be used. stdarg_p () is not
7528 used for the checking to avoid browsing arguments twice. */
7529 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7531 error ("%qE attribute not available to functions with variable number "
7532 "of arguments", name);
7533 return true;
7536 /* Error out if return value is passed on the stack. */
7537 ret_type = TREE_TYPE (fntype);
7538 if (arm_return_in_memory (ret_type, fntype))
7540 error ("%qE attribute not available to functions that return value on "
7541 "the stack", name);
7542 return true;
7544 return false;
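/* Editorial illustration (not part of the original source): signatures the
   check above rejects for cmse_nonsecure_entry / cmse_nonsecure_call,
   assuming the base AAPCS:

     void f (int, int, int, int, int);   /* fifth argument lands on the stack  */
     struct big { int x[6]; };
     struct big g (void);                /* 24-byte result returned in memory  */
     int h (int, ...);                   /* variadic: stack use can't be ruled out  */

   whereas e.g. int ok (int, int, int, int) passes all arguments and the
   return value in registers and is accepted.  */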
7547 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7548 function will check whether the attribute is allowed here and will add the
7549 attribute to the function declaration tree or otherwise issue a warning. */
7551 static tree
7552 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7553 tree /* args */,
7554 int /* flags */,
7555 bool *no_add_attrs)
7557 tree fndecl;
7559 if (!use_cmse)
7561 *no_add_attrs = true;
7562 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7563 "option", name);
7564 return NULL_TREE;
7567 /* Ignore attribute for function types. */
7568 if (TREE_CODE (*node) != FUNCTION_DECL)
7570 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7571 name);
7572 *no_add_attrs = true;
7573 return NULL_TREE;
7576 fndecl = *node;
7578 /* Warn for static linkage functions. */
7579 if (!TREE_PUBLIC (fndecl))
7581 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7582 "with static linkage", name);
7583 *no_add_attrs = true;
7584 return NULL_TREE;
7587 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7588 TREE_TYPE (fndecl));
7589 return NULL_TREE;
7593 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7594 function will check whether the attribute is allowed here and will add the
7595 attribute to the function type tree or otherwise issue a diagnostic. The
7596 reason we check this at declaration time is to only allow the use of the
7597 attribute with declarations of function pointers and not function
7598 declarations. This function checks NODE is of the expected type and issues
7599 diagnostics otherwise using NAME. If it is not of the expected type
7600 *NO_ADD_ATTRS will be set to true. */
7602 static tree
7603 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7604 tree /* args */,
7605 int /* flags */,
7606 bool *no_add_attrs)
7608 tree decl = NULL_TREE, fntype = NULL_TREE;
7609 tree type;
7611 if (!use_cmse)
7613 *no_add_attrs = true;
7614 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7615 "option", name);
7616 return NULL_TREE;
7619 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7621 decl = *node;
7622 fntype = TREE_TYPE (decl);
7625 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7626 fntype = TREE_TYPE (fntype);
7628 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7630 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7631 "function pointer", name);
7632 *no_add_attrs = true;
7633 return NULL_TREE;
7636 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7638 if (*no_add_attrs)
7639 return NULL_TREE;
7641 /* Prevent trees being shared among function types with and without
7642 cmse_nonsecure_call attribute. */
7643 type = TREE_TYPE (decl);
7645 type = build_distinct_type_copy (type);
7646 TREE_TYPE (decl) = type;
7647 fntype = type;
7649 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7651 type = fntype;
7652 fntype = TREE_TYPE (fntype);
7653 fntype = build_distinct_type_copy (fntype);
7654 TREE_TYPE (type) = fntype;
7657 /* Construct a type attribute and add it to the function type. */
7658 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7659 TYPE_ATTRIBUTES (fntype));
7660 TYPE_ATTRIBUTES (fntype) = attrs;
7661 return NULL_TREE;
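/* Editorial illustration (not part of the original source): the attribute
   handled above is meant for function-pointer declarations, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (void);
     ns_cb_t *callback;                                  /* accepted  */
     void entry (void) __attribute__ ((cmse_nonsecure_call));
                  /* warned: only applies to the base type of a function
                     pointer  */

   and it only has an effect when compiling with -mcmse.  */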
7664 /* Return 0 if the attributes for two types are incompatible, 1 if they
7665 are compatible, and 2 if they are nearly compatible (which causes a
7666 warning to be generated). */
7667 static int
7668 arm_comp_type_attributes (const_tree type1, const_tree type2)
7670 int l1, l2, s1, s2;
7672 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7673 TYPE_ATTRIBUTES (type1));
7674 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7675 TYPE_ATTRIBUTES (type2));
7676 if (bool (attrs1) != bool (attrs2))
7677 return 0;
7678 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7679 return 0;
7681 /* Check for mismatch of non-default calling convention. */
7682 if (TREE_CODE (type1) != FUNCTION_TYPE)
7683 return 1;
7685 /* Check for mismatched call attributes. */
7686 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7687 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7688 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7689 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7691 /* Only bother to check if an attribute is defined. */
7692 if (l1 | l2 | s1 | s2)
7694 /* If one type has an attribute, the other must have the same attribute. */
7695 if ((l1 != l2) || (s1 != s2))
7696 return 0;
7698 /* Disallow mixed attributes. */
7699 if ((l1 & s2) || (l2 & s1))
7700 return 0;
7703 /* Check for mismatched ISR attribute. */
7704 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7705 if (! l1)
7706 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7707 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7708 if (! l2)
7709 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7710 if (l1 != l2)
7711 return 0;
7713 l1 = lookup_attribute ("cmse_nonsecure_call",
7714 TYPE_ATTRIBUTES (type1)) != NULL;
7715 l2 = lookup_attribute ("cmse_nonsecure_call",
7716 TYPE_ATTRIBUTES (type2)) != NULL;
7718 if (l1 != l2)
7719 return 0;
7721 return 1;
7724 /* Assigns default attributes to newly defined type. This is used to
7725 set short_call/long_call attributes for function types of
7726 functions defined inside corresponding #pragma scopes. */
7727 static void
7728 arm_set_default_type_attributes (tree type)
7730 /* Add __attribute__ ((long_call)) to all functions, when
7731 inside #pragma long_calls or __attribute__ ((short_call)),
7732 when inside #pragma no_long_calls. */
7733 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7735 tree type_attr_list, attr_name;
7736 type_attr_list = TYPE_ATTRIBUTES (type);
7738 if (arm_pragma_long_calls == LONG)
7739 attr_name = get_identifier ("long_call");
7740 else if (arm_pragma_long_calls == SHORT)
7741 attr_name = get_identifier ("short_call");
7742 else
7743 return;
7745 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7746 TYPE_ATTRIBUTES (type) = type_attr_list;
7750 /* Return true if DECL is known to be linked into section SECTION. */
7752 static bool
7753 arm_function_in_section_p (tree decl, section *section)
7755 /* We can only be certain about the prevailing symbol definition. */
7756 if (!decl_binds_to_current_def_p (decl))
7757 return false;
7759 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7760 if (!DECL_SECTION_NAME (decl))
7762 /* Make sure that we will not create a unique section for DECL. */
7763 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7764 return false;
7767 return function_section (decl) == section;
7770 /* Return nonzero if a 32-bit "long_call" should be generated for
7771 a call from the current function to DECL. We generate a long_call
7772 if the function:
7774 a. has an __attribute__ ((long_call))
7775 or b. is within the scope of a #pragma long_calls
7776 or c. the -mlong-calls command line switch has been specified
7778 However we do not generate a long call if the function:
7780 d. has an __attribute__ ((short_call))
7781 or e. is inside the scope of a #pragma no_long_calls
7782 or f. is defined in the same section as the current function. */
7784 bool
7785 arm_is_long_call_p (tree decl)
7787 tree attrs;
7789 if (!decl)
7790 return TARGET_LONG_CALLS;
7792 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7793 if (lookup_attribute ("short_call", attrs))
7794 return false;
7796 /* For "f", be conservative, and only cater for cases in which the
7797 whole of the current function is placed in the same section. */
7798 if (!flag_reorder_blocks_and_partition
7799 && TREE_CODE (decl) == FUNCTION_DECL
7800 && arm_function_in_section_p (decl, current_function_section ()))
7801 return false;
7803 if (lookup_attribute ("long_call", attrs))
7804 return true;
7806 return TARGET_LONG_CALLS;
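/* Editorial illustration (not part of the original source): given the rules
   a-f above,

     extern void far_away (void) __attribute__ ((long_call));
     static void nearby (void) { }

   a call to far_away is always emitted as a long call (rule a), a call to
   nearby defined in the same section as the caller stays a plain BL even
   under -mlong-calls (rule f), and a short_call attribute or
   #pragma no_long_calls overrides -mlong-calls (rules d and e).  */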
7809 /* Return nonzero if it is ok to make a tail-call to DECL. */
7810 static bool
7811 arm_function_ok_for_sibcall (tree decl, tree exp)
7813 unsigned long func_type;
7815 if (cfun->machine->sibcall_blocked)
7816 return false;
7818 if (TARGET_FDPIC)
7820 /* In FDPIC, never tailcall something for which we have no decl:
7821 the target function could be in a different module, requiring
7822 a different FDPIC register value. */
7823 if (decl == NULL)
7824 return false;
7827 /* Never tailcall something if we are generating code for Thumb-1. */
7828 if (TARGET_THUMB1)
7829 return false;
7831 /* The PIC register is live on entry to VxWorks PLT entries, so we
7832 must make the call before restoring the PIC register. */
7833 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7834 return false;
7836 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7837 may be used both as target of the call and base register for restoring
7838 the VFP registers */
7839 if (TARGET_APCS_FRAME && TARGET_ARM
7840 && TARGET_HARD_FLOAT
7841 && decl && arm_is_long_call_p (decl))
7842 return false;
7844 /* If we are interworking and the function is not declared static
7845 then we can't tail-call it unless we know that it exists in this
7846 compilation unit (since it might be a Thumb routine). */
7847 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7848 && !TREE_ASM_WRITTEN (decl))
7849 return false;
7851 func_type = arm_current_func_type ();
7852 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7853 if (IS_INTERRUPT (func_type))
7854 return false;
7856 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7857 generated for entry functions themselves. */
7858 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7859 return false;
7861 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7862 this would complicate matters for later code generation. */
7863 if (TREE_CODE (exp) == CALL_EXPR)
7865 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7866 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7867 return false;
7870 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7872 /* Check that the return value locations are the same. For
7873 example that we aren't returning a value from the sibling in
7874 a VFP register but then need to transfer it to a core
7875 register. */
7876 rtx a, b;
7877 tree decl_or_type = decl;
7879 /* If it is an indirect function pointer, get the function type. */
7880 if (!decl)
7881 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7883 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7884 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7885 cfun->decl, false);
7886 if (!rtx_equal_p (a, b))
7887 return false;
7890 /* Never tailcall if function may be called with a misaligned SP. */
7891 if (IS_STACKALIGN (func_type))
7892 return false;
7894 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7895 references should become a NOP. Don't convert such calls into
7896 sibling calls. */
7897 if (TARGET_AAPCS_BASED
7898 && arm_abi == ARM_ABI_AAPCS
7899 && decl
7900 && DECL_WEAK (decl))
7901 return false;
7903 /* We cannot do a tailcall for an indirect call by descriptor if all the
7904 argument registers are used because the only register left to load the
7905 address is IP and it will already contain the static chain. */
7906 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7908 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7909 CUMULATIVE_ARGS cum;
7910 cumulative_args_t cum_v;
7912 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7913 cum_v = pack_cumulative_args (&cum);
7915 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7917 tree type = TREE_VALUE (t);
7918 if (!VOID_TYPE_P (type))
7920 function_arg_info arg (type, /*named=*/true);
7921 arm_function_arg_advance (cum_v, arg);
7925 function_arg_info arg (integer_type_node, /*named=*/true);
7926 if (!arm_function_arg (cum_v, arg))
7927 return false;
7930 /* Everything else is ok. */
7931 return true;
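/* Editorial illustration (not part of the original source): cases in which
   the predicate above refuses a sibcall include Thumb-1 code, ISR routines,
   calls through a cmse_nonsecure_call function pointer, and calls to weak
   functions on bare-metal AAPCS targets.  The return-value check can be
   seen with

     double ret_me (void);
     double caller (void) { return ret_me (); }

   if ret_me is declared with pcs ("aapcs-vfp") (result in d0) while caller
   itself follows the base AAPCS (result in r0/r1): the result locations
   differ, so the tail call is not taken.  */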
7935 /* Addressing mode support functions. */
7937 /* Return nonzero if X is a legitimate immediate operand when compiling
7938 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7940 legitimate_pic_operand_p (rtx x)
7942 if (SYMBOL_REF_P (x)
7943 || (GET_CODE (x) == CONST
7944 && GET_CODE (XEXP (x, 0)) == PLUS
7945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7946 return 0;
7948 return 1;
7951 /* Record that the current function needs a PIC register. If PIC_REG is null,
7952 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7953 both cases cfun->machine->pic_reg is initialized if we have not already done
7954 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7955 the PIC register is reloaded in the current position of the instruction stream
7956 regardless of whether it was loaded before. Otherwise, it is only loaded
7957 if not already done so (crtl->uses_pic_offset_table is null). Note that
7958 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7959 is only supported iff COMPUTE_NOW is false. */
7961 static void
7962 require_pic_register (rtx pic_reg, bool compute_now)
7964 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7966 /* A lot of the logic here is made obscure by the fact that this
7967 routine gets called as part of the rtx cost estimation process.
7968 We don't want those calls to affect any assumptions about the real
7969 function; and further, we can't call entry_of_function() until we
7970 start the real expansion process. */
7971 if (!crtl->uses_pic_offset_table || compute_now)
7973 gcc_assert (can_create_pseudo_p ()
7974 || (pic_reg != NULL_RTX
7975 && REG_P (pic_reg)
7976 && GET_MODE (pic_reg) == Pmode));
7977 if (arm_pic_register != INVALID_REGNUM
7978 && !compute_now
7979 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7981 if (!cfun->machine->pic_reg)
7982 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7984 /* Play games to avoid marking the function as needing pic
7985 if we are being called as part of the cost-estimation
7986 process. */
7987 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7988 crtl->uses_pic_offset_table = 1;
7990 else
7992 rtx_insn *seq, *insn;
7994 if (pic_reg == NULL_RTX)
7995 pic_reg = gen_reg_rtx (Pmode);
7996 if (!cfun->machine->pic_reg)
7997 cfun->machine->pic_reg = pic_reg;
7999 /* Play games to avoid marking the function as needing pic
8000 if we are being called as part of the cost-estimation
8001 process. */
8002 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8004 crtl->uses_pic_offset_table = 1;
8005 start_sequence ();
8007 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8008 && arm_pic_register > LAST_LO_REGNUM
8009 && !compute_now)
8010 emit_move_insn (cfun->machine->pic_reg,
8011 gen_rtx_REG (Pmode, arm_pic_register));
8012 else
8013 arm_load_pic_register (0UL, pic_reg);
8015 seq = get_insns ();
8016 end_sequence ();
8018 for (insn = seq; insn; insn = NEXT_INSN (insn))
8019 if (INSN_P (insn))
8020 INSN_LOCATION (insn) = prologue_location;
8022 /* We can be called during expansion of PHI nodes, where
8023 we can't yet emit instructions directly in the final
8024 insn stream. Queue the insns on the entry edge, they will
8025 be committed after everything else is expanded. */
8026 if (currently_expanding_to_rtl)
8027 insert_insn_on_edge (seq,
8028 single_succ_edge
8029 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8030 else
8031 emit_insn (seq);
8037 /* Generate insns to calculate the address of ORIG in pic mode. */
8038 static rtx_insn *
8039 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8041 rtx pat;
8042 rtx mem;
8044 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8046 /* Make the MEM as close to a constant as possible. */
8047 mem = SET_SRC (pat);
8048 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8049 MEM_READONLY_P (mem) = 1;
8050 MEM_NOTRAP_P (mem) = 1;
8052 return emit_insn (pat);
8055 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8056 created to hold the result of the load. If not NULL, PIC_REG indicates
8057 which register to use as PIC register, otherwise it is decided by register
8058 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8059 location in the instruction stream, regardless of whether it was loaded
8060 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8061 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8063 Returns the register REG into which the PIC load is performed. */
8066 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8067 bool compute_now)
8069 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8071 if (SYMBOL_REF_P (orig)
8072 || LABEL_REF_P (orig))
8074 if (reg == 0)
8076 gcc_assert (can_create_pseudo_p ());
8077 reg = gen_reg_rtx (Pmode);
8080 /* VxWorks does not impose a fixed gap between segments; the run-time
8081 gap can be different from the object-file gap. We therefore can't
8082 use GOTOFF unless we are absolutely sure that the symbol is in the
8083 same segment as the GOT. Unfortunately, the flexibility of linker
8084 scripts means that we can't be sure of that in general, so assume
8085 that GOTOFF is never valid on VxWorks. */
8086 /* References to weak symbols cannot be resolved locally: they
8087 may be overridden by a non-weak definition at link time. */
8088 rtx_insn *insn;
8089 if ((LABEL_REF_P (orig)
8090 || (SYMBOL_REF_P (orig)
8091 && SYMBOL_REF_LOCAL_P (orig)
8092 && (SYMBOL_REF_DECL (orig)
8093 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8094 && (!SYMBOL_REF_FUNCTION_P (orig)
8095 || arm_fdpic_local_funcdesc_p (orig))))
8096 && NEED_GOT_RELOC
8097 && arm_pic_data_is_text_relative)
8098 insn = arm_pic_static_addr (orig, reg);
8099 else
8101 /* If this function doesn't have a pic register, create one now. */
8102 require_pic_register (pic_reg, compute_now);
8104 if (pic_reg == NULL_RTX)
8105 pic_reg = cfun->machine->pic_reg;
8107 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8110 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8111 by loop. */
8112 set_unique_reg_note (insn, REG_EQUAL, orig);
8114 return reg;
8116 else if (GET_CODE (orig) == CONST)
8118 rtx base, offset;
8120 if (GET_CODE (XEXP (orig, 0)) == PLUS
8121 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8122 return orig;
8124 /* Handle the case where we have: const (UNSPEC_TLS). */
8125 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8126 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8127 return orig;
8129 /* Handle the case where we have:
8130 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8131 CONST_INT. */
8132 if (GET_CODE (XEXP (orig, 0)) == PLUS
8133 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8134 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8136 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8137 return orig;
8140 if (reg == 0)
8142 gcc_assert (can_create_pseudo_p ());
8143 reg = gen_reg_rtx (Pmode);
8146 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8148 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8149 pic_reg, compute_now);
8150 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8151 base == reg ? 0 : reg, pic_reg,
8152 compute_now);
8154 if (CONST_INT_P (offset))
8156 /* The base register doesn't really matter, we only want to
8157 test the index for the appropriate mode. */
8158 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8160 gcc_assert (can_create_pseudo_p ());
8161 offset = force_reg (Pmode, offset);
8164 if (CONST_INT_P (offset))
8165 return plus_constant (Pmode, base, INTVAL (offset));
8168 if (GET_MODE_SIZE (mode) > 4
8169 && (GET_MODE_CLASS (mode) == MODE_INT
8170 || TARGET_SOFT_FLOAT))
8172 emit_insn (gen_addsi3 (reg, base, offset));
8173 return reg;
8176 return gen_rtx_PLUS (Pmode, base, offset);
8179 return orig;
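/* Editorial illustration (not part of the original source): under -fPIC the
   address of a preemptible global, say

     extern int counter;
     int *p = &counter;

   is typically rewritten by the routine above into a load through the GOT,
   conceptually (set (reg Rn) (mem (plus (reg pic) <GOT offset of counter>))),
   whereas a local label, or a local symbol when
   arm_pic_data_is_text_relative holds, takes the cheaper PC-relative
   arm_pic_static_addr path.  */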
8183 /* Generate insns that produce the address of the stack canary. */
8185 arm_stack_protect_tls_canary_mem (bool reload)
8187 rtx tp = gen_reg_rtx (SImode);
8188 if (reload)
8189 emit_insn (gen_reload_tp_hard (tp));
8190 else
8191 emit_insn (gen_load_tp_hard (tp));
8193 rtx reg = gen_reg_rtx (SImode);
8194 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8195 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8196 return gen_rtx_MEM (SImode, reg);
8200 /* Whether a register is callee saved or not. This is necessary because high
8201 registers are marked as caller saved when optimizing for size on Thumb-1
8202 targets despite being callee saved in order to avoid using them. */
8203 #define callee_saved_reg_p(reg) \
8204 (!call_used_or_fixed_reg_p (reg) \
8205 || (TARGET_THUMB1 && optimize_size \
8206 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8208 /* Return a mask for the call-clobbered low registers that are unused
8209 at the end of the prologue. */
8210 static unsigned long
8211 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8213 unsigned long mask = 0;
8214 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8216 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8217 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8218 mask |= 1 << (reg - FIRST_LO_REGNUM);
8219 return mask;
8222 /* Similarly for the start of the epilogue. */
8223 static unsigned long
8224 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8226 unsigned long mask = 0;
8227 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8229 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8230 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8231 mask |= 1 << (reg - FIRST_LO_REGNUM);
8232 return mask;
8235 /* Find a spare register to use during the prolog of a function. */
8237 static int
8238 thumb_find_work_register (unsigned long pushed_regs_mask)
8240 int reg;
8242 unsigned long unused_regs
8243 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8245 /* Check the argument registers first as these are call-used. The
8246 register allocation order means that sometimes r3 might be used
8247 but earlier argument registers might not, so check them all. */
8248 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8249 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8250 return reg;
8252 /* Otherwise look for a call-saved register that is going to be pushed. */
8253 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8254 if (pushed_regs_mask & (1 << reg))
8255 return reg;
8257 if (TARGET_THUMB2)
8259 /* Thumb-2 can use high regs. */
8260 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8261 if (pushed_regs_mask & (1 << reg))
8262 return reg;
8264 /* Something went wrong - thumb_compute_save_reg_mask()
8265 should have arranged for a suitable register to be pushed. */
8266 gcc_unreachable ();
8269 static GTY(()) int pic_labelno;
8271 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8272 low register. */
8274 void
8275 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8277 rtx l1, labelno, pic_tmp, pic_rtx;
8279 if (crtl->uses_pic_offset_table == 0
8280 || TARGET_SINGLE_PIC_BASE
8281 || TARGET_FDPIC)
8282 return;
8284 gcc_assert (flag_pic);
8286 if (pic_reg == NULL_RTX)
8287 pic_reg = cfun->machine->pic_reg;
8288 if (TARGET_VXWORKS_RTP)
8290 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8291 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8292 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8294 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8296 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8297 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8299 else
8301 /* We use an UNSPEC rather than a LABEL_REF because this label
8302 never appears in the code stream. */
8304 labelno = GEN_INT (pic_labelno++);
8305 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8306 l1 = gen_rtx_CONST (VOIDmode, l1);
8308 /* On the ARM the PC register contains 'dot + 8' at the time of the
8309 addition, on the Thumb it is 'dot + 4'. */
8310 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8311 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8312 UNSPEC_GOTSYM_OFF);
8313 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8315 if (TARGET_32BIT)
8317 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8319 else /* TARGET_THUMB1 */
8321 if (arm_pic_register != INVALID_REGNUM
8322 && REGNO (pic_reg) > LAST_LO_REGNUM)
8324 /* We will have pushed the pic register, so we should always be
8325 able to find a work register. */
8326 pic_tmp = gen_rtx_REG (SImode,
8327 thumb_find_work_register (saved_regs));
8328 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8329 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8330 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8332 else if (arm_pic_register != INVALID_REGNUM
8333 && arm_pic_register > LAST_LO_REGNUM
8334 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8336 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8337 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8338 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8340 else
8341 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8345 /* Need to emit this whether or not we obey regdecls,
8346 since setjmp/longjmp can cause life info to screw up. */
8347 emit_use (pic_reg);
8350 /* Try to determine whether an object, referenced via ORIG, will be
8351 placed in the text or data segment. This is used in FDPIC mode, to
8352 decide which relocations to use when accessing ORIG. *IS_READONLY
8353 is set to true if ORIG is a read-only location, false otherwise.
8354 Return true if we could determine the location of ORIG, false
8355 otherwise. *IS_READONLY is valid only when we return true. */
8356 static bool
8357 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8359 *is_readonly = false;
8361 if (LABEL_REF_P (orig))
8363 *is_readonly = true;
8364 return true;
8367 if (SYMBOL_REF_P (orig))
8369 if (CONSTANT_POOL_ADDRESS_P (orig))
8371 *is_readonly = true;
8372 return true;
8374 if (SYMBOL_REF_LOCAL_P (orig)
8375 && !SYMBOL_REF_EXTERNAL_P (orig)
8376 && SYMBOL_REF_DECL (orig)
8377 && (!DECL_P (SYMBOL_REF_DECL (orig))
8378 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8380 tree decl = SYMBOL_REF_DECL (orig);
8381 tree init = (TREE_CODE (decl) == VAR_DECL)
8382 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8383 ? decl : 0;
8384 int reloc = 0;
8385 bool named_section, readonly;
8387 if (init && init != error_mark_node)
8388 reloc = compute_reloc_for_constant (init);
8390 named_section = TREE_CODE (decl) == VAR_DECL
8391 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8392 readonly = decl_readonly_section (decl, reloc);
8394 /* We don't know where the link script will put a named
8395 section, so return false in such a case. */
8396 if (named_section)
8397 return false;
8399 *is_readonly = readonly;
8400 return true;
8403 /* We don't know. */
8404 return false;
8407 gcc_unreachable ();
8410 /* Generate code to load the address of a static var when flag_pic is set. */
8411 static rtx_insn *
8412 arm_pic_static_addr (rtx orig, rtx reg)
8414 rtx l1, labelno, offset_rtx;
8415 rtx_insn *insn;
8417 gcc_assert (flag_pic);
8419 bool is_readonly = false;
8420 bool info_known = false;
8422 if (TARGET_FDPIC
8423 && SYMBOL_REF_P (orig)
8424 && !SYMBOL_REF_FUNCTION_P (orig))
8425 info_known = arm_is_segment_info_known (orig, &is_readonly);
8427 if (TARGET_FDPIC
8428 && SYMBOL_REF_P (orig)
8429 && !SYMBOL_REF_FUNCTION_P (orig)
8430 && !info_known)
8432 /* We don't know where orig is stored, so we have to be
8433 pessimistic and use a GOT relocation. */
8434 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8436 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8438 else if (TARGET_FDPIC
8439 && SYMBOL_REF_P (orig)
8440 && (SYMBOL_REF_FUNCTION_P (orig)
8441 || !is_readonly))
8443 /* We use the GOTOFF relocation. */
8444 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8446 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8447 emit_insn (gen_movsi (reg, l1));
8448 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8450 else
8452 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8453 PC-relative access. */
8454 /* We use an UNSPEC rather than a LABEL_REF because this label
8455 never appears in the code stream. */
8456 labelno = GEN_INT (pic_labelno++);
8457 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8458 l1 = gen_rtx_CONST (VOIDmode, l1);
8460 /* On the ARM the PC register contains 'dot + 8' at the time of the
8461 addition, on the Thumb it is 'dot + 4'. */
8462 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8463 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8464 UNSPEC_SYMBOL_OFFSET);
8465 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8467 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8468 labelno));
8471 return insn;
8474 /* Return nonzero if X is valid as an ARM state addressing register. */
8475 static int
8476 arm_address_register_rtx_p (rtx x, int strict_p)
8478 int regno;
8480 if (!REG_P (x))
8481 return 0;
8483 regno = REGNO (x);
8485 if (strict_p)
8486 return ARM_REGNO_OK_FOR_BASE_P (regno);
8488 return (regno <= LAST_ARM_REGNUM
8489 || regno >= FIRST_PSEUDO_REGISTER
8490 || regno == FRAME_POINTER_REGNUM
8491 || regno == ARG_POINTER_REGNUM);
8494 /* Return TRUE if this rtx is the difference of a symbol and a label,
8495 and will reduce to a PC-relative relocation in the object file.
8496 Expressions like this can be left alone when generating PIC, rather
8497 than forced through the GOT. */
8498 static int
8499 pcrel_constant_p (rtx x)
8501 if (GET_CODE (x) == MINUS)
8502 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8504 return FALSE;
8507 /* Return true if X will surely end up in an index register after next
8508 splitting pass. */
8509 static bool
8510 will_be_in_index_register (const_rtx x)
8512 /* arm.md: calculate_pic_address will split this into a register. */
8513 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8516 /* Return nonzero if X is a valid ARM state address operand. */
8518 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8519 int strict_p)
8521 bool use_ldrd;
8522 enum rtx_code code = GET_CODE (x);
8524 if (arm_address_register_rtx_p (x, strict_p))
8525 return 1;
8527 use_ldrd = (TARGET_LDRD
8528 && (mode == DImode || mode == DFmode));
8530 if (code == POST_INC || code == PRE_DEC
8531 || ((code == PRE_INC || code == POST_DEC)
8532 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8533 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8535 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8536 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8537 && GET_CODE (XEXP (x, 1)) == PLUS
8538 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8540 rtx addend = XEXP (XEXP (x, 1), 1);
8542 /* Don't allow ldrd post increment by register because it's hard
8543 to fixup invalid register choices. */
8544 if (use_ldrd
8545 && GET_CODE (x) == POST_MODIFY
8546 && REG_P (addend))
8547 return 0;
8549 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8550 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8553 /* After reload constants split into minipools will have addresses
8554 from a LABEL_REF. */
8555 else if (reload_completed
8556 && (code == LABEL_REF
8557 || (code == CONST
8558 && GET_CODE (XEXP (x, 0)) == PLUS
8559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8560 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8561 return 1;
8563 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8564 return 0;
8566 else if (code == PLUS)
8568 rtx xop0 = XEXP (x, 0);
8569 rtx xop1 = XEXP (x, 1);
8571 return ((arm_address_register_rtx_p (xop0, strict_p)
8572 && ((CONST_INT_P (xop1)
8573 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8574 || (!strict_p && will_be_in_index_register (xop1))))
8575 || (arm_address_register_rtx_p (xop1, strict_p)
8576 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8579 #if 0
8580 /* Reload currently can't handle MINUS, so disable this for now */
8581 else if (GET_CODE (x) == MINUS)
8583 rtx xop0 = XEXP (x, 0);
8584 rtx xop1 = XEXP (x, 1);
8586 return (arm_address_register_rtx_p (xop0, strict_p)
8587 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8589 #endif
8591 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8592 && code == SYMBOL_REF
8593 && CONSTANT_POOL_ADDRESS_P (x)
8594 && ! (flag_pic
8595 && symbol_mentioned_p (get_pool_constant (x))
8596 && ! pcrel_constant_p (get_pool_constant (x))))
8597 return 1;
8599 return 0;
8602 /* Return true if we can avoid creating a constant pool entry for x. */
8603 static bool
8604 can_avoid_literal_pool_for_label_p (rtx x)
8606 /* Normally we can assign constant values to target registers without
8607 the help of a constant pool. But there are cases where we have to use a
8608 constant pool, such as:
8609 1) assigning a label to a register.
8610 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8612 Constant pool access in format:
8613 (set (reg r0) (mem (symbol_ref (".LC0"))))
8614 will cause the use of literal pool (later in function arm_reorg).
8615 So here we mark such format as an invalid format, then the compiler
8616 will adjust it into:
8617 (set (reg r0) (symbol_ref (".LC0")))
8618 (set (reg r0) (mem (reg r0))).
8619 No extra register is required, and (mem (reg r0)) won't cause the use
8620 of literal pools. */
8621 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8622 && CONSTANT_POOL_ADDRESS_P (x))
8623 return 1;
8624 return 0;
8628 /* Return nonzero if X is a valid Thumb-2 address operand. */
8629 static int
8630 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8632 bool use_ldrd;
8633 enum rtx_code code = GET_CODE (x);
8635 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8636 return mve_vector_mem_operand (mode, x, strict_p);
8638 if (arm_address_register_rtx_p (x, strict_p))
8639 return 1;
8641 use_ldrd = (TARGET_LDRD
8642 && (mode == DImode || mode == DFmode));
8644 if (code == POST_INC || code == PRE_DEC
8645 || ((code == PRE_INC || code == POST_DEC)
8646 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8647 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8649 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8650 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8651 && GET_CODE (XEXP (x, 1)) == PLUS
8652 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8654 /* Thumb-2 only has autoincrement by constant. */
8655 rtx addend = XEXP (XEXP (x, 1), 1);
8656 HOST_WIDE_INT offset;
8658 if (!CONST_INT_P (addend))
8659 return 0;
8661 offset = INTVAL(addend);
8662 if (GET_MODE_SIZE (mode) <= 4)
8663 return (offset > -256 && offset < 256);
8665 return (use_ldrd && offset > -1024 && offset < 1024
8666 && (offset & 3) == 0);
8669 /* After reload constants split into minipools will have addresses
8670 from a LABEL_REF. */
8671 else if (reload_completed
8672 && (code == LABEL_REF
8673 || (code == CONST
8674 && GET_CODE (XEXP (x, 0)) == PLUS
8675 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8676 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8677 return 1;
8679 else if (mode == TImode
8680 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8681 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8682 return 0;
8684 else if (code == PLUS)
8686 rtx xop0 = XEXP (x, 0);
8687 rtx xop1 = XEXP (x, 1);
8689 return ((arm_address_register_rtx_p (xop0, strict_p)
8690 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8691 || (!strict_p && will_be_in_index_register (xop1))))
8692 || (arm_address_register_rtx_p (xop1, strict_p)
8693 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8696 else if (can_avoid_literal_pool_for_label_p (x))
8697 return 0;
8699 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8700 && code == SYMBOL_REF
8701 && CONSTANT_POOL_ADDRESS_P (x)
8702 && ! (flag_pic
8703 && symbol_mentioned_p (get_pool_constant (x))
8704 && ! pcrel_constant_p (get_pool_constant (x))))
8705 return 1;
8707 return 0;
8710 /* Return nonzero if INDEX is valid for an address index operand in
8711 ARM state. */
8712 static int
8713 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8714 int strict_p)
8716 HOST_WIDE_INT range;
8717 enum rtx_code code = GET_CODE (index);
8719 /* Standard coprocessor addressing modes. */
8720 if (TARGET_HARD_FLOAT
8721 && (mode == SFmode || mode == DFmode))
8722 return (code == CONST_INT && INTVAL (index) < 1024
8723 && INTVAL (index) > -1024
8724 && (INTVAL (index) & 3) == 0);
8726 /* For quad modes, we restrict the constant offset to be slightly less
8727 than what the instruction format permits. We do this because for
8728 quad mode moves, we will actually decompose them into two separate
8729 double-mode reads or writes. INDEX must therefore be a valid
8730 (double-mode) offset and so should INDEX+8. */
8731 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8732 return (code == CONST_INT
8733 && INTVAL (index) < 1016
8734 && INTVAL (index) > -1024
8735 && (INTVAL (index) & 3) == 0);
8737 /* We have no such constraint on double mode offsets, so we permit the
8738 full range of the instruction format. */
8739 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8740 return (code == CONST_INT
8741 && INTVAL (index) < 1024
8742 && INTVAL (index) > -1024
8743 && (INTVAL (index) & 3) == 0);
8745 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8746 return (code == CONST_INT
8747 && INTVAL (index) < 1024
8748 && INTVAL (index) > -1024
8749 && (INTVAL (index) & 3) == 0);
8751 if (arm_address_register_rtx_p (index, strict_p)
8752 && (GET_MODE_SIZE (mode) <= 4))
8753 return 1;
8755 if (mode == DImode || mode == DFmode)
8757 if (code == CONST_INT)
8759 HOST_WIDE_INT val = INTVAL (index);
8761 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8762 If vldr is selected it uses arm_coproc_mem_operand. */
8763 if (TARGET_LDRD)
8764 return val > -256 && val < 256;
8765 else
8766 return val > -4096 && val < 4092;
8769 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8772 if (GET_MODE_SIZE (mode) <= 4
8773 && ! (arm_arch4
8774 && (mode == HImode
8775 || mode == HFmode
8776 || (mode == QImode && outer == SIGN_EXTEND))))
8778 if (code == MULT)
8780 rtx xiop0 = XEXP (index, 0);
8781 rtx xiop1 = XEXP (index, 1);
8783 return ((arm_address_register_rtx_p (xiop0, strict_p)
8784 && power_of_two_operand (xiop1, SImode))
8785 || (arm_address_register_rtx_p (xiop1, strict_p)
8786 && power_of_two_operand (xiop0, SImode)));
8788 else if (code == LSHIFTRT || code == ASHIFTRT
8789 || code == ASHIFT || code == ROTATERT)
8791 rtx op = XEXP (index, 1);
8793 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8794 && CONST_INT_P (op)
8795 && INTVAL (op) > 0
8796 && INTVAL (op) <= 31);
8800 /* For ARM v4 we may be doing a sign-extend operation during the
8801 load. */
8802 if (arm_arch4)
8804 if (mode == HImode
8805 || mode == HFmode
8806 || (outer == SIGN_EXTEND && mode == QImode))
8807 range = 256;
8808 else
8809 range = 4096;
8811 else
8812 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8814 return (code == CONST_INT
8815 && INTVAL (index) < range
8816 && INTVAL (index) > -range);
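/* Editorial illustration (not part of the original source): addresses the
   predicate above accepts for a few common cases, shown in assembly syntax:

     ldr  r0, [r1, #4095]        SImode, immediate range +/-4095
     ldrh r0, [r1, #255]         HImode on ARMv4 and later, range +/-255
     ldrd r0, r1, [r2, #248]     DImode with TARGET_LDRD, range +/-255
     vldr d0, [r1, #1020]        DFmode, multiple of 4 below 1024
     ldr  r0, [r1, r2, lsl #2]   register index scaled by a power of two  */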
8819 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8820 index operand. i.e. 1, 2, 4 or 8. */
8821 static bool
8822 thumb2_index_mul_operand (rtx op)
8824 HOST_WIDE_INT val;
8826 if (!CONST_INT_P (op))
8827 return false;
8829 val = INTVAL(op);
8830 return (val == 1 || val == 2 || val == 4 || val == 8);
8833 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8834 static int
8835 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8837 enum rtx_code code = GET_CODE (index);
8839 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8840 /* Standard coprocessor addressing modes. */
8841 if (TARGET_VFP_BASE
8842 && (mode == SFmode || mode == DFmode))
8843 return (code == CONST_INT && INTVAL (index) < 1024
8844 /* Thumb-2 allows only > -256 index range for its core register
8845 load/stores. Since we allow SF/DF in core registers, we have
8846 to use the intersection between -256~4096 (core) and -1024~1024
8847 (coprocessor). */
8848 && INTVAL (index) > -256
8849 && (INTVAL (index) & 3) == 0);
8851 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8853 /* For DImode assume values will usually live in core regs
8854 and only allow LDRD addressing modes. */
8855 if (!TARGET_LDRD || mode != DImode)
8856 return (code == CONST_INT
8857 && INTVAL (index) < 1024
8858 && INTVAL (index) > -1024
8859 && (INTVAL (index) & 3) == 0);
8862 /* For quad modes, we restrict the constant offset to be slightly less
8863 than what the instruction format permits. We do this because for
8864 quad mode moves, we will actually decompose them into two separate
8865 double-mode reads or writes. INDEX must therefore be a valid
8866 (double-mode) offset and so should INDEX+8. */
8867 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8868 return (code == CONST_INT
8869 && INTVAL (index) < 1016
8870 && INTVAL (index) > -1024
8871 && (INTVAL (index) & 3) == 0);
8873 /* We have no such constraint on double mode offsets, so we permit the
8874 full range of the instruction format. */
8875 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8876 return (code == CONST_INT
8877 && INTVAL (index) < 1024
8878 && INTVAL (index) > -1024
8879 && (INTVAL (index) & 3) == 0);
8881 if (arm_address_register_rtx_p (index, strict_p)
8882 && (GET_MODE_SIZE (mode) <= 4))
8883 return 1;
8885 if (mode == DImode || mode == DFmode)
8887 if (code == CONST_INT)
8889 HOST_WIDE_INT val = INTVAL (index);
8890 /* Thumb-2 ldrd only has reg+const addressing modes.
8891 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8892 If vldr is selected it uses arm_coproc_mem_operand. */
8893 if (TARGET_LDRD)
8894 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8895 else
8896 return IN_RANGE (val, -255, 4095 - 4);
8898 else
8899 return 0;
8902 if (code == MULT)
8904 rtx xiop0 = XEXP (index, 0);
8905 rtx xiop1 = XEXP (index, 1);
8907 return ((arm_address_register_rtx_p (xiop0, strict_p)
8908 && thumb2_index_mul_operand (xiop1))
8909 || (arm_address_register_rtx_p (xiop1, strict_p)
8910 && thumb2_index_mul_operand (xiop0)));
8912 else if (code == ASHIFT)
8914 rtx op = XEXP (index, 1);
8916 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8917 && CONST_INT_P (op)
8918 && INTVAL (op) > 0
8919 && INTVAL (op) <= 3);
8922 return (code == CONST_INT
8923 && INTVAL (index) < 4096
8924 && INTVAL (index) > -256);
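/* Editorial illustration (not part of the original source): the Thumb-2
   variant above accepts, for example,

     ldr  r0, [r1, #4095]        positive 12-bit immediate
     ldr  r0, [r1, #-255]        negative offsets limited to 8 bits
     ldrd r0, r1, [r2, #1020]    DImode, multiple of 4 within +/-1020
     ldr  r0, [r1, r2, lsl #3]   register index shifted left by at most 3  */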
8927 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8928 static int
8929 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8931 int regno;
8933 if (!REG_P (x))
8934 return 0;
8936 regno = REGNO (x);
8938 if (strict_p)
8939 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8941 return (regno <= LAST_LO_REGNUM
8942 || regno > LAST_VIRTUAL_REGISTER
8943 || regno == FRAME_POINTER_REGNUM
8944 || (GET_MODE_SIZE (mode) >= 4
8945 && (regno == STACK_POINTER_REGNUM
8946 || regno >= FIRST_PSEUDO_REGISTER
8947 || x == hard_frame_pointer_rtx
8948 || x == arg_pointer_rtx)));
8951 /* Return nonzero if x is a legitimate index register. This is the case
8952 for any base register that can access a QImode object. */
8953 inline static int
8954 thumb1_index_register_rtx_p (rtx x, int strict_p)
8956 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8959 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8961 The AP may be eliminated to either the SP or the FP, so we use the
8962 least common denominator, e.g. SImode, and offsets from 0 to 64.
8964 ??? Verify whether the above is the right approach.
8966 ??? Also, the FP may be eliminated to the SP, so perhaps that
8967 needs special handling also.
8969 ??? Look at how the mips16 port solves this problem. It probably uses
8970 better ways to solve some of these problems.
8972 Although it is not incorrect, we don't accept QImode and HImode
8973 addresses based on the frame pointer or arg pointer until the
8974 reload pass starts. This is so that eliminating such addresses
8975 into stack based ones won't produce impossible code. */
8977 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8979 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8980 return 0;
8982 /* ??? Not clear if this is right. Experiment. */
8983 if (GET_MODE_SIZE (mode) < 4
8984 && !(reload_in_progress || reload_completed)
8985 && (reg_mentioned_p (frame_pointer_rtx, x)
8986 || reg_mentioned_p (arg_pointer_rtx, x)
8987 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8988 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8989 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8990 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8991 return 0;
8993 /* Accept any base register. SP only in SImode or larger. */
8994 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8995 return 1;
8997 /* This is PC relative data before arm_reorg runs. */
8998 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8999 && SYMBOL_REF_P (x)
9000 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9001 && !arm_disable_literal_pool)
9002 return 1;
9004 /* This is PC relative data after arm_reorg runs. */
9005 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9006 && reload_completed
9007 && (LABEL_REF_P (x)
9008 || (GET_CODE (x) == CONST
9009 && GET_CODE (XEXP (x, 0)) == PLUS
9010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9011 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9012 return 1;
9014 /* Post-inc indexing only supported for SImode and larger. */
9015 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9016 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9017 return 1;
9019 else if (GET_CODE (x) == PLUS)
9021 /* REG+REG address can be any two index registers. */
9022 /* We disallow FRAME+REG addressing since we know that FRAME
9023 will be replaced with STACK, and SP relative addressing only
9024 permits SP+OFFSET. */
9025 if (GET_MODE_SIZE (mode) <= 4
9026 && XEXP (x, 0) != frame_pointer_rtx
9027 && XEXP (x, 1) != frame_pointer_rtx
9028 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9029 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9030 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9031 return 1;
9033 /* REG+const has 5-7 bit offset for non-SP registers. */
9034 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9035 || XEXP (x, 0) == arg_pointer_rtx)
9036 && CONST_INT_P (XEXP (x, 1))
9037 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9038 return 1;
9040 /* REG+const has 10-bit offset for SP, but only SImode and
9041 larger is supported. */
9042 /* ??? Should probably check for DI/DFmode overflow here
9043 just like GO_IF_LEGITIMATE_OFFSET does. */
9044 else if (REG_P (XEXP (x, 0))
9045 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9046 && GET_MODE_SIZE (mode) >= 4
9047 && CONST_INT_P (XEXP (x, 1))
9048 && INTVAL (XEXP (x, 1)) >= 0
9049 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9050 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9051 return 1;
9053 else if (REG_P (XEXP (x, 0))
9054 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9055 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9056 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9057 && REGNO (XEXP (x, 0))
9058 <= LAST_VIRTUAL_POINTER_REGISTER))
9059 && GET_MODE_SIZE (mode) >= 4
9060 && CONST_INT_P (XEXP (x, 1))
9061 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9062 return 1;
9065 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9066 && GET_MODE_SIZE (mode) == 4
9067 && SYMBOL_REF_P (x)
9068 && CONSTANT_POOL_ADDRESS_P (x)
9069 && !arm_disable_literal_pool
9070 && ! (flag_pic
9071 && symbol_mentioned_p (get_pool_constant (x))
9072 && ! pcrel_constant_p (get_pool_constant (x))))
9073 return 1;
9075 return 0;
9078 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9079 instruction of mode MODE. */
9081 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9083 switch (GET_MODE_SIZE (mode))
9085 case 1:
9086 return val >= 0 && val < 32;
9088 case 2:
9089 return val >= 0 && val < 64 && (val & 1) == 0;
9091 default:
9092 return (val >= 0
9093 && (val + GET_MODE_SIZE (mode)) <= 128
9094 && (val & 3) == 0);
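/* Roughly, these ranges match the scaled 5-bit immediates of the 16-bit
   Thumb load/store encodings: 0..31 for byte accesses, 0..62 (even) for
   halfwords and 0..124 (word-aligned) for words.  */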
9098 bool
9099 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9101 if (TARGET_ARM)
9102 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9103 else if (TARGET_THUMB2)
9104 return thumb2_legitimate_address_p (mode, x, strict_p);
9105 else /* if (TARGET_THUMB1) */
9106 return thumb1_legitimate_address_p (mode, x, strict_p);
9109 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9111 Given an rtx X being reloaded into a reg required to be
9112 in class CLASS, return the class of reg to actually use.
9113 In general this is just CLASS, but for the Thumb core registers and
9114 immediate constants we prefer a LO_REGS class or a subset. */
9116 static reg_class_t
9117 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9119 if (TARGET_32BIT)
9120 return rclass;
9121 else
9123 if (rclass == GENERAL_REGS)
9124 return LO_REGS;
9125 else
9126 return rclass;
9130 /* Build the SYMBOL_REF for __tls_get_addr. */
9132 static GTY(()) rtx tls_get_addr_libfunc;
9134 static rtx
9135 get_tls_get_addr (void)
9137 if (!tls_get_addr_libfunc)
9138 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9139 return tls_get_addr_libfunc;
9143 arm_load_tp (rtx target)
9145 if (!target)
9146 target = gen_reg_rtx (SImode);
9148 if (TARGET_HARD_TP)
9150 /* Can return in any reg. */
9151 emit_insn (gen_load_tp_hard (target));
9153 else
9155 /* Always returned in r0. Immediately copy the result into a pseudo,
9156 otherwise other uses of r0 (e.g. setting up function arguments) may
9157 clobber the value. */
9159 rtx tmp;
9161 if (TARGET_FDPIC)
9163 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9164 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9166 emit_insn (gen_load_tp_soft_fdpic ());
9168 /* Restore r9. */
9169 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9171 else
9172 emit_insn (gen_load_tp_soft ());
9174 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9175 emit_move_insn (target, tmp);
9177 return target;
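/* For example, expanding __builtin_thread_pointer ends up here: with a
   hardware thread register the value is read directly (the load_tp_hard
   pattern), while the soft variant calls the __aeabi_read_tp helper and
   therefore copies the result out of r0 straight away, as noted above.  */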
9180 static rtx
9181 load_tls_operand (rtx x, rtx reg)
9183 rtx tmp;
9185 if (reg == NULL_RTX)
9186 reg = gen_reg_rtx (SImode);
9188 tmp = gen_rtx_CONST (SImode, x);
9190 emit_move_insn (reg, tmp);
9192 return reg;
9195 static rtx_insn *
9196 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9198 rtx label, labelno = NULL_RTX, sum;
9200 gcc_assert (reloc != TLS_DESCSEQ);
9201 start_sequence ();
9203 if (TARGET_FDPIC)
9205 sum = gen_rtx_UNSPEC (Pmode,
9206 gen_rtvec (2, x, GEN_INT (reloc)),
9207 UNSPEC_TLS);
9209 else
9211 labelno = GEN_INT (pic_labelno++);
9212 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9213 label = gen_rtx_CONST (VOIDmode, label);
9215 sum = gen_rtx_UNSPEC (Pmode,
9216 gen_rtvec (4, x, GEN_INT (reloc), label,
9217 GEN_INT (TARGET_ARM ? 8 : 4)),
9218 UNSPEC_TLS);
9220 reg = load_tls_operand (sum, reg);
9222 if (TARGET_FDPIC)
9223 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9224 else if (TARGET_ARM)
9225 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9226 else
9227 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9229 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9230 LCT_PURE, /* LCT_CONST? */
9231 Pmode, reg, Pmode);
9233 rtx_insn *insns = get_insns ();
9234 end_sequence ();
9236 return insns;
9239 static rtx
9240 arm_tls_descseq_addr (rtx x, rtx reg)
9242 rtx labelno = GEN_INT (pic_labelno++);
9243 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9244 rtx sum = gen_rtx_UNSPEC (Pmode,
9245 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9246 gen_rtx_CONST (VOIDmode, label),
9247 GEN_INT (!TARGET_ARM)),
9248 UNSPEC_TLS);
9249 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9251 emit_insn (gen_tlscall (x, labelno));
9252 if (!reg)
9253 reg = gen_reg_rtx (SImode);
9254 else
9255 gcc_assert (REGNO (reg) != R0_REGNUM);
9257 emit_move_insn (reg, reg0);
9259 return reg;
9264 legitimize_tls_address (rtx x, rtx reg)
9266 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9267 rtx_insn *insns;
9268 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9270 switch (model)
9272 case TLS_MODEL_GLOBAL_DYNAMIC:
9273 if (TARGET_GNU2_TLS)
9275 gcc_assert (!TARGET_FDPIC);
9277 reg = arm_tls_descseq_addr (x, reg);
9279 tp = arm_load_tp (NULL_RTX);
9281 dest = gen_rtx_PLUS (Pmode, tp, reg);
9283 else
9285 /* Original scheme */
9286 if (TARGET_FDPIC)
9287 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9288 else
9289 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9290 dest = gen_reg_rtx (Pmode);
9291 emit_libcall_block (insns, dest, ret, x);
9293 return dest;
9295 case TLS_MODEL_LOCAL_DYNAMIC:
9296 if (TARGET_GNU2_TLS)
9298 gcc_assert (!TARGET_FDPIC);
9300 reg = arm_tls_descseq_addr (x, reg);
9302 tp = arm_load_tp (NULL_RTX);
9304 dest = gen_rtx_PLUS (Pmode, tp, reg);
9306 else
9308 if (TARGET_FDPIC)
9309 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9310 else
9311 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9313 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9314 share the LDM result with other LD model accesses. */
9315 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9316 UNSPEC_TLS);
9317 dest = gen_reg_rtx (Pmode);
9318 emit_libcall_block (insns, dest, ret, eqv);
9320 /* Load the addend. */
9321 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9322 GEN_INT (TLS_LDO32)),
9323 UNSPEC_TLS);
9324 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9325 dest = gen_rtx_PLUS (Pmode, dest, addend);
9327 return dest;
9329 case TLS_MODEL_INITIAL_EXEC:
9330 if (TARGET_FDPIC)
9332 sum = gen_rtx_UNSPEC (Pmode,
9333 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9334 UNSPEC_TLS);
9335 reg = load_tls_operand (sum, reg);
9336 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9337 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9339 else
9341 labelno = GEN_INT (pic_labelno++);
9342 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9343 label = gen_rtx_CONST (VOIDmode, label);
9344 sum = gen_rtx_UNSPEC (Pmode,
9345 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9346 GEN_INT (TARGET_ARM ? 8 : 4)),
9347 UNSPEC_TLS);
9348 reg = load_tls_operand (sum, reg);
9350 if (TARGET_ARM)
9351 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9352 else if (TARGET_THUMB2)
9353 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9354 else
9356 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9357 emit_move_insn (reg, gen_const_mem (SImode, reg));
9361 tp = arm_load_tp (NULL_RTX);
9363 return gen_rtx_PLUS (Pmode, tp, reg);
9365 case TLS_MODEL_LOCAL_EXEC:
9366 tp = arm_load_tp (NULL_RTX);
9368 reg = gen_rtx_UNSPEC (Pmode,
9369 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9370 UNSPEC_TLS);
9371 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9373 return gen_rtx_PLUS (Pmode, tp, reg);
9375 default:
9376 abort ();
9380 /* Try machine-dependent ways of modifying an illegitimate address
9381 to be legitimate. If we find one, return the new, valid address. */
9383 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9385 if (arm_tls_referenced_p (x))
9387 rtx addend = NULL;
9389 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9391 addend = XEXP (XEXP (x, 0), 1);
9392 x = XEXP (XEXP (x, 0), 0);
9395 if (!SYMBOL_REF_P (x))
9396 return x;
9398 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9400 x = legitimize_tls_address (x, NULL_RTX);
9402 if (addend)
9404 x = gen_rtx_PLUS (SImode, x, addend);
9405 orig_x = x;
9407 else
9408 return x;
9411 if (TARGET_THUMB1)
9412 return thumb_legitimize_address (x, orig_x, mode);
9414 if (GET_CODE (x) == PLUS)
9416 rtx xop0 = XEXP (x, 0);
9417 rtx xop1 = XEXP (x, 1);
9419 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9420 xop0 = force_reg (SImode, xop0);
9422 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9423 && !symbol_mentioned_p (xop1))
9424 xop1 = force_reg (SImode, xop1);
9426 if (ARM_BASE_REGISTER_RTX_P (xop0)
9427 && CONST_INT_P (xop1))
9429 HOST_WIDE_INT n, low_n;
9430 rtx base_reg, val;
9431 n = INTVAL (xop1);
9433 /* VFP addressing modes actually allow greater offsets, but for
9434 now we just stick with the lowest common denominator. */
9435 if (mode == DImode || mode == DFmode)
9437 low_n = n & 0x0f;
9438 n &= ~0x0f;
9439 if (low_n > 4)
9441 n += 16;
9442 low_n -= 16;
9445 else
9447 low_n = ((mode) == TImode ? 0
9448 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9449 n -= low_n;
9452 base_reg = gen_reg_rtx (SImode);
9453 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9454 emit_move_insn (base_reg, val);
9455 x = plus_constant (Pmode, base_reg, low_n);
9457 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9458 x = gen_rtx_PLUS (SImode, xop0, xop1);
9461 /* XXX We don't allow MINUS any more -- see comment in
9462 arm_legitimate_address_outer_p (). */
9463 else if (GET_CODE (x) == MINUS)
9465 rtx xop0 = XEXP (x, 0);
9466 rtx xop1 = XEXP (x, 1);
9468 if (CONSTANT_P (xop0))
9469 xop0 = force_reg (SImode, xop0);
9471 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9472 xop1 = force_reg (SImode, xop1);
9474 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9475 x = gen_rtx_MINUS (SImode, xop0, xop1);
9478 /* Make sure to take full advantage of the pre-indexed addressing mode
9479 with absolute addresses which often allows for the base register to
9480 be factorized for multiple adjacent memory references, and it might
9481 even allow for the mini pool to be avoided entirely. */
9482 else if (CONST_INT_P (x) && optimize > 0)
9484 unsigned int bits;
9485 HOST_WIDE_INT mask, base, index;
9486 rtx base_reg;
9488 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9489 only use an 8-bit index. So let's use a 12-bit index for
9490 SImode only and hope that arm_gen_constant will enable LDRB
9491 to use more bits. */
9492 bits = (mode == SImode) ? 12 : 8;
9493 mask = (1 << bits) - 1;
9494 base = INTVAL (x) & ~mask;
9495 index = INTVAL (x) & mask;
9496 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9498 /* It'll most probably be more efficient to generate the
9499 base with more bits set and use a negative index instead.
9500 Don't do this for Thumb as negative offsets are much more
9501 limited. */
9502 base |= mask;
9503 index -= mask;
9505 base_reg = force_reg (SImode, GEN_INT (base));
9506 x = plus_constant (Pmode, base_reg, index);
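      /* For example, with SImode and x == 0x12345 this yields a base of
         0x12000 in a register plus an index of 0x345, letting nearby
         absolute addresses share the same base register.  */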
9509 if (flag_pic)
9511 /* We need to find and carefully transform any SYMBOL and LABEL
9512 references; so go back to the original address expression. */
9513 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9514 false /*compute_now*/);
9516 if (new_x != orig_x)
9517 x = new_x;
9520 return x;
9524 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9525 to be legitimate. If we find one, return the new, valid address. */
9527 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9529 if (GET_CODE (x) == PLUS
9530 && CONST_INT_P (XEXP (x, 1))
9531 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9532 || INTVAL (XEXP (x, 1)) < 0))
9534 rtx xop0 = XEXP (x, 0);
9535 rtx xop1 = XEXP (x, 1);
9536 HOST_WIDE_INT offset = INTVAL (xop1);
9538 /* Try and fold the offset into a biasing of the base register and
9539 then offsetting that. Don't do this when optimizing for space
9540 since it can cause too many CSEs. */
9541 if (optimize_size && offset >= 0
9542 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9544 HOST_WIDE_INT delta;
9546 if (offset >= 256)
9547 delta = offset - (256 - GET_MODE_SIZE (mode));
9548 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9549 delta = 31 * GET_MODE_SIZE (mode);
9550 else
9551 delta = offset & (~31 * GET_MODE_SIZE (mode));
9553 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9554 NULL_RTX);
9555 x = plus_constant (Pmode, xop0, delta);
9557 else if (offset < 0 && offset > -256)
9558 /* Small negative offsets are best done with a subtract before the
9559 dereference; forcing these into a register normally takes two
9560 instructions. */
9561 x = force_operand (x, NULL_RTX);
9562 else
9564 /* For the remaining cases, force the constant into a register. */
9565 xop1 = force_reg (SImode, xop1);
9566 x = gen_rtx_PLUS (SImode, xop0, xop1);
9569 else if (GET_CODE (x) == PLUS
9570 && s_register_operand (XEXP (x, 1), SImode)
9571 && !s_register_operand (XEXP (x, 0), SImode))
9573 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9575 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9578 if (flag_pic)
9580 /* We need to find and carefully transform any SYMBOL and LABEL
9581 references; so go back to the original address expression. */
9582 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9583 false /*compute_now*/);
9585 if (new_x != orig_x)
9586 x = new_x;
9589 return x;
9592 /* Return TRUE if X contains any TLS symbol references. */
9594 bool
9595 arm_tls_referenced_p (rtx x)
9597 if (! TARGET_HAVE_TLS)
9598 return false;
9600 subrtx_iterator::array_type array;
9601 FOR_EACH_SUBRTX (iter, array, x, ALL)
9603 const_rtx x = *iter;
9604 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9606 /* ARM currently does not provide relocations to encode TLS variables
9607 into AArch32 instructions, only data, so there is currently no way
9608 to implement these if a literal pool is disabled. */
9609 if (arm_disable_literal_pool)
9610 sorry ("accessing thread-local storage is not currently supported "
9611 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9613 return true;
9616 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9617 TLS offsets, not real symbol references. */
9618 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9619 iter.skip_subrtxes ();
9621 return false;
9624 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9626 On the ARM, allow any integer (invalid ones are removed later by insn
9627 patterns), nice doubles and symbol_refs which refer to the function's
9628 constant pool XXX.
9630 When generating pic allow anything. */
9632 static bool
9633 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9635 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9636 return false;
9638 return flag_pic || !label_mentioned_p (x);
9641 static bool
9642 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9644 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9645 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9646 for ARMv8-M Baseline or later the result is valid. */
9647 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9648 x = XEXP (x, 0);
9650 return (CONST_INT_P (x)
9651 || CONST_DOUBLE_P (x)
9652 || CONSTANT_ADDRESS_P (x)
9653 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9654 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9655 we build the symbol address with upper/lower
9656 relocations. */
9657 || (TARGET_THUMB1
9658 && !label_mentioned_p (x)
9659 && arm_valid_symbolic_address_p (x)
9660 && arm_disable_literal_pool)
9661 || flag_pic);
9664 static bool
9665 arm_legitimate_constant_p (machine_mode mode, rtx x)
9667 return (!arm_cannot_force_const_mem (mode, x)
9668 && (TARGET_32BIT
9669 ? arm_legitimate_constant_p_1 (mode, x)
9670 : thumb_legitimate_constant_p (mode, x)));
9673 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9675 static bool
9676 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9678 rtx base, offset;
9679 split_const (x, &base, &offset);
9681 if (SYMBOL_REF_P (base))
9683 /* Function symbols cannot have an offset due to the Thumb bit. */
9684 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9685 && INTVAL (offset) != 0)
9686 return true;
9688 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9689 && !offset_within_block_p (base, INTVAL (offset)))
9690 return true;
9692 return arm_tls_referenced_p (x);
9695 #define REG_OR_SUBREG_REG(X) \
9696 (REG_P (X) \
9697 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9699 #define REG_OR_SUBREG_RTX(X) \
9700 (REG_P (X) ? (X) : SUBREG_REG (X))
9702 static inline int
9703 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9705 machine_mode mode = GET_MODE (x);
9706 int total, words;
9708 switch (code)
9710 case ASHIFT:
9711 case ASHIFTRT:
9712 case LSHIFTRT:
9713 case ROTATERT:
9714 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9716 case PLUS:
9717 case MINUS:
9718 case COMPARE:
9719 case NEG:
9720 case NOT:
9721 return COSTS_N_INSNS (1);
9723 case MULT:
9724 if (arm_arch6m && arm_m_profile_small_mul)
9725 return COSTS_N_INSNS (32);
9727 if (CONST_INT_P (XEXP (x, 1)))
9729 int cycles = 0;
9730 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9732 while (i)
9734 i >>= 2;
9735 cycles++;
9737 return COSTS_N_INSNS (2) + cycles;
9739 return COSTS_N_INSNS (1) + 16;
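      /* For example, in the constant path above a multiplier of 0x155 is
         shifted right two bits at a time for five iterations, giving a cost
         of COSTS_N_INSNS (2) + 5.  */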
9741 case SET:
9742 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9743 the mode. */
9744 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9745 return (COSTS_N_INSNS (words)
9746 + 4 * ((MEM_P (SET_SRC (x)))
9747 + MEM_P (SET_DEST (x))));
9749 case CONST_INT:
9750 if (outer == SET)
9752 if (UINTVAL (x) < 256
9753 /* 16-bit constant. */
9754 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9755 return 0;
9756 if (thumb_shiftable_const (INTVAL (x)))
9757 return COSTS_N_INSNS (2);
9758 return arm_disable_literal_pool
9759 ? COSTS_N_INSNS (8)
9760 : COSTS_N_INSNS (3);
9762 else if ((outer == PLUS || outer == COMPARE)
9763 && INTVAL (x) < 256 && INTVAL (x) > -256)
9764 return 0;
9765 else if ((outer == IOR || outer == XOR || outer == AND)
9766 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9767 return COSTS_N_INSNS (1);
9768 else if (outer == AND)
9770 int i;
9771 /* This duplicates the tests in the andsi3 expander. */
9772 for (i = 9; i <= 31; i++)
9773 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9774 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9775 return COSTS_N_INSNS (2);
9777 else if (outer == ASHIFT || outer == ASHIFTRT
9778 || outer == LSHIFTRT)
9779 return 0;
9780 return COSTS_N_INSNS (2);
9782 case CONST:
9783 case CONST_DOUBLE:
9784 case LABEL_REF:
9785 case SYMBOL_REF:
9786 return COSTS_N_INSNS (3);
9788 case UDIV:
9789 case UMOD:
9790 case DIV:
9791 case MOD:
9792 return 100;
9794 case TRUNCATE:
9795 return 99;
9797 case AND:
9798 case XOR:
9799 case IOR:
9800 /* XXX guess. */
9801 return 8;
9803 case MEM:
9804 /* XXX another guess. */
9805 /* Memory costs quite a lot for the first word, but subsequent words
9806 load at the equivalent of a single insn each. */
9807 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9808 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9809 ? 4 : 0));
9811 case IF_THEN_ELSE:
9812 /* XXX a guess. */
9813 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9814 return 14;
9815 return 2;
9817 case SIGN_EXTEND:
9818 case ZERO_EXTEND:
9819 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9820 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9822 if (mode == SImode)
9823 return total;
9825 if (arm_arch6)
9826 return total + COSTS_N_INSNS (1);
9828 /* Assume a two-shift sequence. Increase the cost slightly so
9829 we prefer actual shifts over an extend operation. */
9830 return total + 1 + COSTS_N_INSNS (2);
9832 default:
9833 return 99;
9837 /* Estimates the size cost of thumb1 instructions.
9838 For now most of the code is copied from thumb1_rtx_costs. We need more
9839 fine-grained tuning when we have more related test cases. */
9840 static inline int
9841 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9843 machine_mode mode = GET_MODE (x);
9844 int words, cost;
9846 switch (code)
9848 case ASHIFT:
9849 case ASHIFTRT:
9850 case LSHIFTRT:
9851 case ROTATERT:
9852 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9854 case PLUS:
9855 case MINUS:
9856 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9857 operations defined by RTL expansion, especially for the expansion of
9858 multiplication. */
9859 if ((GET_CODE (XEXP (x, 0)) == MULT
9860 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9861 || (GET_CODE (XEXP (x, 1)) == MULT
9862 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9863 return COSTS_N_INSNS (2);
9864 /* Fall through. */
9865 case COMPARE:
9866 case NEG:
9867 case NOT:
9868 return COSTS_N_INSNS (1);
9870 case MULT:
9871 if (CONST_INT_P (XEXP (x, 1)))
9873 /* Thumb1 mul instruction can't operate on const. We must load it
9874 into a register first. */
9875 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9876 /* For the targets which have a very small and high-latency multiply
9877 unit, we prefer to synthesize the mult with up to 5 instructions,
9878 giving a good balance between size and performance. */
9879 if (arm_arch6m && arm_m_profile_small_mul)
9880 return COSTS_N_INSNS (5);
9881 else
9882 return COSTS_N_INSNS (1) + const_size;
9884 return COSTS_N_INSNS (1);
9886 case SET:
9887 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9888 the mode. */
9889 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9890 cost = COSTS_N_INSNS (words);
9891 if (satisfies_constraint_J (SET_SRC (x))
9892 || satisfies_constraint_K (SET_SRC (x))
9893 /* Too big an immediate for a 2-byte mov; MOVT is used instead. */
9894 || (CONST_INT_P (SET_SRC (x))
9895 && UINTVAL (SET_SRC (x)) >= 256
9896 && TARGET_HAVE_MOVT
9897 && satisfies_constraint_j (SET_SRC (x)))
9898 /* thumb1_movdi_insn. */
9899 || ((words > 1) && MEM_P (SET_SRC (x))))
9900 cost += COSTS_N_INSNS (1);
9901 return cost;
9903 case CONST_INT:
9904 if (outer == SET)
9906 if (UINTVAL (x) < 256)
9907 return COSTS_N_INSNS (1);
9908 /* movw is 4 bytes long. */
9909 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9910 return COSTS_N_INSNS (2);
9911 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9912 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9913 return COSTS_N_INSNS (2);
9914 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9915 if (thumb_shiftable_const (INTVAL (x)))
9916 return COSTS_N_INSNS (2);
9917 return arm_disable_literal_pool
9918 ? COSTS_N_INSNS (8)
9919 : COSTS_N_INSNS (3);
9921 else if ((outer == PLUS || outer == COMPARE)
9922 && INTVAL (x) < 256 && INTVAL (x) > -256)
9923 return 0;
9924 else if ((outer == IOR || outer == XOR || outer == AND)
9925 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9926 return COSTS_N_INSNS (1);
9927 else if (outer == AND)
9929 int i;
9930 /* This duplicates the tests in the andsi3 expander. */
9931 for (i = 9; i <= 31; i++)
9932 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9933 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9934 return COSTS_N_INSNS (2);
9936 else if (outer == ASHIFT || outer == ASHIFTRT
9937 || outer == LSHIFTRT)
9938 return 0;
9939 return COSTS_N_INSNS (2);
9941 case CONST:
9942 case CONST_DOUBLE:
9943 case LABEL_REF:
9944 case SYMBOL_REF:
9945 return COSTS_N_INSNS (3);
9947 case UDIV:
9948 case UMOD:
9949 case DIV:
9950 case MOD:
9951 return 100;
9953 case TRUNCATE:
9954 return 99;
9956 case AND:
9957 case XOR:
9958 case IOR:
9959 return COSTS_N_INSNS (1);
9961 case MEM:
9962 return (COSTS_N_INSNS (1)
9963 + COSTS_N_INSNS (1)
9964 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9965 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9966 ? COSTS_N_INSNS (1) : 0));
9968 case IF_THEN_ELSE:
9969 /* XXX a guess. */
9970 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9971 return 14;
9972 return 2;
9974 case ZERO_EXTEND:
9975 /* XXX still guessing. */
9976 switch (GET_MODE (XEXP (x, 0)))
9978 case E_QImode:
9979 return (1 + (mode == DImode ? 4 : 0)
9980 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9982 case E_HImode:
9983 return (4 + (mode == DImode ? 4 : 0)
9984 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9986 case E_SImode:
9987 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9989 default:
9990 return 99;
9993 default:
9994 return 99;
9998 /* Helper function for arm_rtx_costs. If one operand of the OP, a
9999 PLUS, adds the carry flag, then return the other operand. If
10000 neither is a carry, return OP unchanged. */
10001 static rtx
10002 strip_carry_operation (rtx op)
10004 gcc_assert (GET_CODE (op) == PLUS);
10005 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10006 return XEXP (op, 1);
10007 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10008 return XEXP (op, 0);
10009 return op;
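/* Roughly, given (plus (ltu (reg CC) (const_int 0)) (reg X)), the carry
   flag added to X, this returns just (reg X), so the caller can cost the
   addition without the carry term.  */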
10012 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10013 operand, then return the operand that is being shifted. If the shift
10014 is not by a constant, then set SHIFT_REG to point to the operand.
10015 Return NULL if OP is not a shifter operand. */
10016 static rtx
10017 shifter_op_p (rtx op, rtx *shift_reg)
10019 enum rtx_code code = GET_CODE (op);
10021 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10022 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10023 return XEXP (op, 0);
10024 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10025 return XEXP (op, 0);
10026 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10027 || code == ASHIFTRT)
10029 if (!CONST_INT_P (XEXP (op, 1)))
10030 *shift_reg = XEXP (op, 1);
10031 return XEXP (op, 0);
10034 return NULL;
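/* For example, (mult (reg X) (const_int 4)) is a shift left by two and
   returns X, while (ashift (reg X) (reg Y)) returns X and records Y in
   *SHIFT_REG because the shift amount is not a constant.  */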
10037 static bool
10038 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10040 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10041 rtx_code code = GET_CODE (x);
10042 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10044 switch (XINT (x, 1))
10046 case UNSPEC_UNALIGNED_LOAD:
10047 /* We can only do unaligned loads into the integer unit, and we can't
10048 use LDM or LDRD. */
10049 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10050 if (speed_p)
10051 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10052 + extra_cost->ldst.load_unaligned);
10054 #ifdef NOT_YET
10055 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10056 ADDR_SPACE_GENERIC, speed_p);
10057 #endif
10058 return true;
10060 case UNSPEC_UNALIGNED_STORE:
10061 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10062 if (speed_p)
10063 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10064 + extra_cost->ldst.store_unaligned);
10066 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10067 #ifdef NOT_YET
10068 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10069 ADDR_SPACE_GENERIC, speed_p);
10070 #endif
10071 return true;
10073 case UNSPEC_VRINTZ:
10074 case UNSPEC_VRINTP:
10075 case UNSPEC_VRINTM:
10076 case UNSPEC_VRINTR:
10077 case UNSPEC_VRINTX:
10078 case UNSPEC_VRINTA:
10079 if (speed_p)
10080 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10082 return true;
10083 default:
10084 *cost = COSTS_N_INSNS (2);
10085 break;
10087 return true;
10090 /* Cost of a libcall. We assume one insn per argument, an amount for the
10091 call (one insn for -Os) and then one for processing the result. */
10092 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
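/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   costing for speed and COSTS_N_INSNS (4) when costing for size.  */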
10094 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10095 do \
10097 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10098 if (shift_op != NULL \
10099 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10101 if (shift_reg) \
10103 if (speed_p) \
10104 *cost += extra_cost->alu.arith_shift_reg; \
10105 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10106 ASHIFT, 1, speed_p); \
10108 else if (speed_p) \
10109 *cost += extra_cost->alu.arith_shift; \
10111 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10112 ASHIFT, 0, speed_p) \
10113 + rtx_cost (XEXP (x, 1 - IDX), \
10114 GET_MODE (shift_op), \
10115 OP, 1, speed_p)); \
10116 return true; \
10119 while (0)
10121 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10122 considering the costs of the addressing mode and memory access
10123 separately. */
10124 static bool
10125 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10126 int *cost, bool speed_p)
10128 machine_mode mode = GET_MODE (x);
10130 *cost = COSTS_N_INSNS (1);
10132 if (flag_pic
10133 && GET_CODE (XEXP (x, 0)) == PLUS
10134 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10135 /* This will be split into two instructions. Add the cost of the
10136 additional instruction here. The cost of the memory access is computed
10137 below. See arm.md:calculate_pic_address. */
10138 *cost += COSTS_N_INSNS (1);
10140 /* Calculate cost of the addressing mode. */
10141 if (speed_p)
10143 arm_addr_mode_op op_type;
10144 switch (GET_CODE (XEXP (x, 0)))
10146 default:
10147 case REG:
10148 op_type = AMO_DEFAULT;
10149 break;
10150 case MINUS:
10151 /* MINUS does not appear in RTL, but the architecture supports it,
10152 so handle this case defensively. */
10153 /* fall through */
10154 case PLUS:
10155 op_type = AMO_NO_WB;
10156 break;
10157 case PRE_INC:
10158 case PRE_DEC:
10159 case POST_INC:
10160 case POST_DEC:
10161 case PRE_MODIFY:
10162 case POST_MODIFY:
10163 op_type = AMO_WB;
10164 break;
10167 if (VECTOR_MODE_P (mode))
10168 *cost += current_tune->addr_mode_costs->vector[op_type];
10169 else if (FLOAT_MODE_P (mode))
10170 *cost += current_tune->addr_mode_costs->fp[op_type];
10171 else
10172 *cost += current_tune->addr_mode_costs->integer[op_type];
10175 /* Calculate cost of memory access. */
10176 if (speed_p)
10178 if (FLOAT_MODE_P (mode))
10180 if (GET_MODE_SIZE (mode) == 8)
10181 *cost += extra_cost->ldst.loadd;
10182 else
10183 *cost += extra_cost->ldst.loadf;
10185 else if (VECTOR_MODE_P (mode))
10186 *cost += extra_cost->ldst.loadv;
10187 else
10189 /* Integer modes */
10190 if (GET_MODE_SIZE (mode) == 8)
10191 *cost += extra_cost->ldst.ldrd;
10192 else
10193 *cost += extra_cost->ldst.load;
10197 return true;
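/* For example, when costing for speed a DFmode load through a PRE_MODIFY
   address adds the fp[AMO_WB] addressing-mode cost plus the ldst.loadd
   access cost on top of the base COSTS_N_INSNS (1).  */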
10200 /* Helper for arm_bfi_p. */
10201 static bool
10202 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10204 unsigned HOST_WIDE_INT const1;
10205 unsigned HOST_WIDE_INT const2 = 0;
10207 if (!CONST_INT_P (XEXP (op0, 1)))
10208 return false;
10210 const1 = UINTVAL (XEXP (op0, 1));
10211 if (!CONST_INT_P (XEXP (op1, 1))
10212 || ~UINTVAL (XEXP (op1, 1)) != const1)
10213 return false;
10215 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10216 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10218 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10219 *sub0 = XEXP (XEXP (op0, 0), 0);
10221 else
10222 *sub0 = XEXP (op0, 0);
10224 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10225 return false;
10227 *sub1 = XEXP (op1, 0);
10228 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10231 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10232 format looks something like:
10234 (IOR (AND (reg1) (~const1))
10235 (AND (ASHIFT (reg2) (const2))
10236 (const1)))
10238 where const1 is a consecutive sequence of 1-bits with the
10239 least-significant non-zero bit starting at bit position const2. If
10240 const2 is zero, then the shift will not appear at all, due to
10241 canonicalization. The two arms of the IOR expression may be
10242 flipped. */
10243 static bool
10244 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10246 if (GET_CODE (x) != IOR)
10247 return false;
10248 if (GET_CODE (XEXP (x, 0)) != AND
10249 || GET_CODE (XEXP (x, 1)) != AND)
10250 return false;
10251 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10252 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
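/* For example, with const1 == 0xff00 and const2 == 8:

     (ior (and (reg X) (const_int 0xffff00ff))
          (and (ashift (reg Y) (const_int 8)) (const_int 0xff00)))

   matches and inserts the low byte of Y into bits 8..15 of X, i.e. a
   BFI X, Y, #8, #8; *SUB0 is set to X and *SUB1 to Y.  */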
10255 /* RTX costs. Make an estimate of the cost of executing the operation
10256 X, which is contained within an operation with code OUTER_CODE.
10257 SPEED_P indicates whether the cost desired is the performance cost,
10258 or the size cost. The estimate is stored in COST and the return
10259 value is TRUE if the cost calculation is final, or FALSE if the
10260 caller should recurse through the operands of X to add additional
10261 costs.
10263 We currently make no attempt to model the size savings of Thumb-2
10264 16-bit instructions. At the normal points in compilation where
10265 this code is called we have no measure of whether the condition
10266 flags are live or not, and thus no realistic way to determine what
10267 the size will eventually be. */
10268 static bool
10269 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10270 const struct cpu_cost_table *extra_cost,
10271 int *cost, bool speed_p)
10273 machine_mode mode = GET_MODE (x);
10275 *cost = COSTS_N_INSNS (1);
10277 if (TARGET_THUMB1)
10279 if (speed_p)
10280 *cost = thumb1_rtx_costs (x, code, outer_code);
10281 else
10282 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10283 return true;
10286 switch (code)
10288 case SET:
10289 *cost = 0;
10290 /* SET RTXs don't have a mode so we get it from the destination. */
10291 mode = GET_MODE (SET_DEST (x));
10293 if (REG_P (SET_SRC (x))
10294 && REG_P (SET_DEST (x)))
10296 /* Assume that most copies can be done with a single insn,
10297 unless we don't have HW FP, in which case everything
10298 larger than word mode will require two insns. */
10299 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10300 && GET_MODE_SIZE (mode) > 4)
10301 || mode == DImode)
10302 ? 2 : 1);
10303 /* Conditional register moves can be encoded
10304 in 16 bits in Thumb mode. */
10305 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10306 *cost >>= 1;
10308 return true;
10311 if (CONST_INT_P (SET_SRC (x)))
10313 /* Handle CONST_INT here, since the value doesn't have a mode
10314 and we would otherwise be unable to work out the true cost. */
10315 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10316 0, speed_p);
10317 outer_code = SET;
10318 /* Slightly lower the cost of setting a core reg to a constant.
10319 This helps break up chains and allows for better scheduling. */
10320 if (REG_P (SET_DEST (x))
10321 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10322 *cost -= 1;
10323 x = SET_SRC (x);
10324 /* Immediate moves with an immediate in the range [0, 255] can be
10325 encoded in 16 bits in Thumb mode. */
10326 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10327 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10328 *cost >>= 1;
10329 goto const_int_cost;
10332 return false;
10334 case MEM:
10335 return arm_mem_costs (x, extra_cost, cost, speed_p);
10337 case PARALLEL:
10339 /* Calculations of LDM costs are complex. We assume an initial cost
10340 (ldm_1st) which will load the number of registers mentioned in
10341 ldm_regs_per_insn_1st registers; then each additional
10342 ldm_regs_per_insn_subsequent registers cost one more insn. The
10343 formula for N regs is thus:
10345 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10346 + ldm_regs_per_insn_subsequent - 1)
10347 / ldm_regs_per_insn_subsequent).
10349 Additional costs may also be added for addressing. A similar
10350 formula is used for STM. */
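      /* For example, loading 7 registers with ldm_regs_per_insn_1st == 4
         and ldm_regs_per_insn_subsequent == 2 costs
         ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2), i.e. two extra insns
         for the remaining three registers.  */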
10352 bool is_ldm = load_multiple_operation (x, SImode);
10353 bool is_stm = store_multiple_operation (x, SImode);
10355 if (is_ldm || is_stm)
10357 if (speed_p)
10359 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10360 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10361 ? extra_cost->ldst.ldm_regs_per_insn_1st
10362 : extra_cost->ldst.stm_regs_per_insn_1st;
10363 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10364 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10365 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10367 *cost += regs_per_insn_1st
10368 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10369 + regs_per_insn_sub - 1)
10370 / regs_per_insn_sub);
10371 return true;
10375 return false;
10377 case DIV:
10378 case UDIV:
10379 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10380 && (mode == SFmode || !TARGET_VFP_SINGLE))
10381 *cost += COSTS_N_INSNS (speed_p
10382 ? extra_cost->fp[mode != SFmode].div : 0);
10383 else if (mode == SImode && TARGET_IDIV)
10384 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10385 else
10386 *cost = LIBCALL_COST (2);
10388 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10389 possible, udiv is preferred. */
10390 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10391 return false; /* All arguments must be in registers. */
10393 case MOD:
10394 /* MOD by a power of 2 can be expanded as:
10395 rsbs r1, r0, #0
10396 and r0, r0, #(n - 1)
10397 and r1, r1, #(n - 1)
10398 rsbpl r0, r1, #0. */
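      /* For example, x % 16 uses the sequence above with #(n - 1) == #15;
         the code below costs it as the base insn plus three more.  */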
10399 if (CONST_INT_P (XEXP (x, 1))
10400 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10401 && mode == SImode)
10403 *cost += COSTS_N_INSNS (3);
10405 if (speed_p)
10406 *cost += 2 * extra_cost->alu.logical
10407 + extra_cost->alu.arith;
10408 return true;
10411 /* Fall-through. */
10412 case UMOD:
10413 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10414 possible, udiv is preferred. */
10415 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10416 return false; /* All arguments must be in registers. */
10418 case ROTATE:
10419 if (mode == SImode && REG_P (XEXP (x, 1)))
10421 *cost += (COSTS_N_INSNS (1)
10422 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10423 if (speed_p)
10424 *cost += extra_cost->alu.shift_reg;
10425 return true;
10427 /* Fall through */
10428 case ROTATERT:
10429 case ASHIFT:
10430 case LSHIFTRT:
10431 case ASHIFTRT:
10432 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10434 *cost += (COSTS_N_INSNS (2)
10435 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10436 if (speed_p)
10437 *cost += 2 * extra_cost->alu.shift;
10438 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10439 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10440 *cost += 1;
10441 return true;
10443 else if (mode == SImode)
10445 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10446 /* Slightly disparage register shifts at -Os, but not by much. */
10447 if (!CONST_INT_P (XEXP (x, 1)))
10448 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10449 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10450 return true;
10452 else if (GET_MODE_CLASS (mode) == MODE_INT
10453 && GET_MODE_SIZE (mode) < 4)
10455 if (code == ASHIFT)
10457 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10458 /* Slightly disparage register shifts at -Os, but not by
10459 much. */
10460 if (!CONST_INT_P (XEXP (x, 1)))
10461 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10462 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10464 else if (code == LSHIFTRT || code == ASHIFTRT)
10466 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10468 /* Can use SBFX/UBFX. */
10469 if (speed_p)
10470 *cost += extra_cost->alu.bfx;
10471 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10473 else
10475 *cost += COSTS_N_INSNS (1);
10476 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10477 if (speed_p)
10479 if (CONST_INT_P (XEXP (x, 1)))
10480 *cost += 2 * extra_cost->alu.shift;
10481 else
10482 *cost += (extra_cost->alu.shift
10483 + extra_cost->alu.shift_reg);
10485 else
10486 /* Slightly disparage register shifts. */
10487 *cost += !CONST_INT_P (XEXP (x, 1));
10490 else /* Rotates. */
10492 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10493 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10494 if (speed_p)
10496 if (CONST_INT_P (XEXP (x, 1)))
10497 *cost += (2 * extra_cost->alu.shift
10498 + extra_cost->alu.log_shift);
10499 else
10500 *cost += (extra_cost->alu.shift
10501 + extra_cost->alu.shift_reg
10502 + extra_cost->alu.log_shift_reg);
10505 return true;
10508 *cost = LIBCALL_COST (2);
10509 return false;
10511 case BSWAP:
10512 if (arm_arch6)
10514 if (mode == SImode)
10516 if (speed_p)
10517 *cost += extra_cost->alu.rev;
10519 return false;
10522 else
10524 /* No rev instruction available. Look at arm_legacy_rev
10525 and thumb_legacy_rev for the form of RTL used then. */
10526 if (TARGET_THUMB)
10528 *cost += COSTS_N_INSNS (9);
10530 if (speed_p)
10532 *cost += 6 * extra_cost->alu.shift;
10533 *cost += 3 * extra_cost->alu.logical;
10536 else
10538 *cost += COSTS_N_INSNS (4);
10540 if (speed_p)
10542 *cost += 2 * extra_cost->alu.shift;
10543 *cost += extra_cost->alu.arith_shift;
10544 *cost += 2 * extra_cost->alu.logical;
10547 return true;
10549 return false;
10551 case MINUS:
10552 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10553 && (mode == SFmode || !TARGET_VFP_SINGLE))
10555 if (GET_CODE (XEXP (x, 0)) == MULT
10556 || GET_CODE (XEXP (x, 1)) == MULT)
10558 rtx mul_op0, mul_op1, sub_op;
10560 if (speed_p)
10561 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10563 if (GET_CODE (XEXP (x, 0)) == MULT)
10565 mul_op0 = XEXP (XEXP (x, 0), 0);
10566 mul_op1 = XEXP (XEXP (x, 0), 1);
10567 sub_op = XEXP (x, 1);
10569 else
10571 mul_op0 = XEXP (XEXP (x, 1), 0);
10572 mul_op1 = XEXP (XEXP (x, 1), 1);
10573 sub_op = XEXP (x, 0);
10576 /* The first operand of the multiply may be optionally
10577 negated. */
10578 if (GET_CODE (mul_op0) == NEG)
10579 mul_op0 = XEXP (mul_op0, 0);
10581 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10582 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10583 + rtx_cost (sub_op, mode, code, 0, speed_p));
10585 return true;
10588 if (speed_p)
10589 *cost += extra_cost->fp[mode != SFmode].addsub;
10590 return false;
10593 if (mode == SImode)
10595 rtx shift_by_reg = NULL;
10596 rtx shift_op;
10597 rtx non_shift_op;
10598 rtx op0 = XEXP (x, 0);
10599 rtx op1 = XEXP (x, 1);
10601 /* Factor out any borrow operation. There's more than one way
10602 of expressing this; try to recognize them all. */
10603 if (GET_CODE (op0) == MINUS)
10605 if (arm_borrow_operation (op1, SImode))
10607 op1 = XEXP (op0, 1);
10608 op0 = XEXP (op0, 0);
10610 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10611 op0 = XEXP (op0, 0);
10613 else if (GET_CODE (op1) == PLUS
10614 && arm_borrow_operation (XEXP (op1, 0), SImode))
10615 op1 = XEXP (op1, 0);
10616 else if (GET_CODE (op0) == NEG
10617 && arm_borrow_operation (op1, SImode))
10619 /* Negate with carry-in. For Thumb2 this is done with
10620 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10621 RSC instruction that exists in Arm mode. */
10622 if (speed_p)
10623 *cost += (TARGET_THUMB2
10624 ? extra_cost->alu.arith_shift
10625 : extra_cost->alu.arith);
10626 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10627 return true;
10629 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10630 Note we do mean ~borrow here. */
10631 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10633 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10634 return true;
10637 shift_op = shifter_op_p (op0, &shift_by_reg);
10638 if (shift_op == NULL)
10640 shift_op = shifter_op_p (op1, &shift_by_reg);
10641 non_shift_op = op0;
10643 else
10644 non_shift_op = op1;
10646 if (shift_op != NULL)
10648 if (shift_by_reg != NULL)
10650 if (speed_p)
10651 *cost += extra_cost->alu.arith_shift_reg;
10652 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10654 else if (speed_p)
10655 *cost += extra_cost->alu.arith_shift;
10657 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10658 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10659 return true;
10662 if (arm_arch_thumb2
10663 && GET_CODE (XEXP (x, 1)) == MULT)
10665 /* MLS. */
10666 if (speed_p)
10667 *cost += extra_cost->mult[0].add;
10668 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10669 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10670 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10671 return true;
10674 if (CONST_INT_P (op0))
10676 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10677 INTVAL (op0), NULL_RTX,
10678 NULL_RTX, 1, 0);
10679 *cost = COSTS_N_INSNS (insns);
10680 if (speed_p)
10681 *cost += insns * extra_cost->alu.arith;
10682 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10683 return true;
10685 else if (speed_p)
10686 *cost += extra_cost->alu.arith;
10688 /* Don't recurse as we don't want to cost any borrow that
10689 we've stripped. */
10690 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10691 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10692 return true;
10695 if (GET_MODE_CLASS (mode) == MODE_INT
10696 && GET_MODE_SIZE (mode) < 4)
10698 rtx shift_op, shift_reg;
10699 shift_reg = NULL;
10701 /* We check both sides of the MINUS for shifter operands since,
10702 unlike PLUS, it's not commutative. */
10704 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10705 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10707 /* Slightly disparage, as we might need to widen the result. */
10708 *cost += 1;
10709 if (speed_p)
10710 *cost += extra_cost->alu.arith;
10712 if (CONST_INT_P (XEXP (x, 0)))
10714 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10715 return true;
10718 return false;
10721 if (mode == DImode)
10723 *cost += COSTS_N_INSNS (1);
10725 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10727 rtx op1 = XEXP (x, 1);
10729 if (speed_p)
10730 *cost += 2 * extra_cost->alu.arith;
10732 if (GET_CODE (op1) == ZERO_EXTEND)
10733 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10734 0, speed_p);
10735 else
10736 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10737 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10738 0, speed_p);
10739 return true;
10741 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10743 if (speed_p)
10744 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10745 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10746 0, speed_p)
10747 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10748 return true;
10750 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10751 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10753 if (speed_p)
10754 *cost += (extra_cost->alu.arith
10755 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10756 ? extra_cost->alu.arith
10757 : extra_cost->alu.arith_shift));
10758 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10759 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10760 GET_CODE (XEXP (x, 1)), 0, speed_p));
10761 return true;
10764 if (speed_p)
10765 *cost += 2 * extra_cost->alu.arith;
10766 return false;
10769 /* Vector mode? */
10771 *cost = LIBCALL_COST (2);
10772 return false;
10774 case PLUS:
10775 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10776 && (mode == SFmode || !TARGET_VFP_SINGLE))
10778 if (GET_CODE (XEXP (x, 0)) == MULT)
10780 rtx mul_op0, mul_op1, add_op;
10782 if (speed_p)
10783 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10785 mul_op0 = XEXP (XEXP (x, 0), 0);
10786 mul_op1 = XEXP (XEXP (x, 0), 1);
10787 add_op = XEXP (x, 1);
10789 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10790 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10791 + rtx_cost (add_op, mode, code, 0, speed_p));
10793 return true;
10796 if (speed_p)
10797 *cost += extra_cost->fp[mode != SFmode].addsub;
10798 return false;
10800 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10802 *cost = LIBCALL_COST (2);
10803 return false;
10806 /* Narrow modes can be synthesized in SImode, but the range
10807 of useful sub-operations is limited. Check for shift operations
10808 on one of the operands. Only left shifts can be used in the
10809 narrow modes. */
10810 if (GET_MODE_CLASS (mode) == MODE_INT
10811 && GET_MODE_SIZE (mode) < 4)
10813 rtx shift_op, shift_reg;
10814 shift_reg = NULL;
10816 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10818 if (CONST_INT_P (XEXP (x, 1)))
10820 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10821 INTVAL (XEXP (x, 1)), NULL_RTX,
10822 NULL_RTX, 1, 0);
10823 *cost = COSTS_N_INSNS (insns);
10824 if (speed_p)
10825 *cost += insns * extra_cost->alu.arith;
10826 /* Slightly penalize a narrow operation as the result may
10827 need widening. */
10828 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10829 return true;
10832 /* Slightly penalize a narrow operation as the result may
10833 need widening. */
10834 *cost += 1;
10835 if (speed_p)
10836 *cost += extra_cost->alu.arith;
10838 return false;
10841 if (mode == SImode)
10843 rtx shift_op, shift_reg;
10845 if (TARGET_INT_SIMD
10846 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10847 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10849 /* UXTA[BH] or SXTA[BH]. */
10850 if (speed_p)
10851 *cost += extra_cost->alu.extend_arith;
10852 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10853 0, speed_p)
10854 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10855 return true;
10858 rtx op0 = XEXP (x, 0);
10859 rtx op1 = XEXP (x, 1);
10861 /* Handle a side effect of adding in the carry to an addition. */
10862 if (GET_CODE (op0) == PLUS
10863 && arm_carry_operation (op1, mode))
10865 op1 = XEXP (op0, 1);
10866 op0 = XEXP (op0, 0);
10868 else if (GET_CODE (op1) == PLUS
10869 && arm_carry_operation (op0, mode))
10871 op0 = XEXP (op1, 0);
10872 op1 = XEXP (op1, 1);
10874 else if (GET_CODE (op0) == PLUS)
10876 op0 = strip_carry_operation (op0);
10877 if (swap_commutative_operands_p (op0, op1))
10878 std::swap (op0, op1);
10881 if (arm_carry_operation (op0, mode))
10883 /* Adding the carry to a register is a canonicalization of
10884 adding 0 to the register plus the carry. */
10885 if (speed_p)
10886 *cost += extra_cost->alu.arith;
10887 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10888 return true;
10891 shift_reg = NULL;
10892 shift_op = shifter_op_p (op0, &shift_reg);
10893 if (shift_op != NULL)
10895 if (shift_reg)
10897 if (speed_p)
10898 *cost += extra_cost->alu.arith_shift_reg;
10899 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10901 else if (speed_p)
10902 *cost += extra_cost->alu.arith_shift;
10904 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10905 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10906 return true;
10909 if (GET_CODE (op0) == MULT)
10911 rtx mul_op = op0;
10913 if (TARGET_DSP_MULTIPLY
10914 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10915 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10916 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10917 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10918 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10919 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10920 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10921 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10922 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10923 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10924 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10925 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10926 == 16))))))
10928 /* SMLA[BT][BT]. */
10929 if (speed_p)
10930 *cost += extra_cost->mult[0].extend_add;
10931 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10932 SIGN_EXTEND, 0, speed_p)
10933 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10934 SIGN_EXTEND, 0, speed_p)
10935 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10936 return true;
10939 if (speed_p)
10940 *cost += extra_cost->mult[0].add;
10941 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10942 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10943 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10944 return true;
10947 if (CONST_INT_P (op1))
10949 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10950 INTVAL (op1), NULL_RTX,
10951 NULL_RTX, 1, 0);
10952 *cost = COSTS_N_INSNS (insns);
10953 if (speed_p)
10954 *cost += insns * extra_cost->alu.arith;
10955 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10956 return true;
10959 if (speed_p)
10960 *cost += extra_cost->alu.arith;
10962 /* Don't recurse here because we want to test the operands
10963 without any carry operation. */
10964 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10965 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10966 return true;
10969 if (mode == DImode)
10971 if (GET_CODE (XEXP (x, 0)) == MULT
10972 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10973 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10974 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10975 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10977 if (speed_p)
10978 *cost += extra_cost->mult[1].extend_add;
10979 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10980 ZERO_EXTEND, 0, speed_p)
10981 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10982 ZERO_EXTEND, 0, speed_p)
10983 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10984 return true;
10987 *cost += COSTS_N_INSNS (1);
10989 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10990 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10992 if (speed_p)
10993 *cost += (extra_cost->alu.arith
10994 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10995 ? extra_cost->alu.arith
10996 : extra_cost->alu.arith_shift));
10998 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10999 0, speed_p)
11000 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11001 return true;
11004 if (speed_p)
11005 *cost += 2 * extra_cost->alu.arith;
11006 return false;
11009 /* Vector mode? */
11010 *cost = LIBCALL_COST (2);
11011 return false;
11012 case IOR:
11014 rtx sub0, sub1;
11015 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11017 if (speed_p)
11018 *cost += extra_cost->alu.rev;
11020 return true;
11022 else if (mode == SImode && arm_arch_thumb2
11023 && arm_bfi_p (x, &sub0, &sub1))
11025 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11026 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11027 if (speed_p)
11028 *cost += extra_cost->alu.bfi;
11030 return true;
11034 /* Fall through. */
11035 case AND: case XOR:
11036 if (mode == SImode)
11038 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11039 rtx op0 = XEXP (x, 0);
11040 rtx shift_op, shift_reg;
11042 if (subcode == NOT
11043 && (code == AND
11044 || (code == IOR && TARGET_THUMB2)))
11045 op0 = XEXP (op0, 0);
11047 shift_reg = NULL;
11048 shift_op = shifter_op_p (op0, &shift_reg);
11049 if (shift_op != NULL)
11051 if (shift_reg)
11053 if (speed_p)
11054 *cost += extra_cost->alu.log_shift_reg;
11055 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11057 else if (speed_p)
11058 *cost += extra_cost->alu.log_shift;
11060 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11061 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11062 return true;
11065 if (CONST_INT_P (XEXP (x, 1)))
11067 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11068 INTVAL (XEXP (x, 1)), NULL_RTX,
11069 NULL_RTX, 1, 0);
11071 *cost = COSTS_N_INSNS (insns);
11072 if (speed_p)
11073 *cost += insns * extra_cost->alu.logical;
11074 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11075 return true;
11078 if (speed_p)
11079 *cost += extra_cost->alu.logical;
11080 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11081 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11082 return true;
11085 if (mode == DImode)
11087 rtx op0 = XEXP (x, 0);
11088 enum rtx_code subcode = GET_CODE (op0);
11090 *cost += COSTS_N_INSNS (1);
11092 if (subcode == NOT
11093 && (code == AND
11094 || (code == IOR && TARGET_THUMB2)))
11095 op0 = XEXP (op0, 0);
11097 if (GET_CODE (op0) == ZERO_EXTEND)
11099 if (speed_p)
11100 *cost += 2 * extra_cost->alu.logical;
11102 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11103 0, speed_p)
11104 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11105 return true;
11107 else if (GET_CODE (op0) == SIGN_EXTEND)
11109 if (speed_p)
11110 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11112 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11113 0, speed_p)
11114 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11115 return true;
11118 if (speed_p)
11119 *cost += 2 * extra_cost->alu.logical;
11121 return true;
11123 /* Vector mode? */
11125 *cost = LIBCALL_COST (2);
11126 return false;
11128 case MULT:
11129 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11130 && (mode == SFmode || !TARGET_VFP_SINGLE))
11132 rtx op0 = XEXP (x, 0);
11134 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11135 op0 = XEXP (op0, 0);
11137 if (speed_p)
11138 *cost += extra_cost->fp[mode != SFmode].mult;
11140 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11141 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11142 return true;
11144 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11146 *cost = LIBCALL_COST (2);
11147 return false;
11150 if (mode == SImode)
11152 if (TARGET_DSP_MULTIPLY
11153 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11154 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11155 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11156 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11157 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11158 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11159 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11160 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11161 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11162 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11163 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11164 && (INTVAL (XEXP (XEXP (x, 1), 1))
11165 == 16))))))
11167 /* SMUL[TB][TB]. */
11168 if (speed_p)
11169 *cost += extra_cost->mult[0].extend;
11170 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11171 SIGN_EXTEND, 0, speed_p);
11172 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11173 SIGN_EXTEND, 1, speed_p);
11174 return true;
11176 if (speed_p)
11177 *cost += extra_cost->mult[0].simple;
11178 return false;
11181 if (mode == DImode)
11183 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11184 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11185 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11186 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11188 if (speed_p)
11189 *cost += extra_cost->mult[1].extend;
11190 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11191 ZERO_EXTEND, 0, speed_p)
11192 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11193 ZERO_EXTEND, 0, speed_p));
11194 return true;
11197 *cost = LIBCALL_COST (2);
11198 return false;
11201 /* Vector mode? */
11202 *cost = LIBCALL_COST (2);
11203 return false;
11205 case NEG:
11206 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11207 && (mode == SFmode || !TARGET_VFP_SINGLE))
11209 if (GET_CODE (XEXP (x, 0)) == MULT)
11211 /* VNMUL. */
11212 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11213 return true;
11216 if (speed_p)
11217 *cost += extra_cost->fp[mode != SFmode].neg;
11219 return false;
11221 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11223 *cost = LIBCALL_COST (1);
11224 return false;
11227 if (mode == SImode)
11229 if (GET_CODE (XEXP (x, 0)) == ABS)
11231 *cost += COSTS_N_INSNS (1);
11232 /* Assume the non-flag-changing variant. */
11233 if (speed_p)
11234 *cost += (extra_cost->alu.log_shift
11235 + extra_cost->alu.arith_shift);
11236 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11237 return true;
11240 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11241 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11243 *cost += COSTS_N_INSNS (1);
11244 /* No extra cost for MOV imm and MVN imm. */
11245 /* If the comparison op is using the flags, there's no further
11246 cost, otherwise we need to add the cost of the comparison. */
11247 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11248 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11249 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11251 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11252 *cost += (COSTS_N_INSNS (1)
11253 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11254 0, speed_p)
11255 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11256 1, speed_p));
11257 if (speed_p)
11258 *cost += extra_cost->alu.arith;
11260 return true;
11263 if (speed_p)
11264 *cost += extra_cost->alu.arith;
11265 return false;
11268 if (GET_MODE_CLASS (mode) == MODE_INT
11269 && GET_MODE_SIZE (mode) < 4)
11271 /* Slightly disparage, as we might need an extend operation. */
11272 *cost += 1;
11273 if (speed_p)
11274 *cost += extra_cost->alu.arith;
11275 return false;
11278 if (mode == DImode)
11280 *cost += COSTS_N_INSNS (1);
11281 if (speed_p)
11282 *cost += 2 * extra_cost->alu.arith;
11283 return false;
11286 /* Vector mode? */
11287 *cost = LIBCALL_COST (1);
11288 return false;
11290 case NOT:
11291 if (mode == SImode)
11293 rtx shift_op;
11294 rtx shift_reg = NULL;
11296 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11298 if (shift_op)
11300 if (shift_reg != NULL)
11302 if (speed_p)
11303 *cost += extra_cost->alu.log_shift_reg;
11304 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11306 else if (speed_p)
11307 *cost += extra_cost->alu.log_shift;
11308 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11309 return true;
11312 if (speed_p)
11313 *cost += extra_cost->alu.logical;
11314 return false;
11316 if (mode == DImode)
11318 *cost += COSTS_N_INSNS (1);
11319 return false;
11322 /* Vector mode? */
11324 *cost += LIBCALL_COST (1);
11325 return false;
11327 case IF_THEN_ELSE:
11329 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11331 *cost += COSTS_N_INSNS (3);
11332 return true;
11334 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11335 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11337 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11338 /* Assume that if one arm of the if_then_else is a register,
11339 it will be tied with the result, eliminating the
11340 conditional insn. */
11341 if (REG_P (XEXP (x, 1)))
11342 *cost += op2cost;
11343 else if (REG_P (XEXP (x, 2)))
11344 *cost += op1cost;
11345 else
11347 if (speed_p)
11349 if (extra_cost->alu.non_exec_costs_exec)
11350 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11351 else
11352 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11354 else
11355 *cost += op1cost + op2cost;
11358 return true;
11360 case COMPARE:
11361 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11362 *cost = 0;
11363 else
11365 machine_mode op0mode;
11366 /* We'll mostly assume that the cost of a compare is the cost of the
11367 LHS. However, there are some notable exceptions. */
11369 /* Floating point compares are never done as side-effects. */
11370 op0mode = GET_MODE (XEXP (x, 0));
11371 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11372 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11374 if (speed_p)
11375 *cost += extra_cost->fp[op0mode != SFmode].compare;
11377 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11379 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11380 return true;
11383 return false;
11385 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11387 *cost = LIBCALL_COST (2);
11388 return false;
11391 /* DImode compares normally take two insns. */
11392 if (op0mode == DImode)
11394 *cost += COSTS_N_INSNS (1);
11395 if (speed_p)
11396 *cost += 2 * extra_cost->alu.arith;
11397 return false;
11400 if (op0mode == SImode)
11402 rtx shift_op;
11403 rtx shift_reg;
11405 if (XEXP (x, 1) == const0_rtx
11406 && !(REG_P (XEXP (x, 0))
11407 || (GET_CODE (XEXP (x, 0)) == SUBREG
11408 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11410 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11412 /* Multiply operations that set the flags are often
11413 significantly more expensive. */
11414 if (speed_p
11415 && GET_CODE (XEXP (x, 0)) == MULT
11416 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11417 *cost += extra_cost->mult[0].flag_setting;
11419 if (speed_p
11420 && GET_CODE (XEXP (x, 0)) == PLUS
11421 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11422 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11423 0), 1), mode))
11424 *cost += extra_cost->mult[0].flag_setting;
11425 return true;
11428 shift_reg = NULL;
11429 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11430 if (shift_op != NULL)
11432 if (shift_reg != NULL)
11434 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11435 1, speed_p);
11436 if (speed_p)
11437 *cost += extra_cost->alu.arith_shift_reg;
11439 else if (speed_p)
11440 *cost += extra_cost->alu.arith_shift;
11441 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11442 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11443 return true;
11446 if (speed_p)
11447 *cost += extra_cost->alu.arith;
11448 if (CONST_INT_P (XEXP (x, 1))
11449 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11451 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11452 return true;
11454 return false;
11457 /* Vector mode? */
11459 *cost = LIBCALL_COST (2);
11460 return false;
11462 return true;
11464 case EQ:
11465 case GE:
11466 case GT:
11467 case LE:
11468 case LT:
11469 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11470 vcle and vclt). */
11471 if (TARGET_NEON
11472 && TARGET_HARD_FLOAT
11473 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11474 && (XEXP (x, 1) == CONST0_RTX (mode)))
11476 *cost = 0;
11477 return true;
11480 /* Fall through. */
11481 case NE:
11482 case LTU:
11483 case LEU:
11484 case GEU:
11485 case GTU:
11486 case ORDERED:
11487 case UNORDERED:
11488 case UNEQ:
11489 case UNLE:
11490 case UNLT:
11491 case UNGE:
11492 case UNGT:
11493 case LTGT:
11494 if (outer_code == SET)
11496 /* Is it a store-flag operation? */
11497 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11498 && XEXP (x, 1) == const0_rtx)
11500 /* Thumb also needs an IT insn. */
11501 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11502 return true;
11504 if (XEXP (x, 1) == const0_rtx)
11506 switch (code)
11508 case LT:
11509 /* LSR Rd, Rn, #31. */
11510 if (speed_p)
11511 *cost += extra_cost->alu.shift;
11512 break;
11514 case EQ:
11515 /* RSBS T1, Rn, #0
11516 ADC Rd, Rn, T1. */
11518 case NE:
11519 /* SUBS T1, Rn, #1
11520 SBC Rd, Rn, T1. */
11521 *cost += COSTS_N_INSNS (1);
11522 break;
11524 case LE:
11525 /* RSBS T1, Rn, Rn, LSR #31
11526 ADC Rd, Rn, T1. */
11527 *cost += COSTS_N_INSNS (1);
11528 if (speed_p)
11529 *cost += extra_cost->alu.arith_shift;
11530 break;
11532 case GT:
11533 /* RSB Rd, Rn, Rn, ASR #1
11534 LSR Rd, Rd, #31. */
11535 *cost += COSTS_N_INSNS (1);
11536 if (speed_p)
11537 *cost += (extra_cost->alu.arith_shift
11538 + extra_cost->alu.shift);
11539 break;
11541 case GE:
11542 /* ASR Rd, Rn, #31
11543 ADD Rd, Rn, #1. */
11544 *cost += COSTS_N_INSNS (1);
11545 if (speed_p)
11546 *cost += extra_cost->alu.shift;
11547 break;
11549 default:
11550 /* Remaining cases are either meaningless or would take
11551 three insns anyway. */
11552 *cost = COSTS_N_INSNS (3);
11553 break;
11555 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11556 return true;
11558 else
11560 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11561 if (CONST_INT_P (XEXP (x, 1))
11562 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11564 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11565 return true;
11568 return false;
11571 /* Not directly inside a set. If it involves the condition code
11572 register it must be the condition for a branch, cond_exec or
11573 I_T_E operation. Since the comparison is performed elsewhere
11574 this is just the control part which has no additional
11575 cost. */
11576 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11577 && XEXP (x, 1) == const0_rtx)
11579 *cost = 0;
11580 return true;
11582 return false;
11584 case ABS:
11585 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11586 && (mode == SFmode || !TARGET_VFP_SINGLE))
11588 if (speed_p)
11589 *cost += extra_cost->fp[mode != SFmode].neg;
11591 return false;
11593 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11595 *cost = LIBCALL_COST (1);
11596 return false;
11599 if (mode == SImode)
11601 if (speed_p)
11602 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11603 return false;
11605 /* Vector mode? */
11606 *cost = LIBCALL_COST (1);
11607 return false;
11609 case SIGN_EXTEND:
11610 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11611 && MEM_P (XEXP (x, 0)))
11613 if (mode == DImode)
11614 *cost += COSTS_N_INSNS (1);
11616 if (!speed_p)
11617 return true;
11619 if (GET_MODE (XEXP (x, 0)) == SImode)
11620 *cost += extra_cost->ldst.load;
11621 else
11622 *cost += extra_cost->ldst.load_sign_extend;
11624 if (mode == DImode)
11625 *cost += extra_cost->alu.shift;
11627 return true;
11630 /* Widening from less than 32-bits requires an extend operation. */
11631 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11633 /* We have SXTB/SXTH. */
11634 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11635 if (speed_p)
11636 *cost += extra_cost->alu.extend;
11638 else if (GET_MODE (XEXP (x, 0)) != SImode)
11640 /* Needs two shifts. */
11641 *cost += COSTS_N_INSNS (1);
11642 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11643 if (speed_p)
11644 *cost += 2 * extra_cost->alu.shift;
11647 /* Widening beyond 32-bits requires one more insn. */
11648 if (mode == DImode)
11650 *cost += COSTS_N_INSNS (1);
11651 if (speed_p)
11652 *cost += extra_cost->alu.shift;
11655 return true;
11657 case ZERO_EXTEND:
11658 if ((arm_arch4
11659 || GET_MODE (XEXP (x, 0)) == SImode
11660 || GET_MODE (XEXP (x, 0)) == QImode)
11661 && MEM_P (XEXP (x, 0)))
11663 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11665 if (mode == DImode)
11666 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11668 return true;
11671 /* Widening from less than 32-bits requires an extend operation. */
11672 if (GET_MODE (XEXP (x, 0)) == QImode)
11674 /* UXTB can be a shorter instruction in Thumb2, but it might
11675 be slower than the AND Rd, Rn, #255 alternative. When
11676 optimizing for speed it should never be slower to use
11677 AND, and we don't really model 16-bit vs 32-bit insns
11678 here. */
11679 if (speed_p)
11680 *cost += extra_cost->alu.logical;
11682 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11684 /* We have UXTB/UXTH. */
11685 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11686 if (speed_p)
11687 *cost += extra_cost->alu.extend;
11689 else if (GET_MODE (XEXP (x, 0)) != SImode)
11691 /* Needs two shifts. It's marginally preferable to use
11692 shifts rather than two BIC instructions as the second
11693 shift may merge with a subsequent insn as a shifter
11694 op. */
11695 *cost = COSTS_N_INSNS (2);
11696 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11697 if (speed_p)
11698 *cost += 2 * extra_cost->alu.shift;
11701 /* Widening beyond 32-bits requires one more insn. */
11702 if (mode == DImode)
11704 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11707 return true;
11709 case CONST_INT:
11710 *cost = 0;
11711 /* CONST_INT has no mode, so we cannot tell for sure how many
11712 insns are really going to be needed. The best we can do is
11713 look at the value passed. If it fits in SImode, then assume
11714 that's the mode it will be used for. Otherwise assume it
11715 will be used in DImode. */
11716 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11717 mode = SImode;
11718 else
11719 mode = DImode;
11721 /* Avoid blowing up in arm_gen_constant (). */
11722 if (!(outer_code == PLUS
11723 || outer_code == AND
11724 || outer_code == IOR
11725 || outer_code == XOR
11726 || outer_code == MINUS))
11727 outer_code = SET;
11729 const_int_cost:
11730 if (mode == SImode)
11732 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11733 INTVAL (x), NULL, NULL,
11734 0, 0));
11735 /* Extra costs? */
11737 else
11739 *cost += COSTS_N_INSNS (arm_gen_constant
11740 (outer_code, SImode, NULL,
11741 trunc_int_for_mode (INTVAL (x), SImode),
11742 NULL, NULL, 0, 0)
11743 + arm_gen_constant (outer_code, SImode, NULL,
11744 INTVAL (x) >> 32, NULL,
11745 NULL, 0, 0));
11746 /* Extra costs? */
11749 return true;
11751 case CONST:
11752 case LABEL_REF:
11753 case SYMBOL_REF:
11754 if (speed_p)
11756 if (arm_arch_thumb2 && !flag_pic)
11757 *cost += COSTS_N_INSNS (1);
11758 else
11759 *cost += extra_cost->ldst.load;
11761 else
11762 *cost += COSTS_N_INSNS (1);
11764 if (flag_pic)
11766 *cost += COSTS_N_INSNS (1);
11767 if (speed_p)
11768 *cost += extra_cost->alu.arith;
11771 return true;
11773 case CONST_FIXED:
11774 *cost = COSTS_N_INSNS (4);
11775 /* Fixme. */
11776 return true;
11778 case CONST_DOUBLE:
11779 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11780 && (mode == SFmode || !TARGET_VFP_SINGLE))
11782 if (vfp3_const_double_rtx (x))
11784 if (speed_p)
11785 *cost += extra_cost->fp[mode == DFmode].fpconst;
11786 return true;
11789 if (speed_p)
11791 if (mode == DFmode)
11792 *cost += extra_cost->ldst.loadd;
11793 else
11794 *cost += extra_cost->ldst.loadf;
11796 else
11797 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11799 return true;
11801 *cost = COSTS_N_INSNS (4);
11802 return true;
11804 case CONST_VECTOR:
11805 /* Fixme. */
11806 if (((TARGET_NEON && TARGET_HARD_FLOAT
11807 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11808 || TARGET_HAVE_MVE)
11809 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11810 *cost = COSTS_N_INSNS (1);
11811 else
11812 *cost = COSTS_N_INSNS (4);
11813 return true;
11815 case HIGH:
11816 case LO_SUM:
11817 /* When optimizing for size, we prefer constant pool entries to
11818 MOVW/MOVT pairs, so bump the cost of these slightly. */
11819 if (!speed_p)
11820 *cost += 1;
11821 return true;
11823 case CLZ:
11824 if (speed_p)
11825 *cost += extra_cost->alu.clz;
11826 return false;
11828 case SMIN:
11829 if (XEXP (x, 1) == const0_rtx)
11831 if (speed_p)
11832 *cost += extra_cost->alu.log_shift;
11833 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11834 return true;
11836 /* Fall through. */
11837 case SMAX:
11838 case UMIN:
11839 case UMAX:
11840 *cost += COSTS_N_INSNS (1);
11841 return false;
11843 case TRUNCATE:
11844 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11845 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11846 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11847 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11848 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11849 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11850 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11851 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11852 == ZERO_EXTEND))))
11854 if (speed_p)
11855 *cost += extra_cost->mult[1].extend;
11856 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11857 ZERO_EXTEND, 0, speed_p)
11858 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11859 ZERO_EXTEND, 0, speed_p));
11860 return true;
11862 *cost = LIBCALL_COST (1);
11863 return false;
11865 case UNSPEC_VOLATILE:
11866 case UNSPEC:
11867 return arm_unspec_cost (x, outer_code, speed_p, cost);
11869 case PC:
11870 /* Reading the PC is like reading any other register. Writing it
11871 is more expensive, but we take that into account elsewhere. */
11872 *cost = 0;
11873 return true;
11875 case ZERO_EXTRACT:
11876 /* TODO: Simple zero_extract of bottom bits using AND. */
11877 /* Fall through. */
11878 case SIGN_EXTRACT:
11879 if (arm_arch6
11880 && mode == SImode
11881 && CONST_INT_P (XEXP (x, 1))
11882 && CONST_INT_P (XEXP (x, 2)))
11884 if (speed_p)
11885 *cost += extra_cost->alu.bfx;
11886 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11887 return true;
11889 /* Without UBFX/SBFX, need to resort to shift operations. */
11890 *cost += COSTS_N_INSNS (1);
11891 if (speed_p)
11892 *cost += 2 * extra_cost->alu.shift;
11893 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11894 return true;
11896 case FLOAT_EXTEND:
11897 if (TARGET_HARD_FLOAT)
11899 if (speed_p)
11900 *cost += extra_cost->fp[mode == DFmode].widen;
11901 if (!TARGET_VFP5
11902 && GET_MODE (XEXP (x, 0)) == HFmode)
11904 /* Pre v8, widening HF->DF is a two-step process, first
11905 widening to SFmode. */
11906 *cost += COSTS_N_INSNS (1);
11907 if (speed_p)
11908 *cost += extra_cost->fp[0].widen;
11910 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11911 return true;
11914 *cost = LIBCALL_COST (1);
11915 return false;
11917 case FLOAT_TRUNCATE:
11918 if (TARGET_HARD_FLOAT)
11920 if (speed_p)
11921 *cost += extra_cost->fp[mode == DFmode].narrow;
11922 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11923 return true;
11924 /* Vector modes? */
11926 *cost = LIBCALL_COST (1);
11927 return false;
11929 case FMA:
11930 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11932 rtx op0 = XEXP (x, 0);
11933 rtx op1 = XEXP (x, 1);
11934 rtx op2 = XEXP (x, 2);
11937 /* vfms or vfnma. */
11938 if (GET_CODE (op0) == NEG)
11939 op0 = XEXP (op0, 0);
11941 /* vfnms or vfnma. */
11942 if (GET_CODE (op2) == NEG)
11943 op2 = XEXP (op2, 0);
11945 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11946 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11947 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11949 if (speed_p)
11950 *cost += extra_cost->fp[mode == DFmode].fma;
11952 return true;
11955 *cost = LIBCALL_COST (3);
11956 return false;
11958 case FIX:
11959 case UNSIGNED_FIX:
11960 if (TARGET_HARD_FLOAT)
11962 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11963 a vcvt fixed-point conversion. */
11964 if (code == FIX && mode == SImode
11965 && GET_CODE (XEXP (x, 0)) == FIX
11966 && GET_MODE (XEXP (x, 0)) == SFmode
11967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11968 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11969 > 0)
11971 if (speed_p)
11972 *cost += extra_cost->fp[0].toint;
11974 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11975 code, 0, speed_p);
11976 return true;
11979 if (GET_MODE_CLASS (mode) == MODE_INT)
11981 mode = GET_MODE (XEXP (x, 0));
11982 if (speed_p)
11983 *cost += extra_cost->fp[mode == DFmode].toint;
11984 /* Strip off the 'cost' of rounding towards zero. */
11985 if (GET_CODE (XEXP (x, 0)) == FIX)
11986 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11987 0, speed_p);
11988 else
11989 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11990 /* ??? Increase the cost to deal with transferring from
11991 FP -> CORE registers? */
11992 return true;
11994 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11995 && TARGET_VFP5)
11997 if (speed_p)
11998 *cost += extra_cost->fp[mode == DFmode].roundint;
11999 return false;
12001 /* Vector costs? */
12003 *cost = LIBCALL_COST (1);
12004 return false;
12006 case FLOAT:
12007 case UNSIGNED_FLOAT:
12008 if (TARGET_HARD_FLOAT)
12010 /* ??? Increase the cost to deal with transferring from CORE
12011 -> FP registers? */
12012 if (speed_p)
12013 *cost += extra_cost->fp[mode == DFmode].fromint;
12014 return false;
12016 *cost = LIBCALL_COST (1);
12017 return false;
12019 case CALL:
12020 return true;
12022 case ASM_OPERANDS:
12024 /* Just a guess. Guess number of instructions in the asm
12025 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12026 though (see PR60663). */
12027 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12028 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12030 *cost = COSTS_N_INSNS (asm_length + num_operands);
12031 return true;
12033 default:
12034 if (mode != VOIDmode)
12035 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12036 else
12037 *cost = COSTS_N_INSNS (4); /* Who knows? */
12038 return false;
12042 #undef HANDLE_NARROW_SHIFT_ARITH
12044 /* RTX costs entry point. */
12046 static bool
12047 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12048 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12050 bool result;
12051 int code = GET_CODE (x);
12052 gcc_assert (current_tune->insn_extra_cost);
12054 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12055 (enum rtx_code) outer_code,
12056 current_tune->insn_extra_cost,
12057 total, speed);
12059 if (dump_file && arm_verbose_cost)
12061 print_rtl_single (dump_file, x);
12062 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12063 *total, result ? "final" : "partial");
12065 return result;
12068 static int
12069 arm_insn_cost (rtx_insn *insn, bool speed)
12071 int cost;
12073 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12074 will likely disappear during register allocation. */
12075 if (!reload_completed
12076 && GET_CODE (PATTERN (insn)) == SET
12077 && REG_P (SET_DEST (PATTERN (insn)))
12078 && REG_P (SET_SRC (PATTERN (insn))))
12079 return 2;
12080 cost = pattern_cost (PATTERN (insn), speed);
12081 /* If the cost is zero, then it's likely a complex insn. We don't want the
12082 cost of these to be less than something we know about. */
12083 return cost ? cost : COSTS_N_INSNS (2);
12086 /* All address computations that can be done are free, but rtx cost returns
12087 the same for practically all of them. So we weight the different types
12088 of address here in the order (most pref first):
12089 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12090 static inline int
12091 arm_arm_address_cost (rtx x)
12093 enum rtx_code c = GET_CODE (x);
12095 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12096 return 0;
12097 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12098 return 10;
12100 if (c == PLUS)
12102 if (CONST_INT_P (XEXP (x, 1)))
12103 return 2;
12105 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12106 return 3;
12108 return 4;
12111 return 6;
12114 static inline int
12115 arm_thumb_address_cost (rtx x)
12117 enum rtx_code c = GET_CODE (x);
12119 if (c == REG)
12120 return 1;
12121 if (c == PLUS
12122 && REG_P (XEXP (x, 0))
12123 && CONST_INT_P (XEXP (x, 1)))
12124 return 1;
12126 return 2;
12129 static int
12130 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12131 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12133 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12136 /* Adjust cost hook for XScale. */
12137 static bool
12138 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12139 int * cost)
12141 /* Some true dependencies can have a higher cost depending
12142 on precisely how certain input operands are used. */
12143 if (dep_type == 0
12144 && recog_memoized (insn) >= 0
12145 && recog_memoized (dep) >= 0)
12147 int shift_opnum = get_attr_shift (insn);
12148 enum attr_type attr_type = get_attr_type (dep);
12150 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12151 operand for INSN. If we have a shifted input operand and the
12152 instruction we depend on is another ALU instruction, then we may
12153 have to account for an additional stall. */
12154 if (shift_opnum != 0
12155 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12156 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12157 || attr_type == TYPE_ALUS_SHIFT_IMM
12158 || attr_type == TYPE_LOGIC_SHIFT_IMM
12159 || attr_type == TYPE_LOGICS_SHIFT_IMM
12160 || attr_type == TYPE_ALU_SHIFT_REG
12161 || attr_type == TYPE_ALUS_SHIFT_REG
12162 || attr_type == TYPE_LOGIC_SHIFT_REG
12163 || attr_type == TYPE_LOGICS_SHIFT_REG
12164 || attr_type == TYPE_MOV_SHIFT
12165 || attr_type == TYPE_MVN_SHIFT
12166 || attr_type == TYPE_MOV_SHIFT_REG
12167 || attr_type == TYPE_MVN_SHIFT_REG))
12169 rtx shifted_operand;
12170 int opno;
12172 /* Get the shifted operand. */
12173 extract_insn (insn);
12174 shifted_operand = recog_data.operand[shift_opnum];
12176 /* Iterate over all the operands in DEP. If we write an operand
12177 that overlaps with SHIFTED_OPERAND, then we have to increase the
12178 cost of this dependency. */
12179 extract_insn (dep);
12180 preprocess_constraints (dep);
12181 for (opno = 0; opno < recog_data.n_operands; opno++)
12183 /* We can ignore strict inputs. */
12184 if (recog_data.operand_type[opno] == OP_IN)
12185 continue;
12187 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12188 shifted_operand))
12190 *cost = 2;
12191 return false;
12196 return true;
12199 /* Adjust cost hook for Cortex A9. */
12200 static bool
12201 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12202 int * cost)
12204 switch (dep_type)
12206 case REG_DEP_ANTI:
12207 *cost = 0;
12208 return false;
12210 case REG_DEP_TRUE:
12211 case REG_DEP_OUTPUT:
12212 if (recog_memoized (insn) >= 0
12213 && recog_memoized (dep) >= 0)
12215 if (GET_CODE (PATTERN (insn)) == SET)
12217 if (GET_MODE_CLASS
12218 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12219 || GET_MODE_CLASS
12220 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12222 enum attr_type attr_type_insn = get_attr_type (insn);
12223 enum attr_type attr_type_dep = get_attr_type (dep);
12225 /* By default all dependencies of the form
12226 s0 = s0 <op> s1
12227 s0 = s0 <op> s2
12228 have an extra latency of 1 cycle because
12229 of the input and output dependency in this
12230 case. However, this gets modeled as a true
12231 dependency and hence all these checks. */
12232 if (REG_P (SET_DEST (PATTERN (insn)))
12233 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12235 /* FMACS is a special case where the dependent
12236 instruction can be issued 3 cycles before
12237 the normal latency in case of an output
12238 dependency. */
12239 if ((attr_type_insn == TYPE_FMACS
12240 || attr_type_insn == TYPE_FMACD)
12241 && (attr_type_dep == TYPE_FMACS
12242 || attr_type_dep == TYPE_FMACD))
12244 if (dep_type == REG_DEP_OUTPUT)
12245 *cost = insn_default_latency (dep) - 3;
12246 else
12247 *cost = insn_default_latency (dep);
12248 return false;
12250 else
12252 if (dep_type == REG_DEP_OUTPUT)
12253 *cost = insn_default_latency (dep) + 1;
12254 else
12255 *cost = insn_default_latency (dep);
12257 return false;
12262 break;
12264 default:
12265 gcc_unreachable ();
12268 return true;
12271 /* Adjust cost hook for FA726TE. */
12272 static bool
12273 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12274 int * cost)
12276 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
12277 by a predicated one) has a penalty of 3. */
12278 if (dep_type == REG_DEP_TRUE
12279 && recog_memoized (insn) >= 0
12280 && recog_memoized (dep) >= 0
12281 && get_attr_conds (dep) == CONDS_SET)
12283 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12284 if (get_attr_conds (insn) == CONDS_USE
12285 && get_attr_type (insn) != TYPE_BRANCH)
12287 *cost = 3;
12288 return false;
12291 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12292 || get_attr_conds (insn) == CONDS_USE)
12294 *cost = 0;
12295 return false;
12299 return true;
12302 /* Implement TARGET_REGISTER_MOVE_COST.
12304 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
12305 a move is typically more expensive than a single memory access. We set
12306 the cost to less than two memory accesses so that floating
12307 point to integer conversion does not go through memory. */
12310 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12311 reg_class_t from, reg_class_t to)
12313 if (TARGET_32BIT)
12315 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12316 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12317 return 15;
12318 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12319 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12320 return 4;
12321 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12322 return 20;
12323 else
12324 return 2;
12326 else
12328 if (from == HI_REGS || to == HI_REGS)
12329 return 4;
12330 else
12331 return 2;
12335 /* Implement TARGET_MEMORY_MOVE_COST. */
12338 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12339 bool in ATTRIBUTE_UNUSED)
12341 if (TARGET_32BIT)
12342 return 10;
12343 else
12345 if (GET_MODE_SIZE (mode) < 4)
12346 return 8;
12347 else
12348 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12352 /* Vectorizer cost model implementation. */
12354 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12355 static int
12356 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12357 tree vectype,
12358 int misalign ATTRIBUTE_UNUSED)
12360 unsigned elements;
12362 switch (type_of_cost)
12364 case scalar_stmt:
12365 return current_tune->vec_costs->scalar_stmt_cost;
12367 case scalar_load:
12368 return current_tune->vec_costs->scalar_load_cost;
12370 case scalar_store:
12371 return current_tune->vec_costs->scalar_store_cost;
12373 case vector_stmt:
12374 return current_tune->vec_costs->vec_stmt_cost;
12376 case vector_load:
12377 return current_tune->vec_costs->vec_align_load_cost;
12379 case vector_store:
12380 return current_tune->vec_costs->vec_store_cost;
12382 case vec_to_scalar:
12383 return current_tune->vec_costs->vec_to_scalar_cost;
12385 case scalar_to_vec:
12386 return current_tune->vec_costs->scalar_to_vec_cost;
12388 case unaligned_load:
12389 case vector_gather_load:
12390 return current_tune->vec_costs->vec_unalign_load_cost;
12392 case unaligned_store:
12393 case vector_scatter_store:
12394 return current_tune->vec_costs->vec_unalign_store_cost;
12396 case cond_branch_taken:
12397 return current_tune->vec_costs->cond_taken_branch_cost;
12399 case cond_branch_not_taken:
12400 return current_tune->vec_costs->cond_not_taken_branch_cost;
12402 case vec_perm:
12403 case vec_promote_demote:
12404 return current_tune->vec_costs->vec_stmt_cost;
12406 case vec_construct:
12407 elements = TYPE_VECTOR_SUBPARTS (vectype);
12408 return elements / 2 + 1;
12410 default:
12411 gcc_unreachable ();
12415 /* Return true if and only if this insn can dual-issue only as older. */
12416 static bool
12417 cortexa7_older_only (rtx_insn *insn)
12419 if (recog_memoized (insn) < 0)
12420 return false;
12422 switch (get_attr_type (insn))
12424 case TYPE_ALU_DSP_REG:
12425 case TYPE_ALU_SREG:
12426 case TYPE_ALUS_SREG:
12427 case TYPE_LOGIC_REG:
12428 case TYPE_LOGICS_REG:
12429 case TYPE_ADC_REG:
12430 case TYPE_ADCS_REG:
12431 case TYPE_ADR:
12432 case TYPE_BFM:
12433 case TYPE_REV:
12434 case TYPE_MVN_REG:
12435 case TYPE_SHIFT_IMM:
12436 case TYPE_SHIFT_REG:
12437 case TYPE_LOAD_BYTE:
12438 case TYPE_LOAD_4:
12439 case TYPE_STORE_4:
12440 case TYPE_FFARITHS:
12441 case TYPE_FADDS:
12442 case TYPE_FFARITHD:
12443 case TYPE_FADDD:
12444 case TYPE_FMOV:
12445 case TYPE_F_CVT:
12446 case TYPE_FCMPS:
12447 case TYPE_FCMPD:
12448 case TYPE_FCONSTS:
12449 case TYPE_FCONSTD:
12450 case TYPE_FMULS:
12451 case TYPE_FMACS:
12452 case TYPE_FMULD:
12453 case TYPE_FMACD:
12454 case TYPE_FDIVS:
12455 case TYPE_FDIVD:
12456 case TYPE_F_MRC:
12457 case TYPE_F_MRRC:
12458 case TYPE_F_FLAG:
12459 case TYPE_F_LOADS:
12460 case TYPE_F_STORES:
12461 return true;
12462 default:
12463 return false;
12467 /* Return true if and only if this insn can dual-issue as younger. */
12468 static bool
12469 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12471 if (recog_memoized (insn) < 0)
12473 if (verbose > 5)
12474 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12475 return false;
12478 switch (get_attr_type (insn))
12480 case TYPE_ALU_IMM:
12481 case TYPE_ALUS_IMM:
12482 case TYPE_LOGIC_IMM:
12483 case TYPE_LOGICS_IMM:
12484 case TYPE_EXTEND:
12485 case TYPE_MVN_IMM:
12486 case TYPE_MOV_IMM:
12487 case TYPE_MOV_REG:
12488 case TYPE_MOV_SHIFT:
12489 case TYPE_MOV_SHIFT_REG:
12490 case TYPE_BRANCH:
12491 case TYPE_CALL:
12492 return true;
12493 default:
12494 return false;
12499 /* Look for an instruction that can dual issue only as an older
12500 instruction, and move it in front of any instructions that can
12501 dual-issue as younger, while preserving the relative order of all
12502 other instructions in the ready list. This is a heuristic to help
12503 dual-issue in later cycles, by postponing issue of more flexible
12504 instructions. This heuristic may affect dual issue opportunities
12505 in the current cycle. */
12506 static void
12507 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12508 int *n_readyp, int clock)
12510 int i;
12511 int first_older_only = -1, first_younger = -1;
12513 if (verbose > 5)
12514 fprintf (file,
12515 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12516 clock,
12517 *n_readyp);
12519 /* Traverse the ready list from the head (the instruction to issue
12520 first), looking for the first instruction that can issue as
12521 younger and the first instruction that can dual-issue only as
12522 older. */
12523 for (i = *n_readyp - 1; i >= 0; i--)
12525 rtx_insn *insn = ready[i];
12526 if (cortexa7_older_only (insn))
12528 first_older_only = i;
12529 if (verbose > 5)
12530 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12531 break;
12533 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12534 first_younger = i;
12537 /* Nothing to reorder because either no younger insn was found or an insn
12538 that can dual-issue only as older appears before any insn that
12539 can dual-issue as younger. */
12540 if (first_younger == -1)
12542 if (verbose > 5)
12543 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12544 return;
12547 /* Nothing to reorder because no older-only insn in the ready list. */
12548 if (first_older_only == -1)
12550 if (verbose > 5)
12551 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12552 return;
12555 /* Move first_older_only insn before first_younger. */
12556 if (verbose > 5)
12557 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12558 INSN_UID(ready [first_older_only]),
12559 INSN_UID(ready [first_younger]));
12560 rtx_insn *first_older_only_insn = ready [first_older_only];
12561 for (i = first_older_only; i < first_younger; i++)
12563 ready[i] = ready[i+1];
12566 ready[i] = first_older_only_insn;
12567 return;
12570 /* Implement TARGET_SCHED_REORDER. */
12571 static int
12572 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12573 int clock)
12575 switch (arm_tune)
12577 case TARGET_CPU_cortexa7:
12578 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12579 break;
12580 default:
12581 /* Do nothing for other cores. */
12582 break;
12585 return arm_issue_rate ();
12588 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12589 It corrects the value of COST based on the relationship between
12590 INSN and DEP through the dependence LINK. It returns the new
12591 value. There is a per-core adjust_cost hook to adjust scheduler costs
12592 and the per-core hook can choose to completely override the generic
12593 adjust_cost function. Only put bits of code into arm_adjust_cost that
12594 are common across all cores. */
12595 static int
12596 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12597 unsigned int)
12599 rtx i_pat, d_pat;
12601 /* When generating Thumb-1 code, we want to place flag-setting operations
12602 close to a conditional branch which depends on them, so that we can
12603 omit the comparison. */
12604 if (TARGET_THUMB1
12605 && dep_type == 0
12606 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12607 && recog_memoized (dep) >= 0
12608 && get_attr_conds (dep) == CONDS_SET)
12609 return 0;
12611 if (current_tune->sched_adjust_cost != NULL)
12613 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12614 return cost;
12617 /* XXX Is this strictly true? */
12618 if (dep_type == REG_DEP_ANTI
12619 || dep_type == REG_DEP_OUTPUT)
12620 return 0;
12622 /* Call insns don't incur a stall, even if they follow a load. */
12623 if (dep_type == 0
12624 && CALL_P (insn))
12625 return 1;
12627 if ((i_pat = single_set (insn)) != NULL
12628 && MEM_P (SET_SRC (i_pat))
12629 && (d_pat = single_set (dep)) != NULL
12630 && MEM_P (SET_DEST (d_pat)))
12632 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12633 /* This is a load after a store; there is no conflict if the load reads
12634 from a cached area. Assume that loads from the stack, and from the
12635 constant pool are cached, and that others will miss. This is a
12636 hack. */
12638 if ((SYMBOL_REF_P (src_mem)
12639 && CONSTANT_POOL_ADDRESS_P (src_mem))
12640 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12641 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12642 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12643 return 1;
12646 return cost;
12650 arm_max_conditional_execute (void)
12652 return max_insns_skipped;
12655 static int
12656 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12658 if (TARGET_32BIT)
12659 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12660 else
12661 return (optimize > 0) ? 2 : 0;
12664 static int
12665 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12667 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12670 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12671 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12672 sequences of non-executed instructions in IT blocks probably take the same
12673 amount of time as executed instructions (and the IT instruction itself takes
12674 space in icache). This function was experimentally determined to give good
12675 results on a popular embedded benchmark. */
12677 static int
12678 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12680 return (TARGET_32BIT && speed_p) ? 1
12681 : arm_default_branch_cost (speed_p, predictable_p);
12684 static int
12685 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12687 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12690 static bool fp_consts_inited = false;
12692 static REAL_VALUE_TYPE value_fp0;
12694 static void
12695 init_fp_table (void)
12697 REAL_VALUE_TYPE r;
12699 r = REAL_VALUE_ATOF ("0", DFmode);
12700 value_fp0 = r;
12701 fp_consts_inited = true;
12704 /* Return TRUE if rtx X is a valid immediate FP constant. */
12706 arm_const_double_rtx (rtx x)
12708 const REAL_VALUE_TYPE *r;
12710 if (!fp_consts_inited)
12711 init_fp_table ();
12713 r = CONST_DOUBLE_REAL_VALUE (x);
12714 if (REAL_VALUE_MINUS_ZERO (*r))
12715 return 0;
12717 if (real_equal (r, &value_fp0))
12718 return 1;
12720 return 0;
12723 /* VFPv3 has a fairly wide range of representable immediates, formed from
12724 "quarter-precision" floating-point values. These can be evaluated using this
12725 formula (with ^ for exponentiation):
12727 -1^s * n * 2^-r
12729 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12730 16 <= n <= 31 and 0 <= r <= 7.
12732 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12734 - A (most-significant) is the sign bit.
12735 - BCD are the exponent (encoded as r XOR 3).
12736 - EFGH are the mantissa (encoded as n - 16).
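   As a worked example derived from the formula above: +1.0 can be written as
   16 * 2^-4, so s = 0, n = 16 and r = 4, giving ABCDEFGH = 0 111 0000, i.e.
   0x70. The largest representable magnitude is 31 * 2^0 = 31.0 and the
   smallest non-zero magnitude is 16 * 2^-7 = 0.125.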
12739 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12740 fconst[sd] instruction, or -1 if X isn't suitable. */
12741 static int
12742 vfp3_const_double_index (rtx x)
12744 REAL_VALUE_TYPE r, m;
12745 int sign, exponent;
12746 unsigned HOST_WIDE_INT mantissa, mant_hi;
12747 unsigned HOST_WIDE_INT mask;
12748 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12749 bool fail;
12751 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12752 return -1;
12754 r = *CONST_DOUBLE_REAL_VALUE (x);
12756 /* We can't represent these things, so detect them first. */
12757 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12758 return -1;
12760 /* Extract sign, exponent and mantissa. */
12761 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12762 r = real_value_abs (&r);
12763 exponent = REAL_EXP (&r);
12764 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12765 highest (sign) bit, with a fixed binary point at bit point_pos.
12766 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12767 bits for the mantissa, this may fail (low bits would be lost). */
12768 real_ldexp (&m, &r, point_pos - exponent);
12769 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12770 mantissa = w.elt (0);
12771 mant_hi = w.elt (1);
12773 /* If there are bits set in the low part of the mantissa, we can't
12774 represent this value. */
12775 if (mantissa != 0)
12776 return -1;
12778 /* Now make it so that mantissa contains the most-significant bits, and move
12779 the point_pos to indicate that the least-significant bits have been
12780 discarded. */
12781 point_pos -= HOST_BITS_PER_WIDE_INT;
12782 mantissa = mant_hi;
12784 /* We can permit four significant bits of mantissa only, plus a high bit
12785 which is always 1. */
12786 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12787 if ((mantissa & mask) != 0)
12788 return -1;
12790 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12791 mantissa >>= point_pos - 5;
12793 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12794 floating-point immediate zero with Neon using an integer-zero load, but
12795 that case is handled elsewhere.) */
12796 if (mantissa == 0)
12797 return -1;
12799 gcc_assert (mantissa >= 16 && mantissa <= 31);
12801 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12802 normalized significands are in the range [1, 2). (Our mantissa is shifted
12803 left 4 places at this point relative to normalized IEEE754 values). GCC
12804 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12805 REAL_EXP must be altered. */
12806 exponent = 5 - exponent;
12808 if (exponent < 0 || exponent > 7)
12809 return -1;
12811 /* Sign, mantissa and exponent are now in the correct form to plug into the
12812 formula described in the comment above. */
12813 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12816 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12818 vfp3_const_double_rtx (rtx x)
12820 if (!TARGET_VFP3)
12821 return 0;
12823 return vfp3_const_double_index (x) != -1;
12826 /* Recognize immediates which can be used in various Neon and MVE instructions.
12827 Legal immediates are described by the following table (for VMVN variants, the
12828 bitwise inverse of the constant shown is recognized. In either case, VMOV
12829 is output and the correct instruction to use for a given constant is chosen
12830 by the assembler). The constant shown is replicated across all elements of
12831 the destination vector.
12833 insn elems variant constant (binary)
12834 ---- ----- ------- -----------------
12835 vmov i32 0 00000000 00000000 00000000 abcdefgh
12836 vmov i32 1 00000000 00000000 abcdefgh 00000000
12837 vmov i32 2 00000000 abcdefgh 00000000 00000000
12838 vmov i32 3 abcdefgh 00000000 00000000 00000000
12839 vmov i16 4 00000000 abcdefgh
12840 vmov i16 5 abcdefgh 00000000
12841 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12842 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12843 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12844 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12845 vmvn i16 10 00000000 abcdefgh
12846 vmvn i16 11 abcdefgh 00000000
12847 vmov i32 12 00000000 00000000 abcdefgh 11111111
12848 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12849 vmov i32 14 00000000 abcdefgh 11111111 11111111
12850 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12851 vmov i8 16 abcdefgh
12852 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12853 eeeeeeee ffffffff gggggggg hhhhhhhh
12854 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12855 vmov f32 19 00000000 00000000 00000000 00000000
12857 For case 18, B = !b. Representable values are exactly those accepted by
12858 vfp3_const_double_index, but are output as floating-point numbers rather
12859 than indices.
12861 For case 19, we will change it to vmov.i32 when assembling.
12863 Variants 0-5 (inclusive) may also be used as immediates for the second
12864 operand of VORR/VBIC instructions.
12866 The INVERSE argument causes the bitwise inverse of the given operand to be
12867 recognized instead (used for recognizing legal immediates for the VAND/VORN
12868 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12869 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12870 output, rather than the real insns vbic/vorr).
12872 INVERSE makes no difference to the recognition of float vectors.
12874 The return value is the variant of immediate as shown in the above table, or
12875 -1 if the given value doesn't match any of the listed patterns.
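   As an illustrative example of the table above: the V4SI constant
   { 0x5500, 0x5500, 0x5500, 0x5500 } matches variant 1 and would be output
   as vmov.i32 with the immediate byte abcdefgh equal to 0x55.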
12877 static int
12878 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12879 rtx *modconst, int *elementwidth)
12881 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12882 matches = 1; \
12883 for (i = 0; i < idx; i += (STRIDE)) \
12884 if (!(TEST)) \
12885 matches = 0; \
12886 if (matches) \
12888 immtype = (CLASS); \
12889 elsize = (ELSIZE); \
12890 break; \
12893 unsigned int i, elsize = 0, idx = 0, n_elts;
12894 unsigned int innersize;
12895 unsigned char bytes[16] = {};
12896 int immtype = -1, matches;
12897 unsigned int invmask = inverse ? 0xff : 0;
12898 bool vector = GET_CODE (op) == CONST_VECTOR;
12900 if (vector)
12901 n_elts = CONST_VECTOR_NUNITS (op);
12902 else
12904 n_elts = 1;
12905 gcc_assert (mode != VOIDmode);
12908 innersize = GET_MODE_UNIT_SIZE (mode);
12910 /* Only support 128-bit vectors for MVE. */
12911 if (TARGET_HAVE_MVE
12912 && (!vector
12913 || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12914 || n_elts * innersize != 16))
12915 return -1;
12917 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12918 return -1;
12920 /* Vectors of float constants. */
12921 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12923 rtx el0 = CONST_VECTOR_ELT (op, 0);
12925 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12926 return -1;
12928 /* FP16 vectors cannot be represented. */
12929 if (GET_MODE_INNER (mode) == HFmode)
12930 return -1;
12932 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12933 are distinct in this context. */
12934 if (!const_vec_duplicate_p (op))
12935 return -1;
12937 if (modconst)
12938 *modconst = CONST_VECTOR_ELT (op, 0);
12940 if (elementwidth)
12941 *elementwidth = 0;
12943 if (el0 == CONST0_RTX (GET_MODE (el0)))
12944 return 19;
12945 else
12946 return 18;
12949 /* The tricks done in the code below apply for little-endian vector layout.
12950 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12951 FIXME: Implement logic for big-endian vectors. */
12952 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12953 return -1;
12955 /* Splat vector constant out into a byte vector. */
12956 for (i = 0; i < n_elts; i++)
12958 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12959 unsigned HOST_WIDE_INT elpart;
12961 gcc_assert (CONST_INT_P (el));
12962 elpart = INTVAL (el);
12964 for (unsigned int byte = 0; byte < innersize; byte++)
12966 bytes[idx++] = (elpart & 0xff) ^ invmask;
12967 elpart >>= BITS_PER_UNIT;
12971 /* Sanity check. */
12972 gcc_assert (idx == GET_MODE_SIZE (mode));
12976 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12977 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12979 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12980 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12982 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12983 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12985 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12986 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12988 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12990 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12992 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12993 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12995 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12996 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12998 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12999 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13001 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13002 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13004 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13006 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13008 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13009 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13011 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13012 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13014 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13015 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13017 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13018 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13020 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13022 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13023 && bytes[i] == bytes[(i + 8) % idx]);
13025 while (0);
13027 if (immtype == -1)
13028 return -1;
13030 if (elementwidth)
13031 *elementwidth = elsize;
13033 if (modconst)
13035 unsigned HOST_WIDE_INT imm = 0;
13037 /* Un-invert bytes of recognized vector, if necessary. */
13038 if (invmask != 0)
13039 for (i = 0; i < idx; i++)
13040 bytes[i] ^= invmask;
13042 if (immtype == 17)
13044 /* FIXME: Broken on 32-bit H_W_I hosts. */
13045 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13047 for (i = 0; i < 8; i++)
13048 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13049 << (i * BITS_PER_UNIT);
13051 *modconst = GEN_INT (imm);
13053 else
13055 unsigned HOST_WIDE_INT imm = 0;
13057 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13058 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13060 *modconst = GEN_INT (imm);
13064 return immtype;
13065 #undef CHECK
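/* Worked example of the checks above (little-endian): the V4SImode constant
   { 0xAB, 0xAB, 0xAB, 0xAB } splats into the byte sequence ab 00 00 00
   repeated four times, which satisfies the immtype 0 test, so the function
   returns 0 with *ELEMENTWIDTH set to 32 and *MODCONST set to 0xAB (the form
   the move patterns can emit as a vmov.i32).  */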
13068 /* Return TRUE if rtx OP is legal for use as either a Neon or MVE VMOV (or,
13069    implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
13070    (or zero for float elements), and a modified constant (whatever should be
13071    output for a VMOV) in *MODCONST.  This function was renamed from
13072    "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13073    it is used by both Neon and MVE.  */
13075 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13076 rtx *modconst, int *elementwidth)
13078 rtx tmpconst;
13079 int tmpwidth;
13080 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13082 if (retval == -1)
13083 return 0;
13085 if (modconst)
13086 *modconst = tmpconst;
13088 if (elementwidth)
13089 *elementwidth = tmpwidth;
13091 return 1;
13094 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction.  If
13095 the immediate is valid, write a constant suitable for using as an operand
13096 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13097 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13100 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13101 rtx *modconst, int *elementwidth)
13103 rtx tmpconst;
13104 int tmpwidth;
13105 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13107 if (retval < 0 || retval > 5)
13108 return 0;
13110 if (modconst)
13111 *modconst = tmpconst;
13113 if (elementwidth)
13114 *elementwidth = tmpwidth;
13116 return 1;
13119 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13120 the immediate is valid, write a constant suitable for using as an operand
13121 to VSHR/VSHL to *MODCONST and the corresponding element width to
13122    *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts, which
13123    have different limits on the immediate value.  */
13126 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13127 rtx *modconst, int *elementwidth,
13128 bool isleftshift)
13130 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13131 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13132 unsigned HOST_WIDE_INT last_elt = 0;
13133 unsigned HOST_WIDE_INT maxshift;
13135   /* All elements of the vector constant must be identical.  */
13136 for (i = 0; i < n_elts; i++)
13138 rtx el = CONST_VECTOR_ELT (op, i);
13139 unsigned HOST_WIDE_INT elpart;
13141 if (CONST_INT_P (el))
13142 elpart = INTVAL (el);
13143 else if (CONST_DOUBLE_P (el))
13144 return 0;
13145 else
13146 gcc_unreachable ();
13148 if (i != 0 && elpart != last_elt)
13149 return 0;
13151 last_elt = elpart;
13154 /* Shift less than element size. */
13155 maxshift = innersize * 8;
13157 if (isleftshift)
13159 /* Left shift immediate value can be from 0 to <size>-1. */
13160 if (last_elt >= maxshift)
13161 return 0;
13163 else
13165 /* Right shift immediate value can be from 1 to <size>. */
13166 if (last_elt == 0 || last_elt > maxshift)
13167 return 0;
13170 if (elementwidth)
13171 *elementwidth = innersize * 8;
13173 if (modconst)
13174 *modconst = CONST_VECTOR_ELT (op, 0);
13176 return 1;
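/* For example, with V8HImode elements MAXSHIFT is 16: a vector of all 3s is
   accepted for both left and right shifts, while a vector of all 16s is only
   accepted as a right-shift count (vshr takes #1..#16, vshl takes #0..#15).
   In the accepted cases *ELEMENTWIDTH is set to 16 and *MODCONST to the
   element holding the shift count.  */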
13179 /* Return a string suitable for output of Neon immediate logic operation
13180 MNEM. */
13182 char *
13183 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13184 int inverse, int quad)
13186 int width, is_valid;
13187 static char templ[40];
13189 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13191 gcc_assert (is_valid != 0);
13193 if (quad)
13194 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13195 else
13196 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13198 return templ;
13201 /* Return a string suitable for output of Neon immediate shift operation
13202 (VSHR or VSHL) MNEM. */
13204 char *
13205 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13206 machine_mode mode, int quad,
13207 bool isleftshift)
13209 int width, is_valid;
13210 static char templ[40];
13212 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13213 gcc_assert (is_valid != 0);
13215 if (quad)
13216 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13217 else
13218 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13220 return templ;
13223 /* Output a sequence of pairwise operations to implement a reduction.
13224 NOTE: We do "too much work" here, because pairwise operations work on two
13225 registers-worth of operands in one go. Unfortunately we can't exploit those
13226 extra calculations to do the full operation in fewer steps, I don't think.
13227 Although all vector elements of the result but the first are ignored, we
13228 actually calculate the same result in each of the elements. An alternative
13229 such as initially loading a vector with zero to use as each of the second
13230 operands would use up an additional register and take an extra instruction,
13231 for no particular gain. */
13233 void
13234 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13235 rtx (*reduc) (rtx, rtx, rtx))
13237 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13238 rtx tmpsum = op1;
13240 for (i = parts / 2; i >= 1; i /= 2)
13242 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13243 emit_insn (reduc (dest, tmpsum, tmpsum));
13244 tmpsum = dest;
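/* For instance, reducing a four-element vector with a pairwise-add generator
   runs the loop with I = 2 and then I = 1: the first step leaves
   (a+b, c+d, a+b, c+d) in a temporary, the second leaves a+b+c+d in every
   element of OP0, of which only element 0 is used by the caller.  */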
13248 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13249 loaded into a register using VDUP.
13251 If this is the case, and GENERATE is set, we also generate
13252 instructions to do this and return an RTX to assign to the register. */
13254 static rtx
13255 neon_vdup_constant (rtx vals, bool generate)
13257 machine_mode mode = GET_MODE (vals);
13258 machine_mode inner_mode = GET_MODE_INNER (mode);
13259 rtx x;
13261 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13262 return NULL_RTX;
13264 if (!const_vec_duplicate_p (vals, &x))
13265 /* The elements are not all the same. We could handle repeating
13266 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13267 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13268 vdup.i16). */
13269 return NULL_RTX;
13271 if (!generate)
13272 return x;
13274 /* We can load this constant by using VDUP and a constant in a
13275 single ARM register. This will be cheaper than a vector
13276 load. */
13278 x = copy_to_mode_reg (inner_mode, x);
13279 return gen_vec_duplicate (mode, x);
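/* For example, the V4SImode constant { 5, 5, 5, 5 } is handled here by
   copying #5 into a core register and returning a vec_duplicate of that
   register, which can then be emitted as a single vdup.32.  */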
13282 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13284 mve_bool_vec_to_const (rtx const_vec)
13286 int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
13287 int repeat = 16 / n_elts;
13288 int i;
13289 int hi_val = 0;
13291 for (i = 0; i < n_elts; i++)
13293 rtx el = CONST_VECTOR_ELT (const_vec, i);
13294 unsigned HOST_WIDE_INT elpart;
13296 gcc_assert (CONST_INT_P (el));
13297 elpart = INTVAL (el);
13299 for (int j = 0; j < repeat; j++)
13300 hi_val |= elpart << (i * repeat + j);
13302 return gen_int_mode (hi_val, HImode);
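/* Worked example: the V4BImode predicate constant { 1, 0, 1, 1 } is widened
   by REPEAT = 16/4 = 4 bits per element, so HI_VAL accumulates bits 0-3,
   8-11 and 12-15, giving the HImode constant 0xff0f.  */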
13305 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13306    constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13307 into a register.
13309 If this is the case, and GENERATE is set, we also generate code to do
13310 this and return an RTX to copy into the register. */
13313 neon_make_constant (rtx vals, bool generate)
13315 machine_mode mode = GET_MODE (vals);
13316 rtx target;
13317 rtx const_vec = NULL_RTX;
13318 int n_elts = GET_MODE_NUNITS (mode);
13319 int n_const = 0;
13320 int i;
13322 if (GET_CODE (vals) == CONST_VECTOR)
13323 const_vec = vals;
13324 else if (GET_CODE (vals) == PARALLEL)
13326 /* A CONST_VECTOR must contain only CONST_INTs and
13327 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13328 Only store valid constants in a CONST_VECTOR. */
13329 for (i = 0; i < n_elts; ++i)
13331 rtx x = XVECEXP (vals, 0, i);
13332 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13333 n_const++;
13335 if (n_const == n_elts)
13336 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13338 else
13339 gcc_unreachable ();
13341 if (const_vec != NULL
13342 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13343 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13344 return const_vec;
13345 else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
13346 return mve_bool_vec_to_const (const_vec);
13347 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13348 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13349 pipeline cycle; creating the constant takes one or two ARM
13350 pipeline cycles. */
13351 return target;
13352 else if (const_vec != NULL_RTX)
13353 /* Load from constant pool. On Cortex-A8 this takes two cycles
13354 (for either double or quad vectors). We cannot take advantage
13355 of single-cycle VLD1 because we need a PC-relative addressing
13356 mode. */
13357 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13358 else
13359 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13360 We cannot construct an initializer. */
13361 return NULL_RTX;
13364 /* Initialize vector TARGET to VALS. */
13366 void
13367 neon_expand_vector_init (rtx target, rtx vals)
13369 machine_mode mode = GET_MODE (target);
13370 machine_mode inner_mode = GET_MODE_INNER (mode);
13371 int n_elts = GET_MODE_NUNITS (mode);
13372 int n_var = 0, one_var = -1;
13373 bool all_same = true;
13374 rtx x, mem;
13375 int i;
13377 for (i = 0; i < n_elts; ++i)
13379 x = XVECEXP (vals, 0, i);
13380 if (!CONSTANT_P (x))
13381 ++n_var, one_var = i;
13383 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13384 all_same = false;
13387 if (n_var == 0)
13389 rtx constant = neon_make_constant (vals);
13390 if (constant != NULL_RTX)
13392 emit_move_insn (target, constant);
13393 return;
13397 /* Splat a single non-constant element if we can. */
13398 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13400 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13401 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13402 return;
13405 /* One field is non-constant. Load constant then overwrite varying
13406 field. This is more efficient than using the stack. */
13407 if (n_var == 1)
13409 rtx copy = copy_rtx (vals);
13410 rtx merge_mask = GEN_INT (1 << one_var);
13412 /* Load constant part of vector, substitute neighboring value for
13413 varying element. */
13414 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13415 neon_expand_vector_init (target, copy);
13417 /* Insert variable. */
13418 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13419 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13420 return;
13423 /* Construct the vector in memory one field at a time
13424 and load the whole vector. */
13425 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13426 for (i = 0; i < n_elts; i++)
13427 emit_move_insn (adjust_address_nv (mem, inner_mode,
13428 i * GET_MODE_SIZE (inner_mode)),
13429 XVECEXP (vals, 0, i));
13430 emit_move_insn (target, mem);
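/* Example of the single-variable case above: initializing a V4SImode vector
   with (x, 1, 2, 3), where x is not constant, first initializes TARGET with
   the constant (1, 1, 2, 3) (element 0 borrowed from its neighbour), then
   copies x into a core register and inserts it into lane 0 using the merge
   mask 1 << 0.  */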
13433 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise an
13434    error describing it as DESC if it doesn't.  EXP indicates the source
13435    location, which includes the inlining history for intrinsics.  */
13437 static void
13438 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13439 const_tree exp, const char *desc)
13441 HOST_WIDE_INT lane;
13443 gcc_assert (CONST_INT_P (operand));
13445 lane = INTVAL (operand);
13447 if (lane < low || lane >= high)
13449 if (exp)
13450 error_at (EXPR_LOCATION (exp),
13451 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13452 else
13453 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13457 /* Bounds-check lanes. */
13459 void
13460 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13461 const_tree exp)
13463 bounds_check (operand, low, high, exp, "lane");
13466 /* Bounds-check constants. */
13468 void
13469 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13471 bounds_check (operand, low, high, NULL_TREE, "constant");
13474 HOST_WIDE_INT
13475 neon_element_bits (machine_mode mode)
13477 return GET_MODE_UNIT_BITSIZE (mode);
13481 /* Predicates for `match_operand' and `match_operator'. */
13483 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13484 WB level is 2 if full writeback address modes are allowed, 1
13485 if limited writeback address modes (POST_INC and PRE_DEC) are
13486 allowed and 0 if no writeback at all is supported. */
13489 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13491 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13492 rtx ind;
13494 /* Reject eliminable registers. */
13495 if (! (reload_in_progress || reload_completed || lra_in_progress)
13496 && ( reg_mentioned_p (frame_pointer_rtx, op)
13497 || reg_mentioned_p (arg_pointer_rtx, op)
13498 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13499 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13500 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13501 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13502 return FALSE;
13504 /* Constants are converted into offsets from labels. */
13505 if (!MEM_P (op))
13506 return FALSE;
13508 ind = XEXP (op, 0);
13510 if (reload_completed
13511 && (LABEL_REF_P (ind)
13512 || (GET_CODE (ind) == CONST
13513 && GET_CODE (XEXP (ind, 0)) == PLUS
13514 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13515 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13516 return TRUE;
13518 /* Match: (mem (reg)). */
13519 if (REG_P (ind))
13520 return arm_address_register_rtx_p (ind, 0);
13522   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
13523      acceptable whenever restricted writeback is allowed (subject to
13524      verification by arm_address_register_rtx_p).  We need full
13525      writeback to accept PRE_INC and POST_DEC, and at least
13526      restricted writeback for POST_INC and PRE_DEC.  */
13527 if (wb_level > 0
13528 && (GET_CODE (ind) == POST_INC
13529 || GET_CODE (ind) == PRE_DEC
13530 || (wb_level > 1
13531 && (GET_CODE (ind) == PRE_INC
13532 || GET_CODE (ind) == POST_DEC))))
13533 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13535 if (wb_level > 1
13536 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13537 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13538 && GET_CODE (XEXP (ind, 1)) == PLUS
13539 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13540 ind = XEXP (ind, 1);
13542 /* Match:
13543 (plus (reg)
13544 (const))
13546 The encoded immediate for 16-bit modes is multiplied by 2,
13547 while the encoded immediate for 32-bit and 64-bit modes is
13548 multiplied by 4. */
13549 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13550 if (GET_CODE (ind) == PLUS
13551 && REG_P (XEXP (ind, 0))
13552 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13553 && CONST_INT_P (XEXP (ind, 1))
13554 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13555 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13556 return TRUE;
13558 return FALSE;
13561 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13562 WB is true if full writeback address modes are allowed and is false
13563 if limited writeback address modes (POST_INC and PRE_DEC) are
13564 allowed. */
13566 int arm_coproc_mem_operand (rtx op, bool wb)
13568 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13571 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13572 context in which no writeback address modes are allowed. */
13575 arm_coproc_mem_operand_no_writeback (rtx op)
13577 return arm_coproc_mem_operand_wb (op, 0);
13580 /* This function returns TRUE on matching mode and op.
13581 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13582    2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13).
13584 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13586 enum rtx_code code;
13587 int val, reg_no;
13589 /* Match: (mem (reg)). */
13590 if (REG_P (op))
13592 int reg_no = REGNO (op);
13593 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13594 ? reg_no <= LAST_LO_REGNUM
13595 : reg_no < LAST_ARM_REGNUM)
13596 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13598 code = GET_CODE (op);
13600 if (code == POST_INC || code == PRE_DEC
13601 || code == PRE_INC || code == POST_DEC)
13603 reg_no = REGNO (XEXP (op, 0));
13604 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13605 ? reg_no <= LAST_LO_REGNUM
13606 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13607 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13609 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13610 && GET_CODE (XEXP (op, 1)) == PLUS
13611 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13612 && REG_P (XEXP (op, 0))
13613 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13614 /* Make sure to only accept PLUS after reload_completed, otherwise
13615 this will interfere with auto_inc's pattern detection. */
13616 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13617 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13619 reg_no = REGNO (XEXP (op, 0));
13620 if (code == PLUS)
13621 val = INTVAL (XEXP (op, 1));
13622 else
13623 val = INTVAL (XEXP(XEXP (op, 1), 1));
13625 switch (mode)
13627 case E_V16QImode:
13628 case E_V8QImode:
13629 case E_V4QImode:
13630 if (abs (val) > 127)
13631 return FALSE;
13632 break;
13633 case E_V8HImode:
13634 case E_V8HFmode:
13635 case E_V4HImode:
13636 case E_V4HFmode:
13637 if (val % 2 != 0 || abs (val) > 254)
13638 return FALSE;
13639 break;
13640 case E_V4SImode:
13641 case E_V4SFmode:
13642 if (val % 4 != 0 || abs (val) > 508)
13643 return FALSE;
13644 break;
13645 default:
13646 return FALSE;
13648 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13649 || (MVE_STN_LDW_MODE (mode)
13650 ? reg_no <= LAST_LO_REGNUM
13651 : (reg_no < LAST_ARM_REGNUM
13652 && (code == PLUS || reg_no != SP_REGNUM))));
13654 return FALSE;
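/* For example, with MODE == V4SImode an offset of 252 is acceptable here (a
   multiple of 4 within +/-508), whereas 250 is rejected for not being a
   multiple of 4 and 512 is rejected for being out of range.  */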
13657 /* Return TRUE if OP is a memory operand which we can load or store a vector
13658 to/from. TYPE is one of the following values:
13659     0 - Vector load/store (vldr)
13660 1 - Core registers (ldm)
13661 2 - Element/structure loads (vld1)
13664 neon_vector_mem_operand (rtx op, int type, bool strict)
13666 rtx ind;
13668 /* Reject eliminable registers. */
13669 if (strict && ! (reload_in_progress || reload_completed)
13670 && (reg_mentioned_p (frame_pointer_rtx, op)
13671 || reg_mentioned_p (arg_pointer_rtx, op)
13672 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13673 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13674 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13675 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13676 return FALSE;
13678 /* Constants are converted into offsets from labels. */
13679 if (!MEM_P (op))
13680 return FALSE;
13682 ind = XEXP (op, 0);
13684 if (reload_completed
13685 && (LABEL_REF_P (ind)
13686 || (GET_CODE (ind) == CONST
13687 && GET_CODE (XEXP (ind, 0)) == PLUS
13688 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13689 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13690 return TRUE;
13692 /* Match: (mem (reg)). */
13693 if (REG_P (ind))
13694 return arm_address_register_rtx_p (ind, 0);
13696 /* Allow post-increment with Neon registers. */
13697 if ((type != 1 && GET_CODE (ind) == POST_INC)
13698 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13699 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13701   /* Allow post-increment by register for VLDn.  */
13702 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13703 && GET_CODE (XEXP (ind, 1)) == PLUS
13704 && REG_P (XEXP (XEXP (ind, 1), 1))
13705 && REG_P (XEXP (ind, 0))
13706 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13707 return true;
13709 /* Match:
13710 (plus (reg)
13711 (const)). */
13712 if (type == 0
13713 && GET_CODE (ind) == PLUS
13714 && REG_P (XEXP (ind, 0))
13715 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13716 && CONST_INT_P (XEXP (ind, 1))
13717 && INTVAL (XEXP (ind, 1)) > -1024
13718 /* For quad modes, we restrict the constant offset to be slightly less
13719 than what the instruction format permits. We have no such constraint
13720 on double mode offsets. (This must match arm_legitimate_index_p.) */
13721 && (INTVAL (XEXP (ind, 1))
13722 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13723 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13724 return TRUE;
13726 return FALSE;
13729 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13730 type. */
13732 neon_struct_mem_operand (rtx op)
13734 rtx ind;
13736 /* Reject eliminable registers. */
13737 if (! (reload_in_progress || reload_completed)
13738 && ( reg_mentioned_p (frame_pointer_rtx, op)
13739 || reg_mentioned_p (arg_pointer_rtx, op)
13740 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13741 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13742 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13743 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13744 return FALSE;
13746 /* Constants are converted into offsets from labels. */
13747 if (!MEM_P (op))
13748 return FALSE;
13750 ind = XEXP (op, 0);
13752 if (reload_completed
13753 && (LABEL_REF_P (ind)
13754 || (GET_CODE (ind) == CONST
13755 && GET_CODE (XEXP (ind, 0)) == PLUS
13756 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13757 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13758 return TRUE;
13760 /* Match: (mem (reg)). */
13761 if (REG_P (ind))
13762 return arm_address_register_rtx_p (ind, 0);
13764 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13765 if (GET_CODE (ind) == POST_INC
13766 || GET_CODE (ind) == PRE_DEC)
13767 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13769 return FALSE;
13772 /* Prepares the operands for the VCMLA by lane instruction such that the right
13773 register number is selected. This instruction is special in that it always
13774 requires a D register, however there is a choice to be made between Dn[0],
13775 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13777 The VCMLA by lane function always selects two values. For instance given D0
13778 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13779 used by the instruction. However given V4SF then index 0 and 1 are valid as
13780 D0[0] or D1[0] are both valid.
13782    This function centralizes that information based on OPERANDS: OPERANDS[3]
13783 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13784 updated to contain the right index. */
13786 rtx *
13787 neon_vcmla_lane_prepare_operands (rtx *operands)
13789 int lane = INTVAL (operands[4]);
13790 machine_mode constmode = SImode;
13791 machine_mode mode = GET_MODE (operands[3]);
13792 int regno = REGNO (operands[3]);
13793 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13794 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13796 operands[3] = gen_int_mode (regno + 1, constmode);
13797 operands[4]
13798 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13800 else
13802 operands[3] = gen_int_mode (regno, constmode);
13803 operands[4] = gen_int_mode (lane, constmode);
13805 return operands;
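/* Worked example: if OPERANDS[3] is q1 (d2/d3) in V4SFmode and OPERANDS[4]
   is 1, the selected pair lives in the upper D register, so OPERANDS[3]
   becomes the constant 3 (for d3) and OPERANDS[4] becomes 0; lane 0 would
   instead yield d2 with index 0.  */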
13809 /* Return true if X is a register that will be eliminated later on. */
13811 arm_eliminable_register (rtx x)
13813 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13814 || REGNO (x) == ARG_POINTER_REGNUM
13815 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13816 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13819 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13820 coprocessor registers. Otherwise return NO_REGS. */
13822 enum reg_class
13823 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13825 if (mode == HFmode)
13827 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13828 return GENERAL_REGS;
13829 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13830 return NO_REGS;
13831 return GENERAL_REGS;
13834 /* The neon move patterns handle all legitimate vector and struct
13835 addresses. */
13836 if (TARGET_NEON
13837 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13838 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13839 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13840 || VALID_NEON_STRUCT_MODE (mode)))
13841 return NO_REGS;
13843 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13844 return NO_REGS;
13846 return GENERAL_REGS;
13849 /* Values which must be returned in the most-significant end of the return
13850 register. */
13852 static bool
13853 arm_return_in_msb (const_tree valtype)
13855 return (TARGET_AAPCS_BASED
13856 && BYTES_BIG_ENDIAN
13857 && (AGGREGATE_TYPE_P (valtype)
13858 || TREE_CODE (valtype) == COMPLEX_TYPE
13859 || FIXED_POINT_TYPE_P (valtype)));
13862 /* Return TRUE if X references a SYMBOL_REF. */
13864 symbol_mentioned_p (rtx x)
13866 const char * fmt;
13867 int i;
13869 if (SYMBOL_REF_P (x))
13870 return 1;
13872 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13873 are constant offsets, not symbols. */
13874 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13875 return 0;
13877 fmt = GET_RTX_FORMAT (GET_CODE (x));
13879 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13881 if (fmt[i] == 'E')
13883 int j;
13885 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13886 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13887 return 1;
13889 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13890 return 1;
13893 return 0;
13896 /* Return TRUE if X references a LABEL_REF. */
13898 label_mentioned_p (rtx x)
13900 const char * fmt;
13901 int i;
13903 if (LABEL_REF_P (x))
13904 return 1;
13906 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13907 instruction, but they are constant offsets, not symbols. */
13908 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13909 return 0;
13911 fmt = GET_RTX_FORMAT (GET_CODE (x));
13912 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13914 if (fmt[i] == 'E')
13916 int j;
13918 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13919 if (label_mentioned_p (XVECEXP (x, i, j)))
13920 return 1;
13922 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13923 return 1;
13926 return 0;
13930 tls_mentioned_p (rtx x)
13932 switch (GET_CODE (x))
13934 case CONST:
13935 return tls_mentioned_p (XEXP (x, 0));
13937 case UNSPEC:
13938 if (XINT (x, 1) == UNSPEC_TLS)
13939 return 1;
13941 /* Fall through. */
13942 default:
13943 return 0;
13947 /* Must not copy any rtx that uses a pc-relative address.
13948 Also, disallow copying of load-exclusive instructions that
13949 may appear after splitting of compare-and-swap-style operations
13950 so as to prevent those loops from being transformed away from their
13951 canonical forms (see PR 69904). */
13953 static bool
13954 arm_cannot_copy_insn_p (rtx_insn *insn)
13956 /* The tls call insn cannot be copied, as it is paired with a data
13957 word. */
13958 if (recog_memoized (insn) == CODE_FOR_tlscall)
13959 return true;
13961 subrtx_iterator::array_type array;
13962 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13964 const_rtx x = *iter;
13965 if (GET_CODE (x) == UNSPEC
13966 && (XINT (x, 1) == UNSPEC_PIC_BASE
13967 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13968 return true;
13971 rtx set = single_set (insn);
13972 if (set)
13974 rtx src = SET_SRC (set);
13975 if (GET_CODE (src) == ZERO_EXTEND)
13976 src = XEXP (src, 0);
13978 /* Catch the load-exclusive and load-acquire operations. */
13979 if (GET_CODE (src) == UNSPEC_VOLATILE
13980 && (XINT (src, 1) == VUNSPEC_LL
13981 || XINT (src, 1) == VUNSPEC_LAX))
13982 return true;
13984 return false;
13987 enum rtx_code
13988 minmax_code (rtx x)
13990 enum rtx_code code = GET_CODE (x);
13992 switch (code)
13994 case SMAX:
13995 return GE;
13996 case SMIN:
13997 return LE;
13998 case UMIN:
13999 return LEU;
14000 case UMAX:
14001 return GEU;
14002 default:
14003 gcc_unreachable ();
14007 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14009 bool
14010 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14011 int *mask, bool *signed_sat)
14013 /* The high bound must be a power of two minus one. */
14014 int log = exact_log2 (INTVAL (hi_bound) + 1);
14015 if (log == -1)
14016 return false;
14018 /* The low bound is either zero (for usat) or one less than the
14019 negation of the high bound (for ssat). */
14020 if (INTVAL (lo_bound) == 0)
14022 if (mask)
14023 *mask = log;
14024 if (signed_sat)
14025 *signed_sat = false;
14027 return true;
14030 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14032 if (mask)
14033 *mask = log + 1;
14034 if (signed_sat)
14035 *signed_sat = true;
14037 return true;
14040 return false;
14043 /* Return 1 if memory locations are adjacent. */
14045 adjacent_mem_locations (rtx a, rtx b)
14047 /* We don't guarantee to preserve the order of these memory refs. */
14048 if (volatile_refs_p (a) || volatile_refs_p (b))
14049 return 0;
14051 if ((REG_P (XEXP (a, 0))
14052 || (GET_CODE (XEXP (a, 0)) == PLUS
14053 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14054 && (REG_P (XEXP (b, 0))
14055 || (GET_CODE (XEXP (b, 0)) == PLUS
14056 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14058 HOST_WIDE_INT val0 = 0, val1 = 0;
14059 rtx reg0, reg1;
14060 int val_diff;
14062 if (GET_CODE (XEXP (a, 0)) == PLUS)
14064 reg0 = XEXP (XEXP (a, 0), 0);
14065 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14067 else
14068 reg0 = XEXP (a, 0);
14070 if (GET_CODE (XEXP (b, 0)) == PLUS)
14072 reg1 = XEXP (XEXP (b, 0), 0);
14073 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14075 else
14076 reg1 = XEXP (b, 0);
14078 /* Don't accept any offset that will require multiple
14079 instructions to handle, since this would cause the
14080 arith_adjacentmem pattern to output an overlong sequence. */
14081 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14082 return 0;
14084 /* Don't allow an eliminable register: register elimination can make
14085 the offset too large. */
14086 if (arm_eliminable_register (reg0))
14087 return 0;
14089 val_diff = val1 - val0;
14091 if (arm_ld_sched)
14093 /* If the target has load delay slots, then there's no benefit
14094 to using an ldm instruction unless the offset is zero and
14095 we are optimizing for size. */
14096 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14097 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14098 && (val_diff == 4 || val_diff == -4));
14101 return ((REGNO (reg0) == REGNO (reg1))
14102 && (val_diff == 4 || val_diff == -4));
14105 return 0;
14108 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14109 for load operations, false for store operations. CONSECUTIVE is true
14110 if the register numbers in the operation must be consecutive in the register
14111 bank. RETURN_PC is true if value is to be loaded in PC.
14112 The pattern we are trying to match for load is:
14113 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14114 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14117 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14119 where
14120 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14121 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14122 3. If consecutive is TRUE, then for kth register being loaded,
14123 REGNO (R_dk) = REGNO (R_d0) + k.
14124 The pattern for store is similar. */
14125 bool
14126 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14127 bool consecutive, bool return_pc)
14129 HOST_WIDE_INT count = XVECLEN (op, 0);
14130 rtx reg, mem, addr;
14131 unsigned regno;
14132 unsigned first_regno;
14133 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14134 rtx elt;
14135 bool addr_reg_in_reglist = false;
14136 bool update = false;
14137 int reg_increment;
14138 int offset_adj;
14139 int regs_per_val;
14141 /* If not in SImode, then registers must be consecutive
14142 (e.g., VLDM instructions for DFmode). */
14143 gcc_assert ((mode == SImode) || consecutive);
14144 /* Setting return_pc for stores is illegal. */
14145 gcc_assert (!return_pc || load);
14147 /* Set up the increments and the regs per val based on the mode. */
14148 reg_increment = GET_MODE_SIZE (mode);
14149 regs_per_val = reg_increment / 4;
14150 offset_adj = return_pc ? 1 : 0;
14152 if (count <= 1
14153 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14154 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14155 return false;
14157 /* Check if this is a write-back. */
14158 elt = XVECEXP (op, 0, offset_adj);
14159 if (GET_CODE (SET_SRC (elt)) == PLUS)
14161 i++;
14162 base = 1;
14163 update = true;
14165 /* The offset adjustment must be the number of registers being
14166 popped times the size of a single register. */
14167 if (!REG_P (SET_DEST (elt))
14168 || !REG_P (XEXP (SET_SRC (elt), 0))
14169 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14170 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14171 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14172 ((count - 1 - offset_adj) * reg_increment))
14173 return false;
14176 i = i + offset_adj;
14177 base = base + offset_adj;
14178 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14179 success depends on the type: VLDM can do just one reg,
14180 LDM must do at least two. */
14181 if ((count <= i) && (mode == SImode))
14182 return false;
14184 elt = XVECEXP (op, 0, i - 1);
14185 if (GET_CODE (elt) != SET)
14186 return false;
14188 if (load)
14190 reg = SET_DEST (elt);
14191 mem = SET_SRC (elt);
14193 else
14195 reg = SET_SRC (elt);
14196 mem = SET_DEST (elt);
14199 if (!REG_P (reg) || !MEM_P (mem))
14200 return false;
14202 regno = REGNO (reg);
14203 first_regno = regno;
14204 addr = XEXP (mem, 0);
14205 if (GET_CODE (addr) == PLUS)
14207 if (!CONST_INT_P (XEXP (addr, 1)))
14208 return false;
14210 offset = INTVAL (XEXP (addr, 1));
14211 addr = XEXP (addr, 0);
14214 if (!REG_P (addr))
14215 return false;
14217 /* Don't allow SP to be loaded unless it is also the base register. It
14218 guarantees that SP is reset correctly when an LDM instruction
14219 is interrupted. Otherwise, we might end up with a corrupt stack. */
14220 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14221 return false;
14223 if (regno == REGNO (addr))
14224 addr_reg_in_reglist = true;
14226 for (; i < count; i++)
14228 elt = XVECEXP (op, 0, i);
14229 if (GET_CODE (elt) != SET)
14230 return false;
14232 if (load)
14234 reg = SET_DEST (elt);
14235 mem = SET_SRC (elt);
14237 else
14239 reg = SET_SRC (elt);
14240 mem = SET_DEST (elt);
14243 if (!REG_P (reg)
14244 || GET_MODE (reg) != mode
14245 || REGNO (reg) <= regno
14246 || (consecutive
14247 && (REGNO (reg) !=
14248 (unsigned int) (first_regno + regs_per_val * (i - base))))
14249 /* Don't allow SP to be loaded unless it is also the base register. It
14250 guarantees that SP is reset correctly when an LDM instruction
14251 is interrupted. Otherwise, we might end up with a corrupt stack. */
14252 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14253 || !MEM_P (mem)
14254 || GET_MODE (mem) != mode
14255 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14256 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14257 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14258 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14259 offset + (i - base) * reg_increment))
14260 && (!REG_P (XEXP (mem, 0))
14261 || offset + (i - base) * reg_increment != 0)))
14262 return false;
14264 regno = REGNO (reg);
14265 if (regno == REGNO (addr))
14266 addr_reg_in_reglist = true;
14269 if (load)
14271 if (update && addr_reg_in_reglist)
14272 return false;
14274     /* For Thumb-1, the address register is always modified - either by write-back
14275 or by explicit load. If the pattern does not describe an update,
14276 then the address register must be in the list of loaded registers. */
14277 if (TARGET_THUMB1)
14278 return update || addr_reg_in_reglist;
14281 return true;
14284 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14285 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14286 following form:
14288 [(set (reg:SI <N>) (const_int 0))
14289 (set (reg:SI <M>) (const_int 0))
14291 (unspec_volatile [(const_int 0)]
14292 VUNSPEC_CLRM_APSR)
14293 (clobber (reg:CC CC_REGNUM))
14296    Any number (including 0) of set expressions is valid; the volatile unspec is
14297    optional.  All registers but SP and PC are allowed, and the registers must be
14298    in strictly increasing order.
14300 To be a valid VSCCLRM pattern, OP must have the following form:
14302 [(unspec_volatile [(const_int 0)]
14303 VUNSPEC_VSCCLRM_VPR)
14304 (set (reg:SF <N>) (const_int 0))
14305 (set (reg:SF <M>) (const_int 0))
14309    As with CLRM, any number (including 0) of set expressions is valid; however,
14310 the volatile unspec is mandatory here. Any VFP single-precision register is
14311 accepted but all registers must be consecutive and in increasing order. */
14313 bool
14314 clear_operation_p (rtx op, bool vfp)
14316 unsigned regno;
14317 unsigned last_regno = INVALID_REGNUM;
14318 rtx elt, reg, zero;
14319 int count = XVECLEN (op, 0);
14320 int first_set = vfp ? 1 : 0;
14321 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14323 for (int i = first_set; i < count; i++)
14325 elt = XVECEXP (op, 0, i);
14327 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14329 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14330 || XVECLEN (elt, 0) != 1
14331 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14332 || i != count - 2)
14333 return false;
14335 continue;
14338 if (GET_CODE (elt) == CLOBBER)
14339 continue;
14341 if (GET_CODE (elt) != SET)
14342 return false;
14344 reg = SET_DEST (elt);
14345 zero = SET_SRC (elt);
14347 if (!REG_P (reg)
14348 || GET_MODE (reg) != expected_mode
14349 || zero != CONST0_RTX (SImode))
14350 return false;
14352 regno = REGNO (reg);
14354 if (vfp)
14356 if (i != first_set && regno != last_regno + 1)
14357 return false;
14359 else
14361 if (regno == SP_REGNUM || regno == PC_REGNUM)
14362 return false;
14363 if (i != first_set && regno <= last_regno)
14364 return false;
14367 last_regno = regno;
14370 return true;
14373 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14374 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14375 instruction. ADD_OFFSET is nonzero if the base address register needs
14376 to be modified with an add instruction before we can use it. */
14378 static bool
14379 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14380 int nops, HOST_WIDE_INT add_offset)
14382 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14383 if the offset isn't small enough. The reason 2 ldrs are faster
14384 is because these ARMs are able to do more than one cache access
14385 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14386 whilst the ARM8 has a double bandwidth cache. This means that
14387 these cores can do both an instruction fetch and a data fetch in
14388 a single cycle, so the trick of calculating the address into a
14389 scratch register (one of the result regs) and then doing a load
14390 multiple actually becomes slower (and no smaller in code size).
14391 That is the transformation
14393 ldr rd1, [rbase + offset]
14394 ldr rd2, [rbase + offset + 4]
14398 add rd1, rbase, offset
14399 ldmia rd1, {rd1, rd2}
14401 produces worse code -- '3 cycles + any stalls on rd2' instead of
14402 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14403 access per cycle, the first sequence could never complete in less
14404 than 6 cycles, whereas the ldm sequence would only take 5 and
14405 would make better use of sequential accesses if not hitting the
14406 cache.
14408 We cheat here and test 'arm_ld_sched' which we currently know to
14409 only be true for the ARM8, ARM9 and StrongARM. If this ever
14410 changes, then the test below needs to be reworked. */
14411 if (nops == 2 && arm_ld_sched && add_offset != 0)
14412 return false;
14414 /* XScale has load-store double instructions, but they have stricter
14415 alignment requirements than load-store multiple, so we cannot
14416 use them.
14418 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14419 the pipeline until completion.
14421        NREGS CYCLES
14422          1     3
14423          2     4
14424          3     5
14425          4     6
14427 An ldr instruction takes 1-3 cycles, but does not block the
14428 pipeline.
14430 NREGS CYCLES
14431 1 1-3
14432 2 2-6
14433 3 3-9
14434 4 4-12
14436 Best case ldr will always win. However, the more ldr instructions
14437 we issue, the less likely we are to be able to schedule them well.
14438 Using ldr instructions also increases code size.
14440 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14441 for counts of 3 or 4 regs. */
14442 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14443 return false;
14444 return true;
14447 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14448 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14449 an array ORDER which describes the sequence to use when accessing the
14450 offsets that produces an ascending order. In this sequence, each
14451 offset must be larger by exactly 4 than the previous one. ORDER[0]
14452 must have been filled in with the lowest offset by the caller.
14453 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14454 we use to verify that ORDER produces an ascending order of registers.
14455 Return true if it was possible to construct such an order, false if
14456 not. */
14458 static bool
14459 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14460 int *unsorted_regs)
14462 int i;
14463 for (i = 1; i < nops; i++)
14465 int j;
14467 order[i] = order[i - 1];
14468 for (j = 0; j < nops; j++)
14469 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14471 /* We must find exactly one offset that is higher than the
14472 previous one by 4. */
14473 if (order[i] != order[i - 1])
14474 return false;
14475 order[i] = j;
14477 if (order[i] == order[i - 1])
14478 return false;
14479 /* The register numbers must be ascending. */
14480 if (unsorted_regs != NULL
14481 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14482 return false;
14484 return true;
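/* For instance, with UNSORTED_OFFSETS = { 8, 0, 4, 12 } the caller presets
   ORDER[0] = 1 (the index of the lowest offset, 0); the loop then fills
   ORDER = { 1, 2, 0, 3 }, each step finding the unique offset exactly 4
   larger than the previous one.  If UNSORTED_REGS is supplied, the register
   numbers at indices 1, 2, 0, 3 must also be ascending.  */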
14487 /* Used to determine in a peephole whether a sequence of load
14488 instructions can be changed into a load-multiple instruction.
14489 NOPS is the number of separate load instructions we are examining. The
14490 first NOPS entries in OPERANDS are the destination registers, the
14491 next NOPS entries are memory operands. If this function is
14492 successful, *BASE is set to the common base register of the memory
14493 accesses; *LOAD_OFFSET is set to the first memory location's offset
14494 from that base register.
14495 REGS is an array filled in with the destination register numbers.
14496    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14497    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
14498 the sequence of registers in REGS matches the loads from ascending memory
14499 locations, and the function verifies that the register numbers are
14500 themselves ascending. If CHECK_REGS is false, the register numbers
14501 are stored in the order they are found in the operands. */
14502 static int
14503 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14504 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14506 int unsorted_regs[MAX_LDM_STM_OPS];
14507 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14508 int order[MAX_LDM_STM_OPS];
14509 int base_reg = -1;
14510 int i, ldm_case;
14512 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14513 easily extended if required. */
14514 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14516 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14518 /* Loop over the operands and check that the memory references are
14519 suitable (i.e. immediate offsets from the same base register). At
14520 the same time, extract the target register, and the memory
14521 offsets. */
14522 for (i = 0; i < nops; i++)
14524 rtx reg;
14525 rtx offset;
14527 /* Convert a subreg of a mem into the mem itself. */
14528 if (GET_CODE (operands[nops + i]) == SUBREG)
14529 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14531 gcc_assert (MEM_P (operands[nops + i]));
14533 /* Don't reorder volatile memory references; it doesn't seem worth
14534 looking for the case where the order is ok anyway. */
14535 if (MEM_VOLATILE_P (operands[nops + i]))
14536 return 0;
14538 offset = const0_rtx;
14540 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14541 || (SUBREG_P (reg)
14542 && REG_P (reg = SUBREG_REG (reg))))
14543 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14544 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14545 || (SUBREG_P (reg)
14546 && REG_P (reg = SUBREG_REG (reg))))
14547 && (CONST_INT_P (offset
14548 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14550 if (i == 0)
14552 base_reg = REGNO (reg);
14553 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14554 return 0;
14556 else if (base_reg != (int) REGNO (reg))
14557 /* Not addressed from the same base register. */
14558 return 0;
14560 unsorted_regs[i] = (REG_P (operands[i])
14561 ? REGNO (operands[i])
14562 : REGNO (SUBREG_REG (operands[i])));
14564 /* If it isn't an integer register, or if it overwrites the
14565 base register but isn't the last insn in the list, then
14566 we can't do this. */
14567 if (unsorted_regs[i] < 0
14568 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14569 || unsorted_regs[i] > 14
14570 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14571 return 0;
14573 /* Don't allow SP to be loaded unless it is also the base
14574 register. It guarantees that SP is reset correctly when
14575 an LDM instruction is interrupted. Otherwise, we might
14576 end up with a corrupt stack. */
14577 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14578 return 0;
14580 unsorted_offsets[i] = INTVAL (offset);
14581 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14582 order[0] = i;
14584 else
14585 /* Not a suitable memory address. */
14586 return 0;
14589 /* All the useful information has now been extracted from the
14590 operands into unsorted_regs and unsorted_offsets; additionally,
14591 order[0] has been set to the lowest offset in the list. Sort
14592 the offsets into order, verifying that they are adjacent, and
14593 check that the register numbers are ascending. */
14594 if (!compute_offset_order (nops, unsorted_offsets, order,
14595 check_regs ? unsorted_regs : NULL))
14596 return 0;
14598 if (saved_order)
14599 memcpy (saved_order, order, sizeof order);
14601 if (base)
14603 *base = base_reg;
14605 for (i = 0; i < nops; i++)
14606 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14608 *load_offset = unsorted_offsets[order[0]];
14611 if (unsorted_offsets[order[0]] == 0)
14612 ldm_case = 1; /* ldmia */
14613 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14614 ldm_case = 2; /* ldmib */
14615 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14616 ldm_case = 3; /* ldmda */
14617 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14618 ldm_case = 4; /* ldmdb */
14619 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14620 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14621 ldm_case = 5;
14622 else
14623 return 0;
14625 if (!multiple_operation_profitable_p (false, nops,
14626 ldm_case == 5
14627 ? unsorted_offsets[order[0]] : 0))
14628 return 0;
14630 return ldm_case;
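/* For example, three loads of r4, r5 and r6 from [r0], [r0, #4] and
   [r0, #8] give offsets { 0, 4, 8 } from base r0; the lowest offset is 0,
   so the function returns ldm_case 1 (ldmia) with *LOAD_OFFSET = 0 and
   REGS = { 4, 5, 6 }.  */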
14633 /* Used to determine in a peephole whether a sequence of store instructions can
14634 be changed into a store-multiple instruction.
14635 NOPS is the number of separate store instructions we are examining.
14636 NOPS_TOTAL is the total number of instructions recognized by the peephole
14637 pattern.
14638 The first NOPS entries in OPERANDS are the source registers, the next
14639 NOPS entries are memory operands. If this function is successful, *BASE is
14640 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14641 to the first memory location's offset from that base register. REGS is an
14642 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14643 likewise filled with the corresponding rtx's.
14644    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14645 numbers to an ascending order of stores.
14646 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14647 from ascending memory locations, and the function verifies that the register
14648 numbers are themselves ascending. If CHECK_REGS is false, the register
14649 numbers are stored in the order they are found in the operands. */
14650 static int
14651 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14652 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14653 HOST_WIDE_INT *load_offset, bool check_regs)
14655 int unsorted_regs[MAX_LDM_STM_OPS];
14656 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14657 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14658 int order[MAX_LDM_STM_OPS];
14659 int base_reg = -1;
14660 rtx base_reg_rtx = NULL;
14661 int i, stm_case;
14663 /* Write back of base register is currently only supported for Thumb 1. */
14664 int base_writeback = TARGET_THUMB1;
14666 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14667 easily extended if required. */
14668 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14670 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14672 /* Loop over the operands and check that the memory references are
14673 suitable (i.e. immediate offsets from the same base register). At
14674 the same time, extract the target register, and the memory
14675 offsets. */
14676 for (i = 0; i < nops; i++)
14678 rtx reg;
14679 rtx offset;
14681 /* Convert a subreg of a mem into the mem itself. */
14682 if (GET_CODE (operands[nops + i]) == SUBREG)
14683 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14685 gcc_assert (MEM_P (operands[nops + i]));
14687 /* Don't reorder volatile memory references; it doesn't seem worth
14688 looking for the case where the order is ok anyway. */
14689 if (MEM_VOLATILE_P (operands[nops + i]))
14690 return 0;
14692 offset = const0_rtx;
14694 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14695 || (SUBREG_P (reg)
14696 && REG_P (reg = SUBREG_REG (reg))))
14697 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14698 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14699 || (SUBREG_P (reg)
14700 && REG_P (reg = SUBREG_REG (reg))))
14701 && (CONST_INT_P (offset
14702 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14704 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14705 ? operands[i] : SUBREG_REG (operands[i]));
14706 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14708 if (i == 0)
14710 base_reg = REGNO (reg);
14711 base_reg_rtx = reg;
14712 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14713 return 0;
14715 else if (base_reg != (int) REGNO (reg))
14716 /* Not addressed from the same base register. */
14717 return 0;
14719 /* If it isn't an integer register, then we can't do this. */
14720 if (unsorted_regs[i] < 0
14721 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14722 /* The effects are unpredictable if the base register is
14723 both updated and stored. */
14724 || (base_writeback && unsorted_regs[i] == base_reg)
14725 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14726 || unsorted_regs[i] > 14)
14727 return 0;
14729 unsorted_offsets[i] = INTVAL (offset);
14730 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14731 order[0] = i;
14733 else
14734 /* Not a suitable memory address. */
14735 return 0;
14738 /* All the useful information has now been extracted from the
14739 operands into unsorted_regs and unsorted_offsets; additionally,
14740 order[0] has been set to the lowest offset in the list. Sort
14741 the offsets into order, verifying that they are adjacent, and
14742 check that the register numbers are ascending. */
14743 if (!compute_offset_order (nops, unsorted_offsets, order,
14744 check_regs ? unsorted_regs : NULL))
14745 return 0;
14747 if (saved_order)
14748 memcpy (saved_order, order, sizeof order);
14750 if (base)
14752 *base = base_reg;
14754 for (i = 0; i < nops; i++)
14756 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14757 if (reg_rtxs)
14758 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14761 *load_offset = unsorted_offsets[order[0]];
14764 if (TARGET_THUMB1
14765 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14766 return 0;
14768 if (unsorted_offsets[order[0]] == 0)
14769 stm_case = 1; /* stmia */
14770 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14771 stm_case = 2; /* stmib */
14772 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14773 stm_case = 3; /* stmda */
14774 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14775 stm_case = 4; /* stmdb */
14776 else
14777 return 0;
14779 if (!multiple_operation_profitable_p (false, nops, 0))
14780 return 0;
14782 return stm_case;
14785 /* Routines for use in generating RTL. */
14787 /* Generate a load-multiple instruction. COUNT is the number of loads in
14788 the instruction; REGS and MEMS are arrays containing the operands.
14789 BASEREG is the base register to be used in addressing the memory operands.
14790 WBACK_OFFSET is nonzero if the instruction should update the base
14791 register. */
14793 static rtx
14794 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14795 HOST_WIDE_INT wback_offset)
14797 int i = 0, j;
14798 rtx result;
14800 if (!multiple_operation_profitable_p (false, count, 0))
14802 rtx seq;
14804 start_sequence ();
14806 for (i = 0; i < count; i++)
14807 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14809 if (wback_offset != 0)
14810 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14812 seq = get_insns ();
14813 end_sequence ();
14815 return seq;
14818 result = gen_rtx_PARALLEL (VOIDmode,
14819 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14820 if (wback_offset != 0)
14822 XVECEXP (result, 0, 0)
14823 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14824 i = 1;
14825 count++;
14828 for (j = 0; i < count; i++, j++)
14829 XVECEXP (result, 0, i)
14830 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14832 return result;
14835 /* Generate a store-multiple instruction. COUNT is the number of stores in
14836 the instruction; REGS and MEMS are arrays containing the operands.
14837 BASEREG is the base register to be used in addressing the memory operands.
14838 WBACK_OFFSET is nonzero if the instruction should update the base
14839 register. */
14841 static rtx
14842 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14843 HOST_WIDE_INT wback_offset)
14845 int i = 0, j;
14846 rtx result;
14848 if (GET_CODE (basereg) == PLUS)
14849 basereg = XEXP (basereg, 0);
14851 if (!multiple_operation_profitable_p (false, count, 0))
14853 rtx seq;
14855 start_sequence ();
14857 for (i = 0; i < count; i++)
14858 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14860 if (wback_offset != 0)
14861 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14863 seq = get_insns ();
14864 end_sequence ();
14866 return seq;
14869 result = gen_rtx_PARALLEL (VOIDmode,
14870 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14871 if (wback_offset != 0)
14873 XVECEXP (result, 0, 0)
14874 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14875 i = 1;
14876 count++;
14879 for (j = 0; i < count; i++, j++)
14880 XVECEXP (result, 0, i)
14881 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14883 return result;
14886 /* Generate either a load-multiple or a store-multiple instruction. This
14887 function can be used in situations where we can start with a single MEM
14888 rtx and adjust its address upwards.
14889 COUNT is the number of operations in the instruction, not counting a
14890 possible update of the base register. REGS is an array containing the
14891 register operands.
14892 BASEREG is the base register to be used in addressing the memory operands,
14893 which are constructed from BASEMEM.
14894 WRITE_BACK specifies whether the generated instruction should include an
14895 update of the base register.
14896 OFFSETP is used to pass an offset to and from this function; this offset
14897 is not used when constructing the address (instead BASEMEM should have an
14898 appropriate offset in its address), it is used only for setting
14899 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14901 static rtx
14902 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14903 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14905 rtx mems[MAX_LDM_STM_OPS];
14906 HOST_WIDE_INT offset = *offsetp;
14907 int i;
14909 gcc_assert (count <= MAX_LDM_STM_OPS);
14911 if (GET_CODE (basereg) == PLUS)
14912 basereg = XEXP (basereg, 0);
14914 for (i = 0; i < count; i++)
14916 rtx addr = plus_constant (Pmode, basereg, i * 4);
14917 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14918 offset += 4;
14921 if (write_back)
14922 *offsetp = offset;
14924 if (is_load)
14925 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14926 write_back ? 4 * count : 0);
14927 else
14928 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14929 write_back ? 4 * count : 0);
14933 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14934 rtx basemem, HOST_WIDE_INT *offsetp)
14936 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14937 offsetp);
14941 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14942 rtx basemem, HOST_WIDE_INT *offsetp)
14944 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14945 offsetp);
14948 /* Called from a peephole2 expander to turn a sequence of loads into an
14949 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14950 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14951 is true if we can reorder the registers because they are used commutatively
14952 subsequently.
14953 Returns true iff we could generate a new instruction. */
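/* Note that ldm/stm transfer the lowest-numbered register to or from the
   lowest address, so the final register list must be in ascending order;
   when SORT_REGS is true the simple exchange sort below reorders REGS to
   satisfy that.  */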
14955 bool
14956 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14958 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14959 rtx mems[MAX_LDM_STM_OPS];
14960 int i, j, base_reg;
14961 rtx base_reg_rtx;
14962 HOST_WIDE_INT offset;
14963 int write_back = FALSE;
14964 int ldm_case;
14965 rtx addr;
14967 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14968 &base_reg, &offset, !sort_regs);
14970 if (ldm_case == 0)
14971 return false;
14973 if (sort_regs)
14974 for (i = 0; i < nops - 1; i++)
14975 for (j = i + 1; j < nops; j++)
14976 if (regs[i] > regs[j])
14978 int t = regs[i];
14979 regs[i] = regs[j];
14980 regs[j] = t;
14982 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14984 if (TARGET_THUMB1)
14986 gcc_assert (ldm_case == 1 || ldm_case == 5);
14988 /* Thumb-1 ldm uses writeback unless the base register is also loaded. */
14989 write_back = true;
14990 for (i = 0; i < nops; i++)
14991 if (base_reg == regs[i])
14992 write_back = false;
14994 /* Ensure the base is dead if it is updated. */
14995 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14996 return false;
14999 if (ldm_case == 5)
15001 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15002 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15003 offset = 0;
15004 base_reg_rtx = newbase;
15007 for (i = 0; i < nops; i++)
15009 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15010 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15011 SImode, addr, 0);
15013 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15014 write_back ? offset + i * 4 : 0));
15015 return true;
15018 /* Called from a peephole2 expander to turn a sequence of stores into an
15019 STM instruction. OPERANDS are the operands found by the peephole matcher;
15020 NOPS indicates how many separate stores we are trying to combine.
15021 Returns true iff we could generate a new instruction. */
15023 bool
15024 gen_stm_seq (rtx *operands, int nops)
15026 int i;
15027 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15028 rtx mems[MAX_LDM_STM_OPS];
15029 int base_reg;
15030 rtx base_reg_rtx;
15031 HOST_WIDE_INT offset;
15032 int write_back = FALSE;
15033 int stm_case;
15034 rtx addr;
15035 bool base_reg_dies;
15037 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15038 mem_order, &base_reg, &offset, true);
15040 if (stm_case == 0)
15041 return false;
15043 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15045 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15046 if (TARGET_THUMB1)
15048 gcc_assert (base_reg_dies);
15049 write_back = TRUE;
15052 if (stm_case == 5)
15054 gcc_assert (base_reg_dies);
15055 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15056 offset = 0;
15059 addr = plus_constant (Pmode, base_reg_rtx, offset);
15061 for (i = 0; i < nops; i++)
15063 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15064 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15065 SImode, addr, 0);
15067 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15068 write_back ? offset + i * 4 : 0));
15069 return true;
15072 /* Called from a peephole2 expander to turn a sequence of stores that are
15073 preceded by constant loads into an STM instruction. OPERANDS are the
15074 operands found by the peephole matcher; NOPS indicates how many
15075 separate stores we are trying to combine; there are 2 * NOPS
15076 instructions in the peephole.
15077 Returns true iff we could generate a new instruction. */
15079 bool
15080 gen_const_stm_seq (rtx *operands, int nops)
15082 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15083 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15084 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15085 rtx mems[MAX_LDM_STM_OPS];
15086 int base_reg;
15087 rtx base_reg_rtx;
15088 HOST_WIDE_INT offset;
15089 int write_back = FALSE;
15090 int stm_case;
15091 rtx addr;
15092 bool base_reg_dies;
15093 int i, j;
15094 HARD_REG_SET allocated;
15096 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15097 mem_order, &base_reg, &offset, false);
15099 if (stm_case == 0)
15100 return false;
15102 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15104 /* If the same register is used more than once, try to find a free
15105 register. */
15106 CLEAR_HARD_REG_SET (allocated);
15107 for (i = 0; i < nops; i++)
15109 for (j = i + 1; j < nops; j++)
15110 if (regs[i] == regs[j])
15112 rtx t = peep2_find_free_register (0, nops * 2,
15113 TARGET_THUMB1 ? "l" : "r",
15114 SImode, &allocated);
15115 if (t == NULL_RTX)
15116 return false;
15117 reg_rtxs[i] = t;
15118 regs[i] = REGNO (t);
15122 /* Compute an ordering that maps the register numbers to an ascending
15123 sequence. */
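  /* For instance, with regs == {3, 1, 2} the selection loop below produces
     reg_order == {1, 2, 0}, i.e. the indices sorted by ascending register
     number.  */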
15124 reg_order[0] = 0;
15125 for (i = 0; i < nops; i++)
15126 if (regs[i] < regs[reg_order[0]])
15127 reg_order[0] = i;
15129 for (i = 1; i < nops; i++)
15131 int this_order = reg_order[i - 1];
15132 for (j = 0; j < nops; j++)
15133 if (regs[j] > regs[reg_order[i - 1]]
15134 && (this_order == reg_order[i - 1]
15135 || regs[j] < regs[this_order]))
15136 this_order = j;
15137 reg_order[i] = this_order;
15140 /* Ensure that registers that must be live after the instruction end
15141 up with the correct value. */
15142 for (i = 0; i < nops; i++)
15144 int this_order = reg_order[i];
15145 if ((this_order != mem_order[i]
15146 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15147 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15148 return false;
15151 /* Load the constants. */
15152 for (i = 0; i < nops; i++)
15154 rtx op = operands[2 * nops + mem_order[i]];
15155 sorted_regs[i] = regs[reg_order[i]];
15156 emit_move_insn (reg_rtxs[reg_order[i]], op);
15159 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15161 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15162 if (TARGET_THUMB1)
15164 gcc_assert (base_reg_dies);
15165 write_back = TRUE;
15168 if (stm_case == 5)
15170 gcc_assert (base_reg_dies);
15171 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15172 offset = 0;
15175 addr = plus_constant (Pmode, base_reg_rtx, offset);
15177 for (i = 0; i < nops; i++)
15179 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15180 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15181 SImode, addr, 0);
15183 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15184 write_back ? offset + i * 4 : 0));
15185 return true;
15188 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15189 unaligned copies on processors which support unaligned semantics for those
15190 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15191 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15192 An interleave factor of 1 (the minimum) will perform no interleaving.
15193 Load/store multiple are used for aligned addresses where possible. */
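/* As a worked example, LENGTH == 23 with INTERLEAVE_FACTOR == 2 copies two
   8-byte blocks in the main loop, then one whole word, one halfword and a
   final byte (16 + 4 + 2 + 1 bytes).  */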
15195 static void
15196 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15197 HOST_WIDE_INT length,
15198 unsigned int interleave_factor)
15200 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15201 int *regnos = XALLOCAVEC (int, interleave_factor);
15202 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15203 HOST_WIDE_INT i, j;
15204 HOST_WIDE_INT remaining = length, words;
15205 rtx halfword_tmp = NULL, byte_tmp = NULL;
15206 rtx dst, src;
15207 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15208 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15209 HOST_WIDE_INT srcoffset, dstoffset;
15210 HOST_WIDE_INT src_autoinc, dst_autoinc;
15211 rtx mem, addr;
15213 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15215 /* Use hard registers if we have aligned source or destination so we can use
15216 load/store multiple with contiguous registers. */
15217 if (dst_aligned || src_aligned)
15218 for (i = 0; i < interleave_factor; i++)
15219 regs[i] = gen_rtx_REG (SImode, i);
15220 else
15221 for (i = 0; i < interleave_factor; i++)
15222 regs[i] = gen_reg_rtx (SImode);
15224 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15225 src = copy_addr_to_reg (XEXP (srcbase, 0));
15227 srcoffset = dstoffset = 0;
15229 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15230 For copying the last bytes we want to subtract this offset again. */
15231 src_autoinc = dst_autoinc = 0;
15233 for (i = 0; i < interleave_factor; i++)
15234 regnos[i] = i;
15236 /* Copy BLOCK_SIZE_BYTES chunks. */
15238 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15240 /* Load words. */
15241 if (src_aligned && interleave_factor > 1)
15243 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15244 TRUE, srcbase, &srcoffset));
15245 src_autoinc += UNITS_PER_WORD * interleave_factor;
15247 else
15249 for (j = 0; j < interleave_factor; j++)
15251 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15252 - src_autoinc));
15253 mem = adjust_automodify_address (srcbase, SImode, addr,
15254 srcoffset + j * UNITS_PER_WORD);
15255 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15257 srcoffset += block_size_bytes;
15260 /* Store words. */
15261 if (dst_aligned && interleave_factor > 1)
15263 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15264 TRUE, dstbase, &dstoffset));
15265 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15267 else
15269 for (j = 0; j < interleave_factor; j++)
15271 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15272 - dst_autoinc));
15273 mem = adjust_automodify_address (dstbase, SImode, addr,
15274 dstoffset + j * UNITS_PER_WORD);
15275 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15277 dstoffset += block_size_bytes;
15280 remaining -= block_size_bytes;
15283 /* Copy any whole words left (note these aren't interleaved with any
15284 subsequent halfword/byte load/stores in the interests of simplicity). */
15286 words = remaining / UNITS_PER_WORD;
15288 gcc_assert (words < interleave_factor);
15290 if (src_aligned && words > 1)
15292 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15293 &srcoffset));
15294 src_autoinc += UNITS_PER_WORD * words;
15296 else
15298 for (j = 0; j < words; j++)
15300 addr = plus_constant (Pmode, src,
15301 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15302 mem = adjust_automodify_address (srcbase, SImode, addr,
15303 srcoffset + j * UNITS_PER_WORD);
15304 if (src_aligned)
15305 emit_move_insn (regs[j], mem);
15306 else
15307 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15309 srcoffset += words * UNITS_PER_WORD;
15312 if (dst_aligned && words > 1)
15314 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15315 &dstoffset));
15316 dst_autoinc += words * UNITS_PER_WORD;
15318 else
15320 for (j = 0; j < words; j++)
15322 addr = plus_constant (Pmode, dst,
15323 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15324 mem = adjust_automodify_address (dstbase, SImode, addr,
15325 dstoffset + j * UNITS_PER_WORD);
15326 if (dst_aligned)
15327 emit_move_insn (mem, regs[j]);
15328 else
15329 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15331 dstoffset += words * UNITS_PER_WORD;
15334 remaining -= words * UNITS_PER_WORD;
15336 gcc_assert (remaining < 4);
15338 /* Copy a halfword if necessary. */
15340 if (remaining >= 2)
15342 halfword_tmp = gen_reg_rtx (SImode);
15344 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15345 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15346 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15348 /* Either write out immediately, or delay until we've loaded the last
15349 byte, depending on interleave factor. */
15350 if (interleave_factor == 1)
15352 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15353 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15354 emit_insn (gen_unaligned_storehi (mem,
15355 gen_lowpart (HImode, halfword_tmp)));
15356 halfword_tmp = NULL;
15357 dstoffset += 2;
15360 remaining -= 2;
15361 srcoffset += 2;
15364 gcc_assert (remaining < 2);
15366 /* Copy last byte. */
15368 if ((remaining & 1) != 0)
15370 byte_tmp = gen_reg_rtx (SImode);
15372 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15373 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15374 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15376 if (interleave_factor == 1)
15378 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15379 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15380 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15381 byte_tmp = NULL;
15382 dstoffset++;
15385 remaining--;
15386 srcoffset++;
15389 /* Store last halfword if we haven't done so already. */
15391 if (halfword_tmp)
15393 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15394 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15395 emit_insn (gen_unaligned_storehi (mem,
15396 gen_lowpart (HImode, halfword_tmp)));
15397 dstoffset += 2;
15400 /* Likewise for last byte. */
15402 if (byte_tmp)
15404 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15405 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15406 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15407 dstoffset++;
15410 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15413 /* From mips_adjust_block_mem:
15415 Helper function for doing a loop-based block operation on memory
15416 reference MEM. Each iteration of the loop will operate on LENGTH
15417 bytes of MEM.
15419 Create a new base register for use within the loop and point it to
15420 the start of MEM. Create a new memory reference that uses this
15421 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15423 static void
15424 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15425 rtx *loop_mem)
15427 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15429 /* Although the new mem does not refer to a known location,
15430 it does keep up to LENGTH bytes of alignment. */
15431 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15432 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15435 /* From mips_block_move_loop:
15437 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15438 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15439 the memory regions do not overlap. */
15441 static void
15442 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15443 unsigned int interleave_factor,
15444 HOST_WIDE_INT bytes_per_iter)
15446 rtx src_reg, dest_reg, final_src, test;
15447 HOST_WIDE_INT leftover;
15449 leftover = length % bytes_per_iter;
15450 length -= leftover;
15452 /* Create registers and memory references for use within the loop. */
15453 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15454 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15456 /* Calculate the value that SRC_REG should have after the last iteration of
15457 the loop. */
15458 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15459 0, 0, OPTAB_WIDEN);
15461 /* Emit the start of the loop. */
15462 rtx_code_label *label = gen_label_rtx ();
15463 emit_label (label);
15465 /* Emit the loop body. */
15466 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15467 interleave_factor);
15469 /* Move on to the next block. */
15470 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15471 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15473 /* Emit the loop condition. */
15474 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15475 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15477 /* Mop up any left-over bytes. */
15478 if (leftover)
15479 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15482 /* Emit a block move when either the source or destination is unaligned (not
15483 aligned to a four-byte boundary). This may need further tuning depending on
15484 core type, optimize_size setting, etc. */
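/* For instance (illustrative only), when not optimizing for size a 40-byte
   copy takes the loop path below (length > 32) with an interleave factor of
   4 and 16 bytes per iteration: two loop iterations plus an 8-byte straight
   tail; a 24-byte copy is handled entirely by the straight variant.  */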
15486 static int
15487 arm_cpymemqi_unaligned (rtx *operands)
15489 HOST_WIDE_INT length = INTVAL (operands[2]);
15491 if (optimize_size)
15493 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15494 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15495 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15496 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15497 or dst_aligned though: allow more interleaving in those cases since the
15498 resulting code can be smaller. */
15499 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15500 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15502 if (length > 12)
15503 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15504 interleave_factor, bytes_per_iter);
15505 else
15506 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15507 interleave_factor);
15509 else
15511 /* Note that the loop created by arm_block_move_unaligned_loop may be
15512 subject to loop unrolling, which makes tuning this condition a little
15513 redundant. */
15514 if (length > 32)
15515 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15516 else
15517 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15520 return 1;
15524 arm_gen_cpymemqi (rtx *operands)
15526 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15527 HOST_WIDE_INT srcoffset, dstoffset;
15528 rtx src, dst, srcbase, dstbase;
15529 rtx part_bytes_reg = NULL;
15530 rtx mem;
15532 if (!CONST_INT_P (operands[2])
15533 || !CONST_INT_P (operands[3])
15534 || INTVAL (operands[2]) > 64)
15535 return 0;
15537 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15538 return arm_cpymemqi_unaligned (operands);
15540 if (INTVAL (operands[3]) & 3)
15541 return 0;
15543 dstbase = operands[0];
15544 srcbase = operands[1];
15546 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15547 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15549 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15550 out_words_to_go = INTVAL (operands[2]) / 4;
15551 last_bytes = INTVAL (operands[2]) & 3;
15552 dstoffset = srcoffset = 0;
15554 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15555 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15557 while (in_words_to_go >= 2)
15559 if (in_words_to_go > 4)
15560 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15561 TRUE, srcbase, &srcoffset));
15562 else
15563 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15564 src, FALSE, srcbase,
15565 &srcoffset));
15567 if (out_words_to_go)
15569 if (out_words_to_go > 4)
15570 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15571 TRUE, dstbase, &dstoffset));
15572 else if (out_words_to_go != 1)
15573 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15574 out_words_to_go, dst,
15575 (last_bytes == 0
15576 ? FALSE : TRUE),
15577 dstbase, &dstoffset));
15578 else
15580 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15581 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15582 if (last_bytes != 0)
15584 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15585 dstoffset += 4;
15590 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15591 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15594 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15595 if (out_words_to_go)
15597 rtx sreg;
15599 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15600 sreg = copy_to_reg (mem);
15602 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15603 emit_move_insn (mem, sreg);
15604 in_words_to_go--;
15606 gcc_assert (!in_words_to_go); /* Sanity check */
15609 if (in_words_to_go)
15611 gcc_assert (in_words_to_go > 0);
15613 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15614 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15617 gcc_assert (!last_bytes || part_bytes_reg);
15619 if (BYTES_BIG_ENDIAN && last_bytes)
15621 rtx tmp = gen_reg_rtx (SImode);
15623 /* The bytes we want are in the top end of the word. */
15624 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15625 GEN_INT (8 * (4 - last_bytes))));
15626 part_bytes_reg = tmp;
15628 while (last_bytes)
15630 mem = adjust_automodify_address (dstbase, QImode,
15631 plus_constant (Pmode, dst,
15632 last_bytes - 1),
15633 dstoffset + last_bytes - 1);
15634 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15636 if (--last_bytes)
15638 tmp = gen_reg_rtx (SImode);
15639 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15640 part_bytes_reg = tmp;
15645 else
15647 if (last_bytes > 1)
15649 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15650 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15651 last_bytes -= 2;
15652 if (last_bytes)
15654 rtx tmp = gen_reg_rtx (SImode);
15655 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15656 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15657 part_bytes_reg = tmp;
15658 dstoffset += 2;
15662 if (last_bytes)
15664 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15665 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15669 return 1;
15672 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15673 by mode size. */
15674 inline static rtx
15675 next_consecutive_mem (rtx mem)
15677 machine_mode mode = GET_MODE (mem);
15678 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15679 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15681 return adjust_automodify_address (mem, mode, addr, offset);
15684 /* Copy using LDRD/STRD instructions whenever possible.
15685 Returns true upon success. */
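/* For example, a 15-byte copy between sufficiently aligned buffers is emitted
   as one doubleword move (typically LDRD/STRD), one word, one halfword and a
   final byte (8 + 4 + 2 + 1); the exact instructions depend on the alignment
   checks below.  */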
15686 bool
15687 gen_cpymem_ldrd_strd (rtx *operands)
15689 unsigned HOST_WIDE_INT len;
15690 HOST_WIDE_INT align;
15691 rtx src, dst, base;
15692 rtx reg0;
15693 bool src_aligned, dst_aligned;
15694 bool src_volatile, dst_volatile;
15696 gcc_assert (CONST_INT_P (operands[2]));
15697 gcc_assert (CONST_INT_P (operands[3]));
15699 len = UINTVAL (operands[2]);
15700 if (len > 64)
15701 return false;
15703 /* Maximum alignment we can assume for both src and dst buffers. */
15704 align = INTVAL (operands[3]);
15706 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15707 return false;
15709 /* Place src and dst addresses in registers
15710 and update the corresponding mem rtx. */
15711 dst = operands[0];
15712 dst_volatile = MEM_VOLATILE_P (dst);
15713 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15714 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15715 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15717 src = operands[1];
15718 src_volatile = MEM_VOLATILE_P (src);
15719 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15720 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15721 src = adjust_automodify_address (src, VOIDmode, base, 0);
15723 if (!unaligned_access && !(src_aligned && dst_aligned))
15724 return false;
15726 if (src_volatile || dst_volatile)
15727 return false;
15729 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15730 if (!(dst_aligned || src_aligned))
15731 return arm_gen_cpymemqi (operands);
15733 /* If either the src or dst is unaligned we'll be accessing it as pairs
15734 of unaligned SImode accesses. Otherwise we can generate DImode
15735 ldrd/strd instructions. */
15736 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15737 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15739 while (len >= 8)
15741 len -= 8;
15742 reg0 = gen_reg_rtx (DImode);
15743 rtx first_reg = NULL_RTX;
15744 rtx second_reg = NULL_RTX;
15746 if (!src_aligned || !dst_aligned)
15748 if (BYTES_BIG_ENDIAN)
15750 second_reg = gen_lowpart (SImode, reg0);
15751 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15753 else
15755 first_reg = gen_lowpart (SImode, reg0);
15756 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15759 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15760 emit_move_insn (reg0, src);
15761 else if (src_aligned)
15762 emit_insn (gen_unaligned_loaddi (reg0, src));
15763 else
15765 emit_insn (gen_unaligned_loadsi (first_reg, src));
15766 src = next_consecutive_mem (src);
15767 emit_insn (gen_unaligned_loadsi (second_reg, src));
15770 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15771 emit_move_insn (dst, reg0);
15772 else if (dst_aligned)
15773 emit_insn (gen_unaligned_storedi (dst, reg0));
15774 else
15776 emit_insn (gen_unaligned_storesi (dst, first_reg));
15777 dst = next_consecutive_mem (dst);
15778 emit_insn (gen_unaligned_storesi (dst, second_reg));
15781 src = next_consecutive_mem (src);
15782 dst = next_consecutive_mem (dst);
15785 gcc_assert (len < 8);
15786 if (len >= 4)
15788 /* At least a word but less than a double-word left to copy. Copy a word. */
15789 reg0 = gen_reg_rtx (SImode);
15790 src = adjust_address (src, SImode, 0);
15791 dst = adjust_address (dst, SImode, 0);
15792 if (src_aligned)
15793 emit_move_insn (reg0, src);
15794 else
15795 emit_insn (gen_unaligned_loadsi (reg0, src));
15797 if (dst_aligned)
15798 emit_move_insn (dst, reg0);
15799 else
15800 emit_insn (gen_unaligned_storesi (dst, reg0));
15802 src = next_consecutive_mem (src);
15803 dst = next_consecutive_mem (dst);
15804 len -= 4;
15807 if (len == 0)
15808 return true;
15810 /* Copy the remaining bytes. */
15811 if (len >= 2)
15813 dst = adjust_address (dst, HImode, 0);
15814 src = adjust_address (src, HImode, 0);
15815 reg0 = gen_reg_rtx (SImode);
15816 if (src_aligned)
15817 emit_insn (gen_zero_extendhisi2 (reg0, src));
15818 else
15819 emit_insn (gen_unaligned_loadhiu (reg0, src));
15821 if (dst_aligned)
15822 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15823 else
15824 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15826 src = next_consecutive_mem (src);
15827 dst = next_consecutive_mem (dst);
15828 if (len == 2)
15829 return true;
15832 dst = adjust_address (dst, QImode, 0);
15833 src = adjust_address (src, QImode, 0);
15834 reg0 = gen_reg_rtx (QImode);
15835 emit_move_insn (reg0, src);
15836 emit_move_insn (dst, reg0);
15837 return true;
15840 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15841 into its component 32-bit subregs. OP2 may be an immediate
15842 constant and we want to simplify it in that case. */
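/* E.g. an immediate OP2 of 0x100000003 decomposes into *LO_OP2 == 3 and
   *HI_OP2 == 1.  */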
15843 void
15844 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15845 rtx *lo_op2, rtx *hi_op2)
15847 *lo_op1 = gen_lowpart (SImode, op1);
15848 *hi_op1 = gen_highpart (SImode, op1);
15849 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15850 subreg_lowpart_offset (SImode, DImode));
15851 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15852 subreg_highpart_offset (SImode, DImode));
15855 /* Select a dominance comparison mode if possible for a test of the general
15856 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15857 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15858 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15859 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15860 In all cases OP will be either EQ or NE, but we don't need to know which
15861 here. If we are unable to support a dominance comparison we return
15862 CC mode. This will then fail to match for the RTL expressions that
15863 generate this call. */
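/* For instance, with COND_OR == DOM_CC_X_OR_Y, X an EQ comparison and Y an
   LE comparison, comparison_dominates_p (EQ, LE) holds (equality implies
   less-or-equal), so the switch below returns CC_DLEmode and the pair can be
   matched by the conditional-compare patterns.  */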
15864 machine_mode
15865 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15867 enum rtx_code cond1, cond2;
15868 int swapped = 0;
15870 /* Currently we will probably get the wrong result if the individual
15871 comparisons are not simple. This also ensures that it is safe to
15872 reverse a comparison if necessary. */
15873 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15874 != CCmode)
15875 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15876 != CCmode))
15877 return CCmode;
15879 /* The if_then_else variant of this tests the second condition if the
15880 first passes, but is true if the first fails. Reverse the first
15881 condition to get a true "inclusive-or" expression. */
15882 if (cond_or == DOM_CC_NX_OR_Y)
15883 cond1 = reverse_condition (cond1);
15885 /* If the comparisons are not equal, and one doesn't dominate the other,
15886 then we can't do this. */
15887 if (cond1 != cond2
15888 && !comparison_dominates_p (cond1, cond2)
15889 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15890 return CCmode;
15892 if (swapped)
15893 std::swap (cond1, cond2);
15895 switch (cond1)
15897 case EQ:
15898 if (cond_or == DOM_CC_X_AND_Y)
15899 return CC_DEQmode;
15901 switch (cond2)
15903 case EQ: return CC_DEQmode;
15904 case LE: return CC_DLEmode;
15905 case LEU: return CC_DLEUmode;
15906 case GE: return CC_DGEmode;
15907 case GEU: return CC_DGEUmode;
15908 default: gcc_unreachable ();
15911 case LT:
15912 if (cond_or == DOM_CC_X_AND_Y)
15913 return CC_DLTmode;
15915 switch (cond2)
15917 case LT:
15918 return CC_DLTmode;
15919 case LE:
15920 return CC_DLEmode;
15921 case NE:
15922 return CC_DNEmode;
15923 default:
15924 gcc_unreachable ();
15927 case GT:
15928 if (cond_or == DOM_CC_X_AND_Y)
15929 return CC_DGTmode;
15931 switch (cond2)
15933 case GT:
15934 return CC_DGTmode;
15935 case GE:
15936 return CC_DGEmode;
15937 case NE:
15938 return CC_DNEmode;
15939 default:
15940 gcc_unreachable ();
15943 case LTU:
15944 if (cond_or == DOM_CC_X_AND_Y)
15945 return CC_DLTUmode;
15947 switch (cond2)
15949 case LTU:
15950 return CC_DLTUmode;
15951 case LEU:
15952 return CC_DLEUmode;
15953 case NE:
15954 return CC_DNEmode;
15955 default:
15956 gcc_unreachable ();
15959 case GTU:
15960 if (cond_or == DOM_CC_X_AND_Y)
15961 return CC_DGTUmode;
15963 switch (cond2)
15965 case GTU:
15966 return CC_DGTUmode;
15967 case GEU:
15968 return CC_DGEUmode;
15969 case NE:
15970 return CC_DNEmode;
15971 default:
15972 gcc_unreachable ();
15975 /* The remaining cases only occur when both comparisons are the
15976 same. */
15977 case NE:
15978 gcc_assert (cond1 == cond2);
15979 return CC_DNEmode;
15981 case LE:
15982 gcc_assert (cond1 == cond2);
15983 return CC_DLEmode;
15985 case GE:
15986 gcc_assert (cond1 == cond2);
15987 return CC_DGEmode;
15989 case LEU:
15990 gcc_assert (cond1 == cond2);
15991 return CC_DLEUmode;
15993 case GEU:
15994 gcc_assert (cond1 == cond2);
15995 return CC_DGEUmode;
15997 default:
15998 gcc_unreachable ();
16002 machine_mode
16003 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16005 /* All floating point compares return CCFP if it is an equality
16006 comparison, and CCFPE otherwise. */
16007 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16009 switch (op)
16011 case EQ:
16012 case NE:
16013 case UNORDERED:
16014 case ORDERED:
16015 case UNLT:
16016 case UNLE:
16017 case UNGT:
16018 case UNGE:
16019 case UNEQ:
16020 case LTGT:
16021 return CCFPmode;
16023 case LT:
16024 case LE:
16025 case GT:
16026 case GE:
16027 return CCFPEmode;
16029 default:
16030 gcc_unreachable ();
16034 /* A compare with a shifted operand. Because of canonicalization, the
16035 comparison will have to be swapped when we emit the assembler. */
16036 if (GET_MODE (y) == SImode
16037 && (REG_P (y) || (SUBREG_P (y)))
16038 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16039 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16040 || GET_CODE (x) == ROTATERT))
16041 return CC_SWPmode;
16043 /* A widened compare of the sum of a value plus a carry against a
16044 constant. This is a representation of RSC. We want to swap the
16045 result of the comparison at output. Not valid if the Z bit is
16046 needed. */
16047 if (GET_MODE (x) == DImode
16048 && GET_CODE (x) == PLUS
16049 && arm_borrow_operation (XEXP (x, 1), DImode)
16050 && CONST_INT_P (y)
16051 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16052 && (op == LE || op == GT))
16053 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16054 && (op == LEU || op == GTU))))
16055 return CC_SWPmode;
16057 /* If X is a constant we want to use CC_RSBmode. This is
16058 non-canonical, but arm_gen_compare_reg uses this to generate the
16059 correct canonical form. */
16060 if (GET_MODE (y) == SImode
16061 && (REG_P (y) || SUBREG_P (y))
16062 && CONST_INT_P (x))
16063 return CC_RSBmode;
16065 /* This operation is performed swapped, but since we only rely on the Z
16066 flag we don't need an additional mode. */
16067 if (GET_MODE (y) == SImode
16068 && (REG_P (y) || (SUBREG_P (y)))
16069 && GET_CODE (x) == NEG
16070 && (op == EQ || op == NE))
16071 return CC_Zmode;
16073 /* This is a special case that is used by combine to allow a
16074 comparison of a shifted byte load to be split into a zero-extend
16075 followed by a comparison of the shifted integer (only valid for
16076 equalities and unsigned inequalities). */
16077 if (GET_MODE (x) == SImode
16078 && GET_CODE (x) == ASHIFT
16079 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16080 && GET_CODE (XEXP (x, 0)) == SUBREG
16081 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16082 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16083 && (op == EQ || op == NE
16084 || op == GEU || op == GTU || op == LTU || op == LEU)
16085 && CONST_INT_P (y))
16086 return CC_Zmode;
16088 /* A construct for a conditional compare, if the false arm contains
16089 0, then both conditions must be true, otherwise either condition
16090 must be true. Not all conditions are possible, so CCmode is
16091 returned if it can't be done. */
16092 if (GET_CODE (x) == IF_THEN_ELSE
16093 && (XEXP (x, 2) == const0_rtx
16094 || XEXP (x, 2) == const1_rtx)
16095 && COMPARISON_P (XEXP (x, 0))
16096 && COMPARISON_P (XEXP (x, 1)))
16097 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16098 INTVAL (XEXP (x, 2)));
16100 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16101 if (GET_CODE (x) == AND
16102 && (op == EQ || op == NE)
16103 && COMPARISON_P (XEXP (x, 0))
16104 && COMPARISON_P (XEXP (x, 1)))
16105 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16106 DOM_CC_X_AND_Y);
16108 if (GET_CODE (x) == IOR
16109 && (op == EQ || op == NE)
16110 && COMPARISON_P (XEXP (x, 0))
16111 && COMPARISON_P (XEXP (x, 1)))
16112 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16113 DOM_CC_X_OR_Y);
16115 /* An operation (on Thumb) where we want to test for a single bit.
16116 This is done by shifting that bit up into the top bit of a
16117 scratch register; we can then branch on the sign bit. */
16118 if (TARGET_THUMB1
16119 && GET_MODE (x) == SImode
16120 && (op == EQ || op == NE)
16121 && GET_CODE (x) == ZERO_EXTRACT
16122 && XEXP (x, 1) == const1_rtx)
16123 return CC_Nmode;
16125 /* For an operation that sets the condition codes as a side-effect, the
16126 V flag is not set correctly, so we can only use comparisons where
16127 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16128 instead.) */
16129 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16130 if (GET_MODE (x) == SImode
16131 && y == const0_rtx
16132 && (op == EQ || op == NE || op == LT || op == GE)
16133 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16134 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16135 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16136 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16137 || GET_CODE (x) == LSHIFTRT
16138 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16139 || GET_CODE (x) == ROTATERT
16140 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16141 return CC_NZmode;
16143 /* A comparison of ~reg with a const is really a special
16144 canonicalization of compare (~const, reg), which is a reverse
16145 subtract operation. We may not get here if CONST is 0, but that
16146 doesn't matter because ~0 isn't a valid immediate for RSB. */
16147 if (GET_MODE (x) == SImode
16148 && GET_CODE (x) == NOT
16149 && CONST_INT_P (y))
16150 return CC_RSBmode;
16152 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16153 return CC_Zmode;
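  /* The unsigned cases below typically come from overflow checks of the form
     (a + b) < a; only the carry flag is needed for those, hence CC_Cmode.  */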
16155 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16156 && GET_CODE (x) == PLUS
16157 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16158 return CC_Cmode;
16160 if (GET_MODE (x) == DImode
16161 && GET_CODE (x) == PLUS
16162 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16163 && CONST_INT_P (y)
16164 && UINTVAL (y) == 0x800000000
16165 && (op == GEU || op == LTU))
16166 return CC_ADCmode;
16168 if (GET_MODE (x) == DImode
16169 && (op == GE || op == LT)
16170 && GET_CODE (x) == SIGN_EXTEND
16171 && ((GET_CODE (y) == PLUS
16172 && arm_borrow_operation (XEXP (y, 0), DImode))
16173 || arm_borrow_operation (y, DImode)))
16174 return CC_NVmode;
16176 if (GET_MODE (x) == DImode
16177 && (op == GEU || op == LTU)
16178 && GET_CODE (x) == ZERO_EXTEND
16179 && ((GET_CODE (y) == PLUS
16180 && arm_borrow_operation (XEXP (y, 0), DImode))
16181 || arm_borrow_operation (y, DImode)))
16182 return CC_Bmode;
16184 if (GET_MODE (x) == DImode
16185 && (op == EQ || op == NE)
16186 && (GET_CODE (x) == PLUS
16187 || GET_CODE (x) == MINUS)
16188 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16189 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16190 && GET_CODE (y) == SIGN_EXTEND
16191 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16192 return CC_Vmode;
16194 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16195 return GET_MODE (x);
16197 return CCmode;
16200 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16201 the sequence of instructions needed to generate a suitable condition
16202 code register. Return the CC register result. */
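/* E.g. an EQ/NE comparison against zero is handled below by ORing the two
   halves and comparing the result with zero (typically a single ORRS) rather
   than performing a full 64-bit subtraction.  */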
16203 static rtx
16204 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16206 machine_mode mode;
16207 rtx cc_reg;
16209 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16210 gcc_assert (TARGET_32BIT);
16211 gcc_assert (!CONST_INT_P (x));
16213 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16214 subreg_lowpart_offset (SImode, DImode));
16215 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16216 subreg_highpart_offset (SImode, DImode));
16217 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16218 subreg_lowpart_offset (SImode, DImode));
16219 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16220 subreg_highpart_offset (SImode, DImode));
16221 switch (code)
16223 case EQ:
16224 case NE:
16226 if (y_lo == const0_rtx || y_hi == const0_rtx)
16228 if (y_lo != const0_rtx)
16230 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16232 gcc_assert (y_hi == const0_rtx);
16233 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16234 if (!arm_add_operand (y_lo, SImode))
16235 y_lo = force_reg (SImode, y_lo);
16236 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16237 x_lo = scratch2;
16239 else if (y_hi != const0_rtx)
16241 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16243 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16244 if (!arm_add_operand (y_hi, SImode))
16245 y_hi = force_reg (SImode, y_hi);
16246 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16247 x_hi = scratch2;
16250 if (!scratch)
16252 gcc_assert (!reload_completed);
16253 scratch = gen_rtx_SCRATCH (SImode);
16256 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16257 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16259 rtx set
16260 = gen_rtx_SET (cc_reg,
16261 gen_rtx_COMPARE (CC_NZmode,
16262 gen_rtx_IOR (SImode, x_lo, x_hi),
16263 const0_rtx));
16264 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16265 clobber)));
16266 return cc_reg;
16269 if (!arm_add_operand (y_lo, SImode))
16270 y_lo = force_reg (SImode, y_lo);
16272 if (!arm_add_operand (y_hi, SImode))
16273 y_hi = force_reg (SImode, y_hi);
16275 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16276 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16277 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16278 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16279 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16281 emit_insn (gen_rtx_SET (cc_reg,
16282 gen_rtx_COMPARE (mode, conjunction,
16283 const0_rtx)));
16284 return cc_reg;
16287 case LT:
16288 case GE:
16290 if (y_lo == const0_rtx)
16292 /* If the low word of y is 0, then this is simply a normal
16293 compare of the upper words. */
16294 if (!arm_add_operand (y_hi, SImode))
16295 y_hi = force_reg (SImode, y_hi);
16297 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16300 if (!arm_add_operand (y_lo, SImode))
16301 y_lo = force_reg (SImode, y_lo);
16303 rtx cmp1
16304 = gen_rtx_LTU (DImode,
16305 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16306 const0_rtx);
16308 if (!scratch)
16309 scratch = gen_rtx_SCRATCH (SImode);
16311 if (!arm_not_operand (y_hi, SImode))
16312 y_hi = force_reg (SImode, y_hi);
16314 rtx_insn *insn;
16315 if (y_hi == const0_rtx)
16316 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16317 cmp1));
16318 else if (CONST_INT_P (y_hi))
16319 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16320 y_hi, cmp1));
16321 else
16322 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16323 cmp1));
16324 return SET_DEST (single_set (insn));
16327 case LE:
16328 case GT:
16330 /* During expansion, we only expect to get here if y is a
16331 constant that we want to handle, otherwise we should have
16332 swapped the operands already. */
16333 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16335 if (!const_ok_for_arm (INTVAL (y_lo)))
16336 y_lo = force_reg (SImode, y_lo);
16338 /* Perform a reverse subtract and compare. */
16339 rtx cmp1
16340 = gen_rtx_LTU (DImode,
16341 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16342 const0_rtx);
16343 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16344 x_hi, cmp1));
16345 return SET_DEST (single_set (insn));
16348 case LTU:
16349 case GEU:
16351 if (y_lo == const0_rtx)
16353 /* If the low word of y is 0, then this is simply a normal
16354 compare of the upper words. */
16355 if (!arm_add_operand (y_hi, SImode))
16356 y_hi = force_reg (SImode, y_hi);
16358 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16361 if (!arm_add_operand (y_lo, SImode))
16362 y_lo = force_reg (SImode, y_lo);
16364 rtx cmp1
16365 = gen_rtx_LTU (DImode,
16366 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16367 const0_rtx);
16369 if (!scratch)
16370 scratch = gen_rtx_SCRATCH (SImode);
16371 if (!arm_not_operand (y_hi, SImode))
16372 y_hi = force_reg (SImode, y_hi);
16374 rtx_insn *insn;
16375 if (y_hi == const0_rtx)
16376 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16377 cmp1));
16378 else if (CONST_INT_P (y_hi))
16380 /* Constant is viewed as unsigned when zero-extended. */
16381 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16382 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16383 y_hi, cmp1));
16385 else
16386 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16387 cmp1));
16388 return SET_DEST (single_set (insn));
16391 case LEU:
16392 case GTU:
16394 /* During expansion, we only expect to get here if y is a
16395 constant that we want to handle, otherwise we should have
16396 swapped the operands already. */
16397 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16399 if (!const_ok_for_arm (INTVAL (y_lo)))
16400 y_lo = force_reg (SImode, y_lo);
16402 /* Perform a reverse subtract and compare. */
16403 rtx cmp1
16404 = gen_rtx_LTU (DImode,
16405 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16406 const0_rtx);
16407 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16408 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16409 x_hi, cmp1));
16410 return SET_DEST (single_set (insn));
16413 default:
16414 gcc_unreachable ();
16418 /* X and Y are two things to compare using CODE. Emit the compare insn and
16419 return the rtx for register 0 in the proper mode. */
16421 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16423 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16424 return arm_gen_dicompare_reg (code, x, y, scratch);
16426 machine_mode mode = SELECT_CC_MODE (code, x, y);
16427 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16428 if (mode == CC_RSBmode)
16430 if (!scratch)
16431 scratch = gen_rtx_SCRATCH (SImode);
16432 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16433 GEN_INT (~UINTVAL (x)), y));
16435 else
16436 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16438 return cc_reg;
16441 /* Generate a sequence of insns that will generate the correct return
16442 address mask depending on the physical architecture that the program
16443 is running on. */
16445 arm_gen_return_addr_mask (void)
16447 rtx reg = gen_reg_rtx (Pmode);
16449 emit_insn (gen_return_addr_mask (reg));
16450 return reg;
16453 void
16454 arm_reload_in_hi (rtx *operands)
16456 rtx ref = operands[1];
16457 rtx base, scratch;
16458 HOST_WIDE_INT offset = 0;
16460 if (SUBREG_P (ref))
16462 offset = SUBREG_BYTE (ref);
16463 ref = SUBREG_REG (ref);
16466 if (REG_P (ref))
16468 /* We have a pseudo which has been spilt onto the stack; there
16469 are two cases here: the first where there is a simple
16470 stack-slot replacement and a second where the stack-slot is
16471 out of range, or is used as a subreg. */
16472 if (reg_equiv_mem (REGNO (ref)))
16474 ref = reg_equiv_mem (REGNO (ref));
16475 base = find_replacement (&XEXP (ref, 0));
16477 else
16478 /* The slot is out of range, or was dressed up in a SUBREG. */
16479 base = reg_equiv_address (REGNO (ref));
16481 /* PR 62554: If there is no equivalent memory location then just move
16482 the value as an SImode register move. This happens when the target
16483 architecture variant does not have an HImode register move. */
16484 if (base == NULL)
16486 gcc_assert (REG_P (operands[0]));
16487 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16488 gen_rtx_SUBREG (SImode, ref, 0)));
16489 return;
16492 else
16493 base = find_replacement (&XEXP (ref, 0));
16495 /* Handle the case where the address is too complex to be offset by 1. */
16496 if (GET_CODE (base) == MINUS
16497 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16499 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16501 emit_set_insn (base_plus, base);
16502 base = base_plus;
16504 else if (GET_CODE (base) == PLUS)
16506 /* The addend must be CONST_INT, or we would have dealt with it above. */
16507 HOST_WIDE_INT hi, lo;
16509 offset += INTVAL (XEXP (base, 1));
16510 base = XEXP (base, 0);
16512 /* Rework the address into a legal sequence of insns. */
16513 /* Valid range for lo is -4095 -> 4095 */
16514 lo = (offset >= 0
16515 ? (offset & 0xfff)
16516 : -((-offset) & 0xfff));
16518 /* Corner case, if lo is the max offset then we would be out of range
16519 once we have added the additional 1 below, so bump the msb into the
16520 pre-loading insn(s). */
16521 if (lo == 4095)
16522 lo &= 0x7ff;
16524 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16525 ^ (HOST_WIDE_INT) 0x80000000)
16526 - (HOST_WIDE_INT) 0x80000000);
16528 gcc_assert (hi + lo == offset);
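      /* E.g. OFFSET == 4095 hits the corner case above: without the mask LO
	 would be 4095 and the second byte access at LO + 1 would need 4096,
	 which is out of range; with it LO == 2047 and HI == 2048, keeping
	 both accesses within the +/-4095 limit.  */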
16530 if (hi != 0)
16532 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16534 /* Get the base address; addsi3 knows how to handle constants
16535 that require more than one insn. */
16536 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16537 base = base_plus;
16538 offset = lo;
16542 /* Operands[2] may overlap operands[0] (though it won't overlap
16543 operands[1]), that's why we asked for a DImode reg -- so we can
16544 use the bit that does not overlap. */
16545 if (REGNO (operands[2]) == REGNO (operands[0]))
16546 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16547 else
16548 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16550 emit_insn (gen_zero_extendqisi2 (scratch,
16551 gen_rtx_MEM (QImode,
16552 plus_constant (Pmode, base,
16553 offset))));
16554 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16555 gen_rtx_MEM (QImode,
16556 plus_constant (Pmode, base,
16557 offset + 1))));
16558 if (!BYTES_BIG_ENDIAN)
16559 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16560 gen_rtx_IOR (SImode,
16561 gen_rtx_ASHIFT
16562 (SImode,
16563 gen_rtx_SUBREG (SImode, operands[0], 0),
16564 GEN_INT (8)),
16565 scratch));
16566 else
16567 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16568 gen_rtx_IOR (SImode,
16569 gen_rtx_ASHIFT (SImode, scratch,
16570 GEN_INT (8)),
16571 gen_rtx_SUBREG (SImode, operands[0], 0)));
16574 /* Handle storing a half-word to memory during reload by synthesizing as two
16575 byte stores. Take care not to clobber the input values until after we
16576 have moved them somewhere safe. This code assumes that if the DImode
16577 scratch in operands[2] overlaps either the input value or output address
16578 in some way, then that value must die in this insn (we absolutely need
16579 two scratch registers for some corner cases). */
16580 void
16581 arm_reload_out_hi (rtx *operands)
16583 rtx ref = operands[0];
16584 rtx outval = operands[1];
16585 rtx base, scratch;
16586 HOST_WIDE_INT offset = 0;
16588 if (SUBREG_P (ref))
16590 offset = SUBREG_BYTE (ref);
16591 ref = SUBREG_REG (ref);
16594 if (REG_P (ref))
16596 /* We have a pseudo which has been spilt onto the stack; there
16597 are two cases here: the first where there is a simple
16598 stack-slot replacement and a second where the stack-slot is
16599 out of range, or is used as a subreg. */
16600 if (reg_equiv_mem (REGNO (ref)))
16602 ref = reg_equiv_mem (REGNO (ref));
16603 base = find_replacement (&XEXP (ref, 0));
16605 else
16606 /* The slot is out of range, or was dressed up in a SUBREG. */
16607 base = reg_equiv_address (REGNO (ref));
16609 /* PR 62254: If there is no equivalent memory location then just move
16610 the value as an SImode register move. This happens when the target
16611 architecture variant does not have an HImode register move. */
16612 if (base == NULL)
16614 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16616 if (REG_P (outval))
16618 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16619 gen_rtx_SUBREG (SImode, outval, 0)));
16621 else /* SUBREG_P (outval) */
16623 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16624 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16625 SUBREG_REG (outval)));
16626 else
16627 /* FIXME: Handle other cases ? */
16628 gcc_unreachable ();
16630 return;
16633 else
16634 base = find_replacement (&XEXP (ref, 0));
16636 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16638 /* Handle the case where the address is too complex to be offset by 1. */
16639 if (GET_CODE (base) == MINUS
16640 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16642 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16644 /* Be careful not to destroy OUTVAL. */
16645 if (reg_overlap_mentioned_p (base_plus, outval))
16647 /* Updating base_plus might destroy outval, see if we can
16648 swap the scratch and base_plus. */
16649 if (!reg_overlap_mentioned_p (scratch, outval))
16650 std::swap (scratch, base_plus);
16651 else
16653 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16655 /* Be conservative and copy OUTVAL into the scratch now,
16656 this should only be necessary if outval is a subreg
16657 of something larger than a word. */
16658 /* XXX Might this clobber base? I can't see how it can,
16659 since scratch is known to overlap with OUTVAL, and
16660 must be wider than a word. */
16661 emit_insn (gen_movhi (scratch_hi, outval));
16662 outval = scratch_hi;
16666 emit_set_insn (base_plus, base);
16667 base = base_plus;
16669 else if (GET_CODE (base) == PLUS)
16671 /* The addend must be CONST_INT, or we would have dealt with it above. */
16672 HOST_WIDE_INT hi, lo;
16674 offset += INTVAL (XEXP (base, 1));
16675 base = XEXP (base, 0);
16677 /* Rework the address into a legal sequence of insns. */
16678 /* Valid range for lo is -4095 -> 4095 */
16679 lo = (offset >= 0
16680 ? (offset & 0xfff)
16681 : -((-offset) & 0xfff));
16683 /* Corner case, if lo is the max offset then we would be out of range
16684 once we have added the additional 1 below, so bump the msb into the
16685 pre-loading insn(s). */
16686 if (lo == 4095)
16687 lo &= 0x7ff;
16689 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16690 ^ (HOST_WIDE_INT) 0x80000000)
16691 - (HOST_WIDE_INT) 0x80000000);
16693 gcc_assert (hi + lo == offset);
16695 if (hi != 0)
16697 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16699 /* Be careful not to destroy OUTVAL. */
16700 if (reg_overlap_mentioned_p (base_plus, outval))
16702 /* Updating base_plus might destroy outval, see if we
16703 can swap the scratch and base_plus. */
16704 if (!reg_overlap_mentioned_p (scratch, outval))
16705 std::swap (scratch, base_plus);
16706 else
16708 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16710 /* Be conservative and copy outval into scratch now,
16711 this should only be necessary if outval is a
16712 subreg of something larger than a word. */
16713 /* XXX Might this clobber base? I can't see how it
16714 can, since scratch is known to overlap with
16715 outval. */
16716 emit_insn (gen_movhi (scratch_hi, outval));
16717 outval = scratch_hi;
16721 /* Get the base address; addsi3 knows how to handle constants
16722 that require more than one insn. */
16723 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16724 base = base_plus;
16725 offset = lo;
16729 if (BYTES_BIG_ENDIAN)
16731 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16732 plus_constant (Pmode, base,
16733 offset + 1)),
16734 gen_lowpart (QImode, outval)));
16735 emit_insn (gen_lshrsi3 (scratch,
16736 gen_rtx_SUBREG (SImode, outval, 0),
16737 GEN_INT (8)));
16738 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16739 offset)),
16740 gen_lowpart (QImode, scratch)));
16742 else
16744 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16745 offset)),
16746 gen_lowpart (QImode, outval)));
16747 emit_insn (gen_lshrsi3 (scratch,
16748 gen_rtx_SUBREG (SImode, outval, 0),
16749 GEN_INT (8)));
16750 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16751 plus_constant (Pmode, base,
16752 offset + 1)),
16753 gen_lowpart (QImode, scratch)));
16757 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16758 (padded to the size of a word) should be passed in a register. */
16760 static bool
16761 arm_must_pass_in_stack (const function_arg_info &arg)
16763 if (TARGET_AAPCS_BASED)
16764 return must_pass_in_stack_var_size (arg);
16765 else
16766 return must_pass_in_stack_var_size_or_pad (arg);
16770 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16771 byte of a stack argument has useful data. For legacy APCS ABIs we use
16772 the default. For AAPCS based ABIs small aggregate types are placed
16773 in the lowest memory address. */
16775 static pad_direction
16776 arm_function_arg_padding (machine_mode mode, const_tree type)
16778 if (!TARGET_AAPCS_BASED)
16779 return default_function_arg_padding (mode, type);
16781 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16782 return PAD_DOWNWARD;
16784 return PAD_UPWARD;
16788 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16789 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16790 register has useful data, and return the opposite if the most
16791 significant byte does. */
16793 bool
16794 arm_pad_reg_upward (machine_mode mode,
16795 tree type, int first ATTRIBUTE_UNUSED)
16797 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16799 /* For AAPCS, small aggregates, small fixed-point types,
16800 and small complex types are always padded upwards. */
16801 if (type)
16803 if ((AGGREGATE_TYPE_P (type)
16804 || TREE_CODE (type) == COMPLEX_TYPE
16805 || FIXED_POINT_TYPE_P (type))
16806 && int_size_in_bytes (type) <= 4)
16807 return true;
16809 else
16811 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16812 && GET_MODE_SIZE (mode) <= 4)
16813 return true;
16817 /* Otherwise, use default padding. */
16818 return !BYTES_BIG_ENDIAN;
16821 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16822 assuming that the address in the base register is word aligned. */
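/* For example, an offset of 1020 is accepted for Thumb-2 but not for ARM
state (whose limit is 255), while an offset of 6 is rejected for Thumb-2
because it is not a multiple of 4 yet is fine for ARM state. */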
16823 bool
16824 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16826 HOST_WIDE_INT max_offset;
16828 /* Offset must be a multiple of 4 in Thumb mode. */
16829 if (TARGET_THUMB2 && ((offset & 3) != 0))
16830 return false;
16832 if (TARGET_THUMB2)
16833 max_offset = 1020;
16834 else if (TARGET_ARM)
16835 max_offset = 255;
16836 else
16837 return false;
16839 return ((offset <= max_offset) && (offset >= -max_offset));
16842 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16843 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16844 Assumes that the address in the base register RN is word aligned. Pattern
16845 guarantees that both memory accesses use the same base register,
16846 the offsets are constants within the range, and the gap between the offsets is 4.
16847 If reload is complete then check that registers are legal. WBACK indicates whether
16848 address is updated. LOAD indicates whether memory access is load or store. */
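/* For instance, after reload "ldrd r0, r1, [r2, #8]" passes these checks in
ARM state, whereas "ldrd r1, r2, [r3]" fails there because the first
destination register is odd; Thumb-2 has no even/odd pairing requirement
but rejects SP and PC as destination registers. */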
16849 bool
16850 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16851 bool wback, bool load)
16853 unsigned int t, t2, n;
16855 if (!reload_completed)
16856 return true;
16858 if (!offset_ok_for_ldrd_strd (offset))
16859 return false;
16861 t = REGNO (rt);
16862 t2 = REGNO (rt2);
16863 n = REGNO (rn);
16865 if ((TARGET_THUMB2)
16866 && ((wback && (n == t || n == t2))
16867 || (t == SP_REGNUM)
16868 || (t == PC_REGNUM)
16869 || (t2 == SP_REGNUM)
16870 || (t2 == PC_REGNUM)
16871 || (!load && (n == PC_REGNUM))
16872 || (load && (t == t2))
16873 /* Triggers Cortex-M3 LDRD errata. */
16874 || (!wback && load && fix_cm3_ldrd && (n == t))))
16875 return false;
16877 if ((TARGET_ARM)
16878 && ((wback && (n == t || n == t2))
16879 || (t2 == PC_REGNUM)
16880 || (t % 2 != 0) /* First destination register is not even. */
16881 || (t2 != t + 1)
16882 /* PC can be used as base register (for offset addressing only),
16883 but it is deprecated. */
16884 || (n == PC_REGNUM)))
16885 return false;
16887 return true;
16890 /* Return true if a 64-bit access with alignment ALIGN and with a
16891 constant offset OFFSET from the base pointer is permitted on this
16892 architecture. */
16893 static bool
16894 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16896 return (unaligned_access
16897 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16898 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16901 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16902 operand MEM's address contains an immediate offset from the base
16903 register and has no side effects, in which case it sets BASE,
16904 OFFSET and ALIGN accordingly. */
16905 static bool
16906 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16908 rtx addr;
16910 gcc_assert (base != NULL && offset != NULL);
16912 /* TODO: Handle more general memory operand patterns, such as
16913 PRE_DEC and PRE_INC. */
16915 if (side_effects_p (mem))
16916 return false;
16918 /* Can't deal with subregs. */
16919 if (SUBREG_P (mem))
16920 return false;
16922 gcc_assert (MEM_P (mem));
16924 *offset = const0_rtx;
16925 *align = MEM_ALIGN (mem);
16927 addr = XEXP (mem, 0);
16929 /* If addr isn't valid for DImode, then we can't handle it. */
16930 if (!arm_legitimate_address_p (DImode, addr,
16931 reload_in_progress || reload_completed))
16932 return false;
16934 if (REG_P (addr))
16936 *base = addr;
16937 return true;
16939 else if (GET_CODE (addr) == PLUS)
16941 *base = XEXP (addr, 0);
16942 *offset = XEXP (addr, 1);
16943 return (REG_P (*base) && CONST_INT_P (*offset));
16946 return false;
16949 /* Called from a peephole2 to replace two word-size accesses with a
16950 single LDRD/STRD instruction. Returns true iff we can generate a
16951 new instruction sequence. That is, both accesses use the same base
16952 register and the gap between constant offsets is 4. This function
16953 may reorder its operands to match ldrd/strd RTL templates.
16954 OPERANDS are the operands found by the peephole matcher;
16955 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16956 corresponding memory operands. LOAD indicates whether the access
16957 is load or store. CONST_STORE indicates a store of constant
16958 integer values held in OPERANDS[4,5] and assumes that the pattern
16959 is 4 insns long, for the purpose of checking dead registers.
16960 COMMUTE indicates that register operands may be reordered. */
16961 bool
16962 gen_operands_ldrd_strd (rtx *operands, bool load,
16963 bool const_store, bool commute)
16965 int nops = 2;
16966 HOST_WIDE_INT offsets[2], offset, align[2];
16967 rtx base = NULL_RTX;
16968 rtx cur_base, cur_offset, tmp;
16969 int i, gap;
16970 HARD_REG_SET regset;
16972 gcc_assert (!const_store || !load);
16973 /* Check that the memory references are immediate offsets from the
16974 same base register. Extract the base register, the destination
16975 registers, and the corresponding memory offsets. */
16976 for (i = 0; i < nops; i++)
16978 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16979 &align[i]))
16980 return false;
16982 if (i == 0)
16983 base = cur_base;
16984 else if (REGNO (base) != REGNO (cur_base))
16985 return false;
16987 offsets[i] = INTVAL (cur_offset);
16988 if (GET_CODE (operands[i]) == SUBREG)
16990 tmp = SUBREG_REG (operands[i]);
16991 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16992 operands[i] = tmp;
16996 /* Make sure there is no dependency between the individual loads. */
16997 if (load && REGNO (operands[0]) == REGNO (base))
16998 return false; /* RAW */
17000 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17001 return false; /* WAW */
17003 /* If the same input register is used in both stores
17004 when storing different constants, try to find a free register.
17005 For example, the code
17006 mov r0, 0
17007 str r0, [r2]
17008 mov r0, 1
17009 str r0, [r2, #4]
17010 can be transformed into
17011 mov r1, 0
17012 mov r0, 1
17013 strd r1, r0, [r2]
17014 in Thumb mode assuming that r1 is free.
17015 For ARM mode do the same but only if the starting register
17016 can be made to be even. */
17017 if (const_store
17018 && REGNO (operands[0]) == REGNO (operands[1])
17019 && INTVAL (operands[4]) != INTVAL (operands[5]))
17021 if (TARGET_THUMB2)
17023 CLEAR_HARD_REG_SET (regset);
17024 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17025 if (tmp == NULL_RTX)
17026 return false;
17028 /* Use the new register in the first load to ensure that
17029 if the original input register is not dead after peephole,
17030 then it will have the correct constant value. */
17031 operands[0] = tmp;
17033 else if (TARGET_ARM)
17035 int regno = REGNO (operands[0]);
17036 if (!peep2_reg_dead_p (4, operands[0]))
17038 /* When the input register is even and is not dead after the
17039 pattern, it has to hold the second constant but we cannot
17040 form a legal STRD in ARM mode with this register as the second
17041 register. */
17042 if (regno % 2 == 0)
17043 return false;
17045 /* Is regno-1 free? */
17046 SET_HARD_REG_SET (regset);
17047 CLEAR_HARD_REG_BIT(regset, regno - 1);
17048 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17049 if (tmp == NULL_RTX)
17050 return false;
17052 operands[0] = tmp;
17054 else
17056 /* Find a DImode register. */
17057 CLEAR_HARD_REG_SET (regset);
17058 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17059 if (tmp != NULL_RTX)
17061 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17062 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17064 else
17066 /* Can we use the input register to form a DI register? */
17067 SET_HARD_REG_SET (regset);
17068 CLEAR_HARD_REG_BIT(regset,
17069 regno % 2 == 0 ? regno + 1 : regno - 1);
17070 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17071 if (tmp == NULL_RTX)
17072 return false;
17073 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17077 gcc_assert (operands[0] != NULL_RTX);
17078 gcc_assert (operands[1] != NULL_RTX);
17079 gcc_assert (REGNO (operands[0]) % 2 == 0);
17080 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17084 /* Make sure the instructions are ordered with lower memory access first. */
17085 if (offsets[0] > offsets[1])
17087 gap = offsets[0] - offsets[1];
17088 offset = offsets[1];
17090 /* Swap the instructions such that lower memory is accessed first. */
17091 std::swap (operands[0], operands[1]);
17092 std::swap (operands[2], operands[3]);
17093 std::swap (align[0], align[1]);
17094 if (const_store)
17095 std::swap (operands[4], operands[5]);
17097 else
17099 gap = offsets[1] - offsets[0];
17100 offset = offsets[0];
17103 /* Make sure accesses are to consecutive memory locations. */
17104 if (gap != GET_MODE_SIZE (SImode))
17105 return false;
17107 if (!align_ok_ldrd_strd (align[0], offset))
17108 return false;
17110 /* Make sure we generate legal instructions. */
17111 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17112 false, load))
17113 return true;
17115 /* In Thumb state, where registers are almost unconstrained, there
17116 is little hope to fix it. */
17117 if (TARGET_THUMB2)
17118 return false;
17120 if (load && commute)
17122 /* Try reordering registers. */
17123 std::swap (operands[0], operands[1]);
17124 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17125 false, load))
17126 return true;
17129 if (const_store)
17131 /* If input registers are dead after this pattern, they can be
17132 reordered or replaced by other registers that are free in the
17133 current pattern. */
17134 if (!peep2_reg_dead_p (4, operands[0])
17135 || !peep2_reg_dead_p (4, operands[1]))
17136 return false;
17138 /* Try to reorder the input registers. */
17139 /* For example, the code
17140 mov r0, 0
17141 mov r1, 1
17142 str r1, [r2]
17143 str r0, [r2, #4]
17144 can be transformed into
17145 mov r1, 0
17146 mov r0, 1
17147 strd r0, r1, [r2]
17149 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17150 false, false))
17152 std::swap (operands[0], operands[1]);
17153 return true;
17156 /* Try to find a free DI register. */
17157 CLEAR_HARD_REG_SET (regset);
17158 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17159 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17160 while (true)
17162 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17163 if (tmp == NULL_RTX)
17164 return false;
17166 /* DREG must be an even-numbered register in DImode.
17167 Split it into SI registers. */
17168 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17169 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17170 gcc_assert (operands[0] != NULL_RTX);
17171 gcc_assert (operands[1] != NULL_RTX);
17172 gcc_assert (REGNO (operands[0]) % 2 == 0);
17173 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17175 return (operands_ok_ldrd_strd (operands[0], operands[1],
17176 base, offset,
17177 false, load));
17181 return false;
17185 /* Return true if parallel execution of the two word-size accesses provided
17186 could be satisfied with a single LDRD/STRD instruction. Two word-size
17187 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17188 register operands and OPERANDS[2,3] are the corresponding memory operands.
17190 bool
17191 valid_operands_ldrd_strd (rtx *operands, bool load)
17193 int nops = 2;
17194 HOST_WIDE_INT offsets[2], offset, align[2];
17195 rtx base = NULL_RTX;
17196 rtx cur_base, cur_offset;
17197 int i, gap;
17199 /* Check that the memory references are immediate offsets from the
17200 same base register. Extract the base register, the destination
17201 registers, and the corresponding memory offsets. */
17202 for (i = 0; i < nops; i++)
17204 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17205 &align[i]))
17206 return false;
17208 if (i == 0)
17209 base = cur_base;
17210 else if (REGNO (base) != REGNO (cur_base))
17211 return false;
17213 offsets[i] = INTVAL (cur_offset);
17214 if (GET_CODE (operands[i]) == SUBREG)
17215 return false;
17218 if (offsets[0] > offsets[1])
17219 return false;
17221 gap = offsets[1] - offsets[0];
17222 offset = offsets[0];
17224 /* Make sure accesses are to consecutive memory locations. */
17225 if (gap != GET_MODE_SIZE (SImode))
17226 return false;
17228 if (!align_ok_ldrd_strd (align[0], offset))
17229 return false;
17231 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17232 false, load);
17236 /* Print a symbolic form of X to the debug file, F. */
17237 static void
17238 arm_print_value (FILE *f, rtx x)
17240 switch (GET_CODE (x))
17242 case CONST_INT:
17243 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17244 return;
17246 case CONST_DOUBLE:
17248 char fpstr[20];
17249 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17250 sizeof (fpstr), 0, 1);
17251 fputs (fpstr, f);
17253 return;
17255 case CONST_VECTOR:
17257 int i;
17259 fprintf (f, "<");
17260 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17262 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17263 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17264 fputc (',', f);
17266 fprintf (f, ">");
17268 return;
17270 case CONST_STRING:
17271 fprintf (f, "\"%s\"", XSTR (x, 0));
17272 return;
17274 case SYMBOL_REF:
17275 fprintf (f, "`%s'", XSTR (x, 0));
17276 return;
17278 case LABEL_REF:
17279 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17280 return;
17282 case CONST:
17283 arm_print_value (f, XEXP (x, 0));
17284 return;
17286 case PLUS:
17287 arm_print_value (f, XEXP (x, 0));
17288 fprintf (f, "+");
17289 arm_print_value (f, XEXP (x, 1));
17290 return;
17292 case PC:
17293 fprintf (f, "pc");
17294 return;
17296 default:
17297 fprintf (f, "????");
17298 return;
17302 /* Routines for manipulation of the constant pool. */
17304 /* Arm instructions cannot load a large constant directly into a
17305 register; they have to come from a pc relative load. The constant
17306 must therefore be placed in the addressable range of the pc
17307 relative load. Depending on the precise pc relative load
17308 instruction the range is somewhere between 256 bytes and 4k. This
17309 means that we often have to dump a constant inside a function, and
17310 generate code to branch around it.
17312 It is important to minimize this, since the branches will slow
17313 things down and make the code larger.
17315 Normally we can hide the table after an existing unconditional
17316 branch so that there is no interruption of the flow, but in the
17317 worst case the code looks like this:
17319 ldr rn, L1
17321 b L2
17322 align
17323 L1: .long value
17327 ldr rn, L3
17329 b L4
17330 align
17331 L3: .long value
17335 We fix this by performing a scan after scheduling, which notices
17336 which instructions need to have their operands fetched from the
17337 constant table and builds the table.
17339 The algorithm starts by building a table of all the constants that
17340 need fixing up and all the natural barriers in the function (places
17341 where a constant table can be dropped without breaking the flow).
17342 For each fixup we note how far the pc-relative replacement will be
17343 able to reach and the offset of the instruction into the function.
17345 Having built the table we then group the fixes together to form
17346 tables that are as large as possible (subject to addressing
17347 constraints) and emit each table of constants after the last
17348 barrier that is within range of all the instructions in the group.
17349 If a group does not contain a barrier, then we forcibly create one
17350 by inserting a jump instruction into the flow. Once the table has
17351 been inserted, the insns are then modified to reference the
17352 relevant entry in the pool.
17354 Possible enhancements to the algorithm (not implemented) are:
17356 1) For some processors and object formats, there may be benefit in
17357 aligning the pools to the start of cache lines; this alignment
17358 would need to be taken into account when calculating addressability
17359 of a pool. */
17361 /* These typedefs are located at the start of this file, so that
17362 they can be used in the prototypes there. This comment is to
17363 remind readers of that fact so that the following structures
17364 can be understood more easily.
17366 typedef struct minipool_node Mnode;
17367 typedef struct minipool_fixup Mfix; */
17369 struct minipool_node
17371 /* Doubly linked chain of entries. */
17372 Mnode * next;
17373 Mnode * prev;
17374 /* The maximum offset into the code at which this entry can be placed. While
17375 pushing fixes for forward references, all entries are sorted in order
17376 of increasing max_address. */
17377 HOST_WIDE_INT max_address;
17378 /* Similarly for an entry inserted for a backwards ref. */
17379 HOST_WIDE_INT min_address;
17380 /* The number of fixes referencing this entry. This can become zero
17381 if we "unpush" an entry. In this case we ignore the entry when we
17382 come to emit the code. */
17383 int refcount;
17384 /* The offset from the start of the minipool. */
17385 HOST_WIDE_INT offset;
17386 /* The value in the table. */
17387 rtx value;
17388 /* The mode of value. */
17389 machine_mode mode;
17390 /* The size of the value. With iWMMXt enabled
17391 sizes > 4 also imply an alignment of 8 bytes. */
17392 int fix_size;
17395 struct minipool_fixup
17397 Mfix * next;
17398 rtx_insn * insn;
17399 HOST_WIDE_INT address;
17400 rtx * loc;
17401 machine_mode mode;
17402 int fix_size;
17403 rtx value;
17404 Mnode * minipool;
17405 HOST_WIDE_INT forwards;
17406 HOST_WIDE_INT backwards;
17409 /* Fixes less than a word need padding out to a word boundary. */
17410 #define MINIPOOL_FIX_SIZE(mode) \
17411 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
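/* For example, a QImode or HImode constant is padded to 4 bytes, while an
8-byte DImode or DFmode constant keeps its natural size. */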
17413 static Mnode * minipool_vector_head;
17414 static Mnode * minipool_vector_tail;
17415 static rtx_code_label *minipool_vector_label;
17416 static int minipool_pad;
17418 /* The linked list of all minipool fixes required for this function. */
17419 Mfix * minipool_fix_head;
17420 Mfix * minipool_fix_tail;
17421 /* The fix entry for the current minipool, once it has been placed. */
17422 Mfix * minipool_barrier;
17424 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17425 #define JUMP_TABLES_IN_TEXT_SECTION 0
17426 #endif
17428 static HOST_WIDE_INT
17429 get_jump_table_size (rtx_jump_table_data *insn)
17431 /* ADDR_VECs only take room if read-only data goes into the text
17432 section. */
17433 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17435 rtx body = PATTERN (insn);
17436 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17437 HOST_WIDE_INT size;
17438 HOST_WIDE_INT modesize;
17440 modesize = GET_MODE_SIZE (GET_MODE (body));
17441 size = modesize * XVECLEN (body, elt);
17442 switch (modesize)
17444 case 1:
17445 /* Round up size of TBB table to a halfword boundary. */
17446 size = (size + 1) & ~HOST_WIDE_INT_1;
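/* E.g. a 5-entry TBB table occupies 5 bytes and is rounded up to 6
so that the code following it stays halfword aligned. */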
17447 break;
17448 case 2:
17449 /* No padding necessary for TBH. */
17450 break;
17451 case 4:
17452 /* Add two bytes for alignment on Thumb. */
17453 if (TARGET_THUMB)
17454 size += 2;
17455 break;
17456 default:
17457 gcc_unreachable ();
17459 return size;
17462 return 0;
17465 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17466 function descriptor) into a register and the GOT address into the
17467 FDPIC register, returning an rtx for the register holding the
17468 function address. */
17471 arm_load_function_descriptor (rtx funcdesc)
17473 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17474 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17475 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17476 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17478 emit_move_insn (fnaddr_reg, fnaddr);
17480 /* The ABI requires the entry point address to be loaded first, but
17481 since we cannot support lazy binding for lack of atomic load of
17482 two 32-bits values, we do not need to bother to prevent the
17483 previous load from being moved after that of the GOT address. */
17484 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17486 return fnaddr_reg;
17489 /* Return the maximum amount of padding that will be inserted before
17490 label LABEL. */
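/* For instance, with 8-byte alignment at the label and Thumb code (minimum
insn size 2), up to 6 bytes of padding may precede the label. */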
17491 static HOST_WIDE_INT
17492 get_label_padding (rtx label)
17494 HOST_WIDE_INT align, min_insn_size;
17496 align = 1 << label_to_alignment (label).levels[0].log;
17497 min_insn_size = TARGET_THUMB ? 2 : 4;
17498 return align > min_insn_size ? align - min_insn_size : 0;
17501 /* Move a minipool fix MP from its current location to before MAX_MP.
17502 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17503 constraints may need updating. */
17504 static Mnode *
17505 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17506 HOST_WIDE_INT max_address)
17508 /* The code below assumes these are different. */
17509 gcc_assert (mp != max_mp);
17511 if (max_mp == NULL)
17513 if (max_address < mp->max_address)
17514 mp->max_address = max_address;
17516 else
17518 if (max_address > max_mp->max_address - mp->fix_size)
17519 mp->max_address = max_mp->max_address - mp->fix_size;
17520 else
17521 mp->max_address = max_address;
17523 /* Unlink MP from its current position. Since max_mp is non-null,
17524 mp->prev must be non-null. */
17525 mp->prev->next = mp->next;
17526 if (mp->next != NULL)
17527 mp->next->prev = mp->prev;
17528 else
17529 minipool_vector_tail = mp->prev;
17531 /* Re-insert it before MAX_MP. */
17532 mp->next = max_mp;
17533 mp->prev = max_mp->prev;
17534 max_mp->prev = mp;
17536 if (mp->prev != NULL)
17537 mp->prev->next = mp;
17538 else
17539 minipool_vector_head = mp;
17542 /* Save the new entry. */
17543 max_mp = mp;
17545 /* Scan over the preceding entries and adjust their addresses as
17546 required. */
17547 while (mp->prev != NULL
17548 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17550 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17551 mp = mp->prev;
17554 return max_mp;
17557 /* Add a constant to the minipool for a forward reference. Returns the
17558 node added or NULL if the constant will not fit in this pool. */
17559 static Mnode *
17560 add_minipool_forward_ref (Mfix *fix)
17562 /* If set, max_mp is the first pool_entry that has a lower
17563 constraint than the one we are trying to add. */
17564 Mnode * max_mp = NULL;
17565 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17566 Mnode * mp;
17568 /* If the minipool starts before the end of FIX->INSN then this FIX
17569 cannot be placed into the current pool. Furthermore, adding the
17570 new constant pool entry may cause the pool to start FIX_SIZE bytes
17571 earlier. */
17572 if (minipool_vector_head &&
17573 (fix->address + get_attr_length (fix->insn)
17574 >= minipool_vector_head->max_address - fix->fix_size))
17575 return NULL;
17577 /* Scan the pool to see if a constant with the same value has
17578 already been added. While we are doing this, also note the
17579 location where we must insert the constant if it doesn't already
17580 exist. */
17581 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17583 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17584 && fix->mode == mp->mode
17585 && (!LABEL_P (fix->value)
17586 || (CODE_LABEL_NUMBER (fix->value)
17587 == CODE_LABEL_NUMBER (mp->value)))
17588 && rtx_equal_p (fix->value, mp->value))
17590 /* More than one fix references this entry. */
17591 mp->refcount++;
17592 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17595 /* Note the insertion point if necessary. */
17596 if (max_mp == NULL
17597 && mp->max_address > max_address)
17598 max_mp = mp;
17600 /* If we are inserting an 8-byte aligned quantity and
17601 we have not already found an insertion point, then
17602 make sure that all such 8-byte aligned quantities are
17603 placed at the start of the pool. */
17604 if (ARM_DOUBLEWORD_ALIGN
17605 && max_mp == NULL
17606 && fix->fix_size >= 8
17607 && mp->fix_size < 8)
17609 max_mp = mp;
17610 max_address = mp->max_address;
17614 /* The value is not currently in the minipool, so we need to create
17615 a new entry for it. If MAX_MP is NULL, the entry will be put on
17616 the end of the list since the placement is less constrained than
17617 any existing entry. Otherwise, we insert the new fix before
17618 MAX_MP and, if necessary, adjust the constraints on the other
17619 entries. */
17620 mp = XNEW (Mnode);
17621 mp->fix_size = fix->fix_size;
17622 mp->mode = fix->mode;
17623 mp->value = fix->value;
17624 mp->refcount = 1;
17625 /* Not yet required for a backwards ref. */
17626 mp->min_address = -65536;
17628 if (max_mp == NULL)
17630 mp->max_address = max_address;
17631 mp->next = NULL;
17632 mp->prev = minipool_vector_tail;
17634 if (mp->prev == NULL)
17636 minipool_vector_head = mp;
17637 minipool_vector_label = gen_label_rtx ();
17639 else
17640 mp->prev->next = mp;
17642 minipool_vector_tail = mp;
17644 else
17646 if (max_address > max_mp->max_address - mp->fix_size)
17647 mp->max_address = max_mp->max_address - mp->fix_size;
17648 else
17649 mp->max_address = max_address;
17651 mp->next = max_mp;
17652 mp->prev = max_mp->prev;
17653 max_mp->prev = mp;
17654 if (mp->prev != NULL)
17655 mp->prev->next = mp;
17656 else
17657 minipool_vector_head = mp;
17660 /* Save the new entry. */
17661 max_mp = mp;
17663 /* Scan over the preceding entries and adjust their addresses as
17664 required. */
17665 while (mp->prev != NULL
17666 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17668 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17669 mp = mp->prev;
17672 return max_mp;
17675 static Mnode *
17676 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17677 HOST_WIDE_INT min_address)
17679 HOST_WIDE_INT offset;
17681 /* The code below assumes these are different. */
17682 gcc_assert (mp != min_mp);
17684 if (min_mp == NULL)
17686 if (min_address > mp->min_address)
17687 mp->min_address = min_address;
17689 else
17691 /* We will adjust this below if it is too loose. */
17692 mp->min_address = min_address;
17694 /* Unlink MP from its current position. Since min_mp is non-null,
17695 mp->next must be non-null. */
17696 mp->next->prev = mp->prev;
17697 if (mp->prev != NULL)
17698 mp->prev->next = mp->next;
17699 else
17700 minipool_vector_head = mp->next;
17702 /* Reinsert it after MIN_MP. */
17703 mp->prev = min_mp;
17704 mp->next = min_mp->next;
17705 min_mp->next = mp;
17706 if (mp->next != NULL)
17707 mp->next->prev = mp;
17708 else
17709 minipool_vector_tail = mp;
17712 min_mp = mp;
17714 offset = 0;
17715 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17717 mp->offset = offset;
17718 if (mp->refcount > 0)
17719 offset += mp->fix_size;
17721 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17722 mp->next->min_address = mp->min_address + mp->fix_size;
17725 return min_mp;
17728 /* Add a constant to the minipool for a backward reference. Returns the
17729 node added or NULL if the constant will not fit in this pool.
17731 Note that the code for insertion for a backwards reference can be
17732 somewhat confusing because the calculated offsets for each fix do
17733 not take into account the size of the pool (which is still under
17734 construction). */
17735 static Mnode *
17736 add_minipool_backward_ref (Mfix *fix)
17738 /* If set, min_mp is the last pool_entry that has a lower constraint
17739 than the one we are trying to add. */
17740 Mnode *min_mp = NULL;
17741 /* This can be negative, since it is only a constraint. */
17742 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17743 Mnode *mp;
17745 /* If we can't reach the current pool from this insn, or if we can't
17746 insert this entry at the end of the pool without pushing other
17747 fixes out of range, then we don't try. This ensures that we
17748 can't fail later on. */
17749 if (min_address >= minipool_barrier->address
17750 || (minipool_vector_tail->min_address + fix->fix_size
17751 >= minipool_barrier->address))
17752 return NULL;
17754 /* Scan the pool to see if a constant with the same value has
17755 already been added. While we are doing this, also note the
17756 location where we must insert the constant if it doesn't already
17757 exist. */
17758 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17760 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17761 && fix->mode == mp->mode
17762 && (!LABEL_P (fix->value)
17763 || (CODE_LABEL_NUMBER (fix->value)
17764 == CODE_LABEL_NUMBER (mp->value)))
17765 && rtx_equal_p (fix->value, mp->value)
17766 /* Check that there is enough slack to move this entry to the
17767 end of the table (this is conservative). */
17768 && (mp->max_address
17769 > (minipool_barrier->address
17770 + minipool_vector_tail->offset
17771 + minipool_vector_tail->fix_size)))
17773 mp->refcount++;
17774 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17777 if (min_mp != NULL)
17778 mp->min_address += fix->fix_size;
17779 else
17781 /* Note the insertion point if necessary. */
17782 if (mp->min_address < min_address)
17784 /* For now, we do not allow the insertion of nodes requiring
17785 8-byte alignment anywhere but at the start of the pool. */
17786 if (ARM_DOUBLEWORD_ALIGN
17787 && fix->fix_size >= 8 && mp->fix_size < 8)
17788 return NULL;
17789 else
17790 min_mp = mp;
17792 else if (mp->max_address
17793 < minipool_barrier->address + mp->offset + fix->fix_size)
17795 /* Inserting before this entry would push the fix beyond
17796 its maximum address (which can happen if we have
17797 re-located a forwards fix); force the new fix to come
17798 after it. */
17799 if (ARM_DOUBLEWORD_ALIGN
17800 && fix->fix_size >= 8 && mp->fix_size < 8)
17801 return NULL;
17802 else
17804 min_mp = mp;
17805 min_address = mp->min_address + fix->fix_size;
17808 /* Do not insert a non-8-byte aligned quantity before 8-byte
17809 aligned quantities. */
17810 else if (ARM_DOUBLEWORD_ALIGN
17811 && fix->fix_size < 8
17812 && mp->fix_size >= 8)
17814 min_mp = mp;
17815 min_address = mp->min_address + fix->fix_size;
17820 /* We need to create a new entry. */
17821 mp = XNEW (Mnode);
17822 mp->fix_size = fix->fix_size;
17823 mp->mode = fix->mode;
17824 mp->value = fix->value;
17825 mp->refcount = 1;
17826 mp->max_address = minipool_barrier->address + 65536;
17828 mp->min_address = min_address;
17830 if (min_mp == NULL)
17832 mp->prev = NULL;
17833 mp->next = minipool_vector_head;
17835 if (mp->next == NULL)
17837 minipool_vector_tail = mp;
17838 minipool_vector_label = gen_label_rtx ();
17840 else
17841 mp->next->prev = mp;
17843 minipool_vector_head = mp;
17845 else
17847 mp->next = min_mp->next;
17848 mp->prev = min_mp;
17849 min_mp->next = mp;
17851 if (mp->next != NULL)
17852 mp->next->prev = mp;
17853 else
17854 minipool_vector_tail = mp;
17857 /* Save the new entry. */
17858 min_mp = mp;
17860 if (mp->prev)
17861 mp = mp->prev;
17862 else
17863 mp->offset = 0;
17865 /* Scan over the following entries and adjust their offsets. */
17866 while (mp->next != NULL)
17868 if (mp->next->min_address < mp->min_address + mp->fix_size)
17869 mp->next->min_address = mp->min_address + mp->fix_size;
17871 if (mp->refcount)
17872 mp->next->offset = mp->offset + mp->fix_size;
17873 else
17874 mp->next->offset = mp->offset;
17876 mp = mp->next;
17879 return min_mp;
17882 static void
17883 assign_minipool_offsets (Mfix *barrier)
17885 HOST_WIDE_INT offset = 0;
17886 Mnode *mp;
17888 minipool_barrier = barrier;
17890 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17892 mp->offset = offset;
17894 if (mp->refcount > 0)
17895 offset += mp->fix_size;
17899 /* Output the literal table */
17900 static void
17901 dump_minipool (rtx_insn *scan)
17903 Mnode * mp;
17904 Mnode * nmp;
17905 int align64 = 0;
17907 if (ARM_DOUBLEWORD_ALIGN)
17908 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17909 if (mp->refcount > 0 && mp->fix_size >= 8)
17911 align64 = 1;
17912 break;
17915 if (dump_file)
17916 fprintf (dump_file,
17917 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17918 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17920 scan = emit_label_after (gen_label_rtx (), scan);
17921 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17922 scan = emit_label_after (minipool_vector_label, scan);
17924 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17926 if (mp->refcount > 0)
17928 if (dump_file)
17930 fprintf (dump_file,
17931 ";; Offset %u, min %ld, max %ld ",
17932 (unsigned) mp->offset, (unsigned long) mp->min_address,
17933 (unsigned long) mp->max_address);
17934 arm_print_value (dump_file, mp->value);
17935 fputc ('\n', dump_file);
17938 rtx val = copy_rtx (mp->value);
17940 switch (GET_MODE_SIZE (mp->mode))
17942 #ifdef HAVE_consttable_1
17943 case 1:
17944 scan = emit_insn_after (gen_consttable_1 (val), scan);
17945 break;
17947 #endif
17948 #ifdef HAVE_consttable_2
17949 case 2:
17950 scan = emit_insn_after (gen_consttable_2 (val), scan);
17951 break;
17953 #endif
17954 #ifdef HAVE_consttable_4
17955 case 4:
17956 scan = emit_insn_after (gen_consttable_4 (val), scan);
17957 break;
17959 #endif
17960 #ifdef HAVE_consttable_8
17961 case 8:
17962 scan = emit_insn_after (gen_consttable_8 (val), scan);
17963 break;
17965 #endif
17966 #ifdef HAVE_consttable_16
17967 case 16:
17968 scan = emit_insn_after (gen_consttable_16 (val), scan);
17969 break;
17971 #endif
17972 default:
17973 gcc_unreachable ();
17977 nmp = mp->next;
17978 free (mp);
17981 minipool_vector_head = minipool_vector_tail = NULL;
17982 scan = emit_insn_after (gen_consttable_end (), scan);
17983 scan = emit_barrier_after (scan);
17986 /* Return the cost of forcibly inserting a barrier after INSN. */
17987 static int
17988 arm_barrier_cost (rtx_insn *insn)
17990 /* Basing the location of the pool on the loop depth is preferable,
17991 but at the moment, the basic block information seems to be
17992 corrupted by this stage of the compilation. */
17993 int base_cost = 50;
17994 rtx_insn *next = next_nonnote_insn (insn);
17996 if (next != NULL && LABEL_P (next))
17997 base_cost -= 20;
17999 switch (GET_CODE (insn))
18001 case CODE_LABEL:
18002 /* It will always be better to place the table before the label, rather
18003 than after it. */
18004 return 50;
18006 case INSN:
18007 case CALL_INSN:
18008 return base_cost;
18010 case JUMP_INSN:
18011 return base_cost - 10;
18013 default:
18014 return base_cost + 10;
18018 /* Find the best place in the insn stream in the range
18019 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18020 Create the barrier by inserting a jump and add a new fix entry for
18021 it. */
18022 static Mfix *
18023 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18025 HOST_WIDE_INT count = 0;
18026 rtx_barrier *barrier;
18027 rtx_insn *from = fix->insn;
18028 /* The instruction after which we will insert the jump. */
18029 rtx_insn *selected = NULL;
18030 int selected_cost;
18031 /* The address at which the jump instruction will be placed. */
18032 HOST_WIDE_INT selected_address;
18033 Mfix * new_fix;
18034 HOST_WIDE_INT max_count = max_address - fix->address;
18035 rtx_code_label *label = gen_label_rtx ();
18037 selected_cost = arm_barrier_cost (from);
18038 selected_address = fix->address;
18040 while (from && count < max_count)
18042 rtx_jump_table_data *tmp;
18043 int new_cost;
18045 /* This code shouldn't have been called if there was a natural barrier
18046 within range. */
18047 gcc_assert (!BARRIER_P (from));
18049 /* Count the length of this insn. This must stay in sync with the
18050 code that pushes minipool fixes. */
18051 if (LABEL_P (from))
18052 count += get_label_padding (from);
18053 else
18054 count += get_attr_length (from);
18056 /* If there is a jump table, add its length. */
18057 if (tablejump_p (from, NULL, &tmp))
18059 count += get_jump_table_size (tmp);
18061 /* Jump tables aren't in a basic block, so base the cost on
18062 the dispatch insn. If we select this location, we will
18063 still put the pool after the table. */
18064 new_cost = arm_barrier_cost (from);
18066 if (count < max_count
18067 && (!selected || new_cost <= selected_cost))
18069 selected = tmp;
18070 selected_cost = new_cost;
18071 selected_address = fix->address + count;
18074 /* Continue after the dispatch table. */
18075 from = NEXT_INSN (tmp);
18076 continue;
18079 new_cost = arm_barrier_cost (from);
18081 if (count < max_count
18082 && (!selected || new_cost <= selected_cost))
18084 selected = from;
18085 selected_cost = new_cost;
18086 selected_address = fix->address + count;
18089 from = NEXT_INSN (from);
18092 /* Make sure that we found a place to insert the jump. */
18093 gcc_assert (selected);
18095 /* Create a new JUMP_INSN that branches around a barrier. */
18096 from = emit_jump_insn_after (gen_jump (label), selected);
18097 JUMP_LABEL (from) = label;
18098 barrier = emit_barrier_after (from);
18099 emit_label_after (label, barrier);
18101 /* Create a minipool barrier entry for the new barrier. */
18102 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18103 new_fix->insn = barrier;
18104 new_fix->address = selected_address;
18105 new_fix->next = fix->next;
18106 fix->next = new_fix;
18108 return new_fix;
18111 /* Record that there is a natural barrier in the insn stream at
18112 ADDRESS. */
18113 static void
18114 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18116 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18118 fix->insn = insn;
18119 fix->address = address;
18121 fix->next = NULL;
18122 if (minipool_fix_head != NULL)
18123 minipool_fix_tail->next = fix;
18124 else
18125 minipool_fix_head = fix;
18127 minipool_fix_tail = fix;
18130 /* Record INSN, which will need fixing up to load a value from the
18131 minipool. ADDRESS is the offset of the insn from the start of the
18132 function; LOC is a pointer to the part of the insn which requires
18133 fixing; VALUE is the constant that must be loaded, which is of type
18134 MODE. */
18135 static void
18136 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18137 machine_mode mode, rtx value)
18139 gcc_assert (!arm_disable_literal_pool);
18140 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18142 fix->insn = insn;
18143 fix->address = address;
18144 fix->loc = loc;
18145 fix->mode = mode;
18146 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18147 fix->value = value;
18148 fix->forwards = get_attr_pool_range (insn);
18149 fix->backwards = get_attr_neg_pool_range (insn);
18150 fix->minipool = NULL;
18152 /* If an insn doesn't have a range defined for it, then it isn't
18153 expecting to be reworked by this code. Better to stop now than
18154 to generate duff assembly code. */
18155 gcc_assert (fix->forwards || fix->backwards);
18157 /* If an entry requires 8-byte alignment then assume all constant pools
18158 require 4 bytes of padding. Trying to do this later on a per-pool
18159 basis is awkward because existing pool entries have to be modified. */
18160 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18161 minipool_pad = 4;
18163 if (dump_file)
18165 fprintf (dump_file,
18166 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18167 GET_MODE_NAME (mode),
18168 INSN_UID (insn), (unsigned long) address,
18169 -1 * (long)fix->backwards, (long)fix->forwards);
18170 arm_print_value (dump_file, fix->value);
18171 fprintf (dump_file, "\n");
18174 /* Add it to the chain of fixes. */
18175 fix->next = NULL;
18177 if (minipool_fix_head != NULL)
18178 minipool_fix_tail->next = fix;
18179 else
18180 minipool_fix_head = fix;
18182 minipool_fix_tail = fix;
18185 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
18186 constant inline; if synthesis would cost more than this, the value is
18187 normally loaded from the constant pool instead. */
18189 arm_max_const_double_inline_cost ()
18191 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18194 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18195 Returns the number of insns needed, or 99 if we don't know how to
18196 do it. */
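/* For example, 0x0000000100000001 typically costs two insns here (one MOV
per 32-bit half), which is within the limit given by
arm_max_const_double_inline_cost, so such a constant is normally
synthesized inline. */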
18198 arm_const_double_inline_cost (rtx val)
18200 rtx lowpart, highpart;
18201 machine_mode mode;
18203 mode = GET_MODE (val);
18205 if (mode == VOIDmode)
18206 mode = DImode;
18208 gcc_assert (GET_MODE_SIZE (mode) == 8);
18210 lowpart = gen_lowpart (SImode, val);
18211 highpart = gen_highpart_mode (SImode, mode, val);
18213 gcc_assert (CONST_INT_P (lowpart));
18214 gcc_assert (CONST_INT_P (highpart));
18216 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18217 NULL_RTX, NULL_RTX, 0, 0)
18218 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18219 NULL_RTX, NULL_RTX, 0, 0));
18222 /* Cost of loading a SImode constant. */
18223 static inline int
18224 arm_const_inline_cost (enum rtx_code code, rtx val)
18226 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18227 NULL_RTX, NULL_RTX, 1, 0);
18230 /* Return true if it is worthwhile to split a 64-bit constant into two
18231 32-bit operations. This is the case if optimizing for size, or
18232 if we have load delay slots, or if one 32-bit part can be done with
18233 a single data operation. */
18234 bool
18235 arm_const_double_by_parts (rtx val)
18237 machine_mode mode = GET_MODE (val);
18238 rtx part;
18240 if (optimize_size || arm_ld_sched)
18241 return true;
18243 if (mode == VOIDmode)
18244 mode = DImode;
18246 part = gen_highpart_mode (SImode, mode, val);
18248 gcc_assert (CONST_INT_P (part));
18250 if (const_ok_for_arm (INTVAL (part))
18251 || const_ok_for_arm (~INTVAL (part)))
18252 return true;
18254 part = gen_lowpart (SImode, val);
18256 gcc_assert (CONST_INT_P (part));
18258 if (const_ok_for_arm (INTVAL (part))
18259 || const_ok_for_arm (~INTVAL (part)))
18260 return true;
18262 return false;
18265 /* Return true if it is possible to inline both the high and low parts
18266 of a 64-bit constant into 32-bit data processing instructions. */
18267 bool
18268 arm_const_double_by_immediates (rtx val)
18270 machine_mode mode = GET_MODE (val);
18271 rtx part;
18273 if (mode == VOIDmode)
18274 mode = DImode;
18276 part = gen_highpart_mode (SImode, mode, val);
18278 gcc_assert (CONST_INT_P (part));
18280 if (!const_ok_for_arm (INTVAL (part)))
18281 return false;
18283 part = gen_lowpart (SImode, val);
18285 gcc_assert (CONST_INT_P (part));
18287 if (!const_ok_for_arm (INTVAL (part)))
18288 return false;
18290 return true;
18293 /* Scan INSN and note any of its operands that need fixing.
18294 If DO_PUSHES is false we do not actually push any of the fixups
18295 needed. */
18296 static void
18297 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18299 int opno;
18301 extract_constrain_insn (insn);
18303 if (recog_data.n_alternatives == 0)
18304 return;
18306 /* Fill in recog_op_alt with information about the constraints of
18307 this insn. */
18308 preprocess_constraints (insn);
18310 const operand_alternative *op_alt = which_op_alt ();
18311 for (opno = 0; opno < recog_data.n_operands; opno++)
18313 /* Things we need to fix can only occur in inputs. */
18314 if (recog_data.operand_type[opno] != OP_IN)
18315 continue;
18317 /* If this alternative is a memory reference, then any mention
18318 of constants in this alternative is really to fool reload
18319 into allowing us to accept one there. We need to fix them up
18320 now so that we output the right code. */
18321 if (op_alt[opno].memory_ok)
18323 rtx op = recog_data.operand[opno];
18325 if (CONSTANT_P (op))
18327 if (do_pushes)
18328 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18329 recog_data.operand_mode[opno], op);
18331 else if (MEM_P (op)
18332 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18333 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18335 if (do_pushes)
18337 rtx cop = avoid_constant_pool_reference (op);
18339 /* Casting the address of something to a mode narrower
18340 than a word can cause avoid_constant_pool_reference()
18341 to return the pool reference itself. That's no good to
18342 us here. Let's just hope that we can use the
18343 constant pool value directly. */
18344 if (op == cop)
18345 cop = get_pool_constant (XEXP (op, 0));
18347 push_minipool_fix (insn, address,
18348 recog_data.operand_loc[opno],
18349 recog_data.operand_mode[opno], cop);
18356 return;
18359 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18360 and unions in the context of ARMv8-M Security Extensions. It is used as a
18361 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18362 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18363 or four masks, depending on whether it is being computed for a
18364 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18365 respectively. The tree for the type of the argument or a field within an
18366 argument is passed in ARG_TYPE, the current register this argument or field
18367 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18368 argument or field starts at is passed in STARTING_BIT and the last used bit
18369 is kept in LAST_USED_BIT which is also updated accordingly. */
18371 static unsigned HOST_WIDE_INT
18372 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18373 uint32_t * padding_bits_to_clear,
18374 unsigned starting_bit, int * last_used_bit)
18377 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18379 if (TREE_CODE (arg_type) == RECORD_TYPE)
18381 unsigned current_bit = starting_bit;
18382 tree field;
18383 long int offset, size;
18386 field = TYPE_FIELDS (arg_type);
18387 while (field)
18389 /* The offset within a structure is always an offset from
18390 the start of that structure. Make sure we take that into account in the
18391 calculation of the register-based offset that we use here. */
18392 offset = starting_bit;
18393 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18394 offset %= 32;
18396 /* This is the actual size of the field; for bitfields this is the
18397 bitfield width and not the container size. */
18398 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18400 if (*last_used_bit != offset)
18402 if (offset < *last_used_bit)
18404 /* This field's offset is before the 'last_used_bit', which
18405 means this field goes on the next register. So we need to
18406 pad the rest of the current register and increase the
18407 register number. */
18408 uint32_t mask;
18409 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18410 mask++;
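/* E.g. with *last_used_bit == 24 this gives mask == 0xff000000:
bits 24-31 of the register are padding and must be cleared. */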
18412 padding_bits_to_clear[*regno] |= mask;
18413 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18414 (*regno)++;
18416 else
18418 /* Otherwise we pad the bits between the last field's end and
18419 the start of the new field. */
18420 uint32_t mask;
18422 mask = ((uint32_t)-1) >> (32 - offset);
18423 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
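/* E.g. offset == 16 and *last_used_bit == 8 gives mask == 0xff00: only
bits 8-15, between the end of the previous field and the start of this
one, are padding. */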
18424 padding_bits_to_clear[*regno] |= mask;
18426 current_bit = offset;
18429 /* Calculate further padding bits for inner structs/unions too. */
18430 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18432 *last_used_bit = current_bit;
18433 not_to_clear_reg_mask
18434 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18435 padding_bits_to_clear, offset,
18436 last_used_bit);
18438 else
18440 /* Update 'current_bit' with this field's size. If the
18441 'current_bit' lies in a subsequent register, update 'regno' and
18442 reset 'current_bit' to point to the current bit in that new
18443 register. */
18444 current_bit += size;
18445 while (current_bit >= 32)
18447 current_bit-=32;
18448 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18449 (*regno)++;
18451 *last_used_bit = current_bit;
18454 field = TREE_CHAIN (field);
18456 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18458 else if (TREE_CODE (arg_type) == UNION_TYPE)
18460 tree field, field_t;
18461 int i, regno_t, field_size;
18462 int max_reg = -1;
18463 int max_bit = -1;
18464 uint32_t mask;
18465 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18466 = {-1, -1, -1, -1};
18468 /* To compute the padding bits in a union we only consider bits as
18469 padding bits if they are always either a padding bit or fall outside a
18470 field's size for all fields in the union. */
18471 field = TYPE_FIELDS (arg_type);
18472 while (field)
18474 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18475 = {0U, 0U, 0U, 0U};
18476 int last_used_bit_t = *last_used_bit;
18477 regno_t = *regno;
18478 field_t = TREE_TYPE (field);
18480 /* If the field's type is either a record or a union make sure to
18481 compute their padding bits too. */
18482 if (RECORD_OR_UNION_TYPE_P (field_t))
18483 not_to_clear_reg_mask
18484 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18485 &padding_bits_to_clear_t[0],
18486 starting_bit, &last_used_bit_t);
18487 else
18489 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18490 regno_t = (field_size / 32) + *regno;
18491 last_used_bit_t = (starting_bit + field_size) % 32;
18494 for (i = *regno; i < regno_t; i++)
18496 /* For all but the last register used by this field only keep the
18497 padding bits that were padding bits in this field. */
18498 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18501 /* For the last register, keep all padding bits that were padding
18502 bits in this field and any padding bits that are still valid
18503 as padding bits but fall outside of this field's size. */
18504 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18505 padding_bits_to_clear_res[regno_t]
18506 &= padding_bits_to_clear_t[regno_t] | mask;
18508 /* Update the maximum size of the fields in terms of registers used
18509 ('max_reg') and the 'last_used_bit' in said register. */
18510 if (max_reg < regno_t)
18512 max_reg = regno_t;
18513 max_bit = last_used_bit_t;
18515 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18516 max_bit = last_used_bit_t;
18518 field = TREE_CHAIN (field);
18521 /* Update the current padding_bits_to_clear using the intersection of the
18522 padding bits of all the fields. */
18523 for (i=*regno; i < max_reg; i++)
18524 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18526 /* Do not keep trailing padding bits; we do not know yet whether this
18527 is the end of the argument. */
18528 mask = ((uint32_t) 1 << max_bit) - 1;
18529 padding_bits_to_clear[max_reg]
18530 |= padding_bits_to_clear_res[max_reg] & mask;
18532 *regno = max_reg;
18533 *last_used_bit = max_bit;
18535 else
18536 /* This function should only be used for structs and unions. */
18537 gcc_unreachable ();
18539 return not_to_clear_reg_mask;
18542 /* In the context of ARMv8-M Security Extensions, this function is used for both
18543 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18544 registers are used when returning or passing arguments, which is then
18545 returned as a mask. It will also compute a mask to indicate padding/unused
18546 bits for each of these registers, and pass this back through the
18547 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18548 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18549 the starting register used to pass this argument or return value is passed
18550 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18551 for struct and union types. */
18553 static unsigned HOST_WIDE_INT
18554 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18555 uint32_t * padding_bits_to_clear)
18558 int last_used_bit = 0;
18559 unsigned HOST_WIDE_INT not_to_clear_mask;
18561 if (RECORD_OR_UNION_TYPE_P (arg_type))
18563 not_to_clear_mask
18564 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18565 padding_bits_to_clear, 0,
18566 &last_used_bit);
18569 /* If the 'last_used_bit' is not zero, that means we are still using a
18570 part of the last 'regno'. In such cases we must clear the trailing
18571 bits. Otherwise we are not using regno and we should mark it to be
18572 cleared. */
18573 if (last_used_bit != 0)
18574 padding_bits_to_clear[regno]
18575 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18576 else
18577 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18579 else
18581 not_to_clear_mask = 0;
18582 /* We are not dealing with structs or unions, so these arguments may be
18583 passed in floating point registers too. In some cases a BLKmode is
18584 used when returning or passing arguments in multiple VFP registers. */
18585 if (GET_MODE (arg_rtx) == BLKmode)
18587 int i, arg_regs;
18588 rtx reg;
18590 /* This should really only occur when dealing with the hard-float
18591 ABI. */
18592 gcc_assert (TARGET_HARD_FLOAT_ABI);
18594 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18596 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18597 gcc_assert (REG_P (reg));
18599 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18601 /* If we are dealing with DF mode, make sure we don't
18602 clear either of the registers it addresses. */
18603 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18604 if (arg_regs > 1)
18606 unsigned HOST_WIDE_INT mask;
18607 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18608 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18609 not_to_clear_mask |= mask;
18613 else
18615 /* Otherwise we can rely on the MODE to determine how many registers
18616 are being used by this argument. */
18617 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18618 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18619 if (arg_regs > 1)
18621 unsigned HOST_WIDE_INT
18622 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18623 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18624 not_to_clear_mask |= mask;
18629 return not_to_clear_mask;
18632 /* Clear secret registers before doing a cmse_nonsecure_call or returning from
18633 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18634 are to be fully cleared, using the value in register CLEARING_REG if more
18635 efficient. The PADDING_BITS_LEN-entry array PADDING_BITS_TO_CLEAR gives
18636 the bits that need to be cleared in caller-saved core registers, with
18637 SCRATCH_REG used as a scratch register for that clearing.
18639 NOTE: one of the three following conditions must hold:
18640 - SCRATCH_REG is a low register
18641 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18642 in TO_CLEAR_BITMAP)
18643 - CLEARING_REG is a low register. */
18645 static void
18646 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18647 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18649 bool saved_clearing = false;
18650 rtx saved_clearing_reg = NULL_RTX;
18651 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18653 gcc_assert (arm_arch_cmse);
18655 if (!bitmap_empty_p (to_clear_bitmap))
18657 minregno = bitmap_first_set_bit (to_clear_bitmap);
18658 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18660 clearing_regno = REGNO (clearing_reg);
18662 /* Clear padding bits. */
18663 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18664 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18666 uint64_t mask;
18667 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18669 if (padding_bits_to_clear[i] == 0)
18670 continue;
18672 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18673 CLEARING_REG as scratch. */
18674 if (TARGET_THUMB1
18675 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18677 /* If clearing_reg is not to be cleared, copy its value into scratch_reg
18678 so that we can use clearing_reg to clear the unused bits in the
18679 arguments. */
18680 if ((clearing_regno > maxregno
18681 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18682 && !saved_clearing)
18684 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18685 emit_move_insn (scratch_reg, clearing_reg);
18686 saved_clearing = true;
18687 saved_clearing_reg = scratch_reg;
18689 scratch_reg = clearing_reg;
18692 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18693 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18694 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18696 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18697 mask = (~padding_bits_to_clear[i]) >> 16;
18698 rtx16 = gen_int_mode (16, SImode);
18699 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18700 if (mask)
18701 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18703 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18705 if (saved_clearing)
18706 emit_move_insn (clearing_reg, saved_clearing_reg);
18709 /* Clear full registers. */
18711 if (TARGET_HAVE_FPCXT_CMSE)
18713 rtvec vunspec_vec;
18714 int i, j, k, nb_regs;
18715 rtx use_seq, par, reg, set, vunspec;
18716 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18717 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18718 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18720 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18722 /* Find next register to clear and exit if none. */
18723 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18724 if (i > maxregno)
18725 break;
18727 /* Compute number of consecutive registers to clear. */
18728 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18729 j++);
18730 nb_regs = j - i;
18732 /* Create VSCCLRM RTX pattern. */
18733 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18734 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18735 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18736 VUNSPEC_VSCCLRM_VPR);
18737 XVECEXP (par, 0, 0) = vunspec;
18739 /* Insert VFP register clearing RTX in the pattern. */
18740 start_sequence ();
18741 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18743 if (!bitmap_bit_p (to_clear_bitmap, j))
18744 continue;
18746 reg = gen_rtx_REG (SFmode, j);
18747 set = gen_rtx_SET (reg, const0_rtx);
18748 XVECEXP (par, 0, k++) = set;
18749 emit_use (reg);
18751 use_seq = get_insns ();
18752 end_sequence ();
18754 emit_insn_after (use_seq, emit_insn (par));
18757 /* Get set of core registers to clear. */
18758 bitmap_clear (core_regs_bitmap);
18759 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18760 IP_REGNUM - R0_REGNUM + 1);
18761 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18762 core_regs_bitmap);
18763 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18765 if (bitmap_empty_p (to_clear_core_bitmap))
18766 return;
18768 /* Create clrm RTX pattern. */
18769 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18770 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18772 /* Insert core register clearing RTX in the pattern. */
18773 start_sequence ();
18774 for (j = 0, i = minregno; j < nb_regs; i++)
18776 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18777 continue;
18779 reg = gen_rtx_REG (SImode, i);
18780 set = gen_rtx_SET (reg, const0_rtx);
18781 XVECEXP (par, 0, j++) = set;
18782 emit_use (reg);
18785 /* Insert APSR register clearing RTX in the pattern
18786 along with clobbering CC. */
18787 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18788 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18789 VUNSPEC_CLRM_APSR);
18791 XVECEXP (par, 0, j++) = vunspec;
18793 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18794 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18795 XVECEXP (par, 0, j) = clobber;
18797 use_seq = get_insns ();
18798 end_sequence ();
18800 emit_insn_after (use_seq, emit_insn (par));
18802 else
18804 /* If not marked for clearing, clearing_reg already does not contain
18805 any secret. */
18806 if (clearing_regno <= maxregno
18807 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18809 emit_move_insn (clearing_reg, const0_rtx);
18810 emit_use (clearing_reg);
18811 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18814 for (regno = minregno; regno <= maxregno; regno++)
18816 if (!bitmap_bit_p (to_clear_bitmap, regno))
18817 continue;
18819 if (IS_VFP_REGNUM (regno))
18821 /* If regno is an even vfp register and its successor is also to
18822 be cleared, use vmov. */
18823 if (TARGET_VFP_DOUBLE
18824 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18825 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18827 emit_move_insn (gen_rtx_REG (DFmode, regno),
18828 CONST1_RTX (DFmode));
18829 emit_use (gen_rtx_REG (DFmode, regno));
18830 regno++;
18832 else
18834 emit_move_insn (gen_rtx_REG (SFmode, regno),
18835 CONST1_RTX (SFmode));
18836 emit_use (gen_rtx_REG (SFmode, regno));
18839 else
18841 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18842 emit_use (gen_rtx_REG (SImode, regno));
18848 /* Clear core and caller-saved VFP registers not used to pass arguments before
18849 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18850 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18851 libgcc/config/arm/cmse_nonsecure_call.S. */
18853 static void
18854 cmse_nonsecure_call_inline_register_clear (void)
18856 basic_block bb;
18858 FOR_EACH_BB_FN (bb, cfun)
18860 rtx_insn *insn;
18862 FOR_BB_INSNS (bb, insn)
18864 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18865 /* frame = VFP regs + FPSCR + VPR. */
18866 unsigned lazy_store_stack_frame_size
18867 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18868 unsigned long callee_saved_mask
18869 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18870 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
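/* That is, the core registers from LAST_ARG_REGNUM + 1 up to LAST_HI_REGNUM,
which are pushed before the nonsecure call and popped after it when
callee-saved registers must also be cleared.  */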
18871 unsigned address_regnum, regno;
18872 unsigned max_int_regno
18873 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18874 unsigned max_fp_regno
18875 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18876 unsigned maxregno
18877 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18878 auto_sbitmap to_clear_bitmap (maxregno + 1);
18879 rtx_insn *seq;
18880 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18881 rtx address;
18882 CUMULATIVE_ARGS args_so_far_v;
18883 cumulative_args_t args_so_far;
18884 tree arg_type, fntype;
18885 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18886 function_args_iterator args_iter;
18887 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18889 if (!NONDEBUG_INSN_P (insn))
18890 continue;
18892 if (!CALL_P (insn))
18893 continue;
18895 pat = PATTERN (insn);
18896 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18897 call = XVECEXP (pat, 0, 0);
18899 /* Get the real call RTX if the insn sets a value, i.e. returns. */
18900 if (GET_CODE (call) == SET)
18901 call = SET_SRC (call);
18903 /* Check if it is a cmse_nonsecure_call. */
18904 unspec = XEXP (call, 0);
18905 if (GET_CODE (unspec) != UNSPEC
18906 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18907 continue;
18909 /* Mark registers that need to be cleared. Those that hold a
18910 parameter are removed from the set further below. */
18911 bitmap_clear (to_clear_bitmap);
18912 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18913 max_int_regno - R0_REGNUM + 1);
18915 /* Only look at the caller-saved floating point registers in case of
18916 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18917 lazy store and loads which clear both caller- and callee-saved
18918 registers. */
18919 if (!lazy_fpclear)
18921 auto_sbitmap float_bitmap (maxregno + 1);
18923 bitmap_clear (float_bitmap);
18924 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18925 max_fp_regno - FIRST_VFP_REGNUM + 1);
18926 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18929 /* Make sure the register used to hold the function address is not
18930 cleared. */
18931 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18932 gcc_assert (MEM_P (address));
18933 gcc_assert (REG_P (XEXP (address, 0)));
18934 address_regnum = REGNO (XEXP (address, 0));
18935 if (address_regnum <= max_int_regno)
18936 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18938 /* Set basic block of call insn so that df rescan is performed on
18939 insns inserted here. */
18940 set_block_for_insn (insn, bb);
18941 df_set_flags (DF_DEFER_INSN_RESCAN);
18942 start_sequence ();
18944 /* Make sure the scheduler doesn't schedule other insns beyond
18945 here. */
18946 emit_insn (gen_blockage ());
18948 /* Walk through all arguments and clear registers appropriately. */
18950 fntype = TREE_TYPE (MEM_EXPR (address));
18951 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18952 NULL_TREE);
18953 args_so_far = pack_cumulative_args (&args_so_far_v);
18954 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18956 rtx arg_rtx;
18957 uint64_t to_clear_args_mask;
18959 if (VOID_TYPE_P (arg_type))
18960 continue;
18962 function_arg_info arg (arg_type, /*named=*/true);
18963 if (!first_param)
18964 /* ??? We should advance after processing the argument and pass
18965 the argument we're advancing past. */
18966 arm_function_arg_advance (args_so_far, arg);
18968 arg_rtx = arm_function_arg (args_so_far, arg);
18969 gcc_assert (REG_P (arg_rtx));
18970 to_clear_args_mask
18971 = compute_not_to_clear_mask (arg_type, arg_rtx,
18972 REGNO (arg_rtx),
18973 &padding_bits_to_clear[0]);
18974 if (to_clear_args_mask)
18976 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18978 if (to_clear_args_mask & (1ULL << regno))
18979 bitmap_clear_bit (to_clear_bitmap, regno);
18983 first_param = false;
18986 /* We use right shift and left shift to clear the LSB of the address
18987 we jump to instead of using bic, to avoid having to use an extra
18988 register on Thumb-1. */
18989 clearing_reg = XEXP (address, 0);
18990 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18991 emit_insn (gen_rtx_SET (clearing_reg, shift));
18992 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18993 emit_insn (gen_rtx_SET (clearing_reg, shift));
18995 if (clear_callee_saved)
18997 rtx push_insn =
18998 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18999 /* Disable frame debug info in push because it needs to be
19000 disabled for pop (see below). */
19001 RTX_FRAME_RELATED_P (push_insn) = 0;
19003 /* Lazy store multiple. */
19004 if (lazy_fpclear)
19006 rtx imm;
19007 rtx_insn *add_insn;
19009 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19010 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19011 stack_pointer_rtx, imm));
19012 /* If we have the frame pointer, then it will be the
19013 CFA reg. Otherwise, the stack pointer is the CFA
19014 reg, so we need to emit a CFA adjust. */
19015 if (!frame_pointer_needed)
19016 arm_add_cfa_adjust_cfa_note (add_insn,
19017 - lazy_store_stack_frame_size,
19018 stack_pointer_rtx,
19019 stack_pointer_rtx);
19020 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19022 /* Save VFP callee-saved registers. */
19023 else
19025 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19026 (max_fp_regno - D7_VFP_REGNUM) / 2);
19027 /* Disable frame debug info in push because it needs to be
19028 disabled for vpop (see below). */
19029 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19033 /* Clear caller-saved registers that leak before doing a non-secure
19034 call. */
19035 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19036 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19037 NUM_ARG_REGS, ip_reg, clearing_reg);
19039 seq = get_insns ();
19040 end_sequence ();
19041 emit_insn_before (seq, insn);
19043 if (TARGET_HAVE_FPCXT_CMSE)
19045 rtx_insn *last, *pop_insn, *after = insn;
19047 start_sequence ();
19049 /* Lazy load multiple done as part of libcall in Armv8-M. */
19050 if (lazy_fpclear)
19052 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19053 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19054 rtx_insn *add_insn =
19055 emit_insn (gen_addsi3 (stack_pointer_rtx,
19056 stack_pointer_rtx, imm));
19057 if (!frame_pointer_needed)
19058 arm_add_cfa_adjust_cfa_note (add_insn,
19059 lazy_store_stack_frame_size,
19060 stack_pointer_rtx,
19061 stack_pointer_rtx);
19063 /* Restore VFP callee-saved registers. */
19064 else
19066 int nb_callee_saved_vfp_regs =
19067 (max_fp_regno - D7_VFP_REGNUM) / 2;
19068 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19069 nb_callee_saved_vfp_regs,
19070 stack_pointer_rtx);
19071 /* Disable frame debug info in vpop because the SP adjustment
19072 is made using a CFA adjustment note while the CFA register used
19073 is sometimes R7. This then causes an assert failure in the
19074 CFI note creation code. */
19075 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19078 arm_emit_multi_reg_pop (callee_saved_mask);
19079 pop_insn = get_last_insn ();
19081 /* Disable frame debug info in the pop because its notes reset the state
19082 of popped registers to what it was at the beginning of the
19083 function, before the prologue. This leads to incorrect state
19084 when doing the pop after the nonsecure call for registers that
19085 are pushed both in the prologue and before the nonsecure call.
19087 It also occasionally triggers an assert failure in the CFI note
19088 creation code when there are two codepaths to the epilogue,
19089 one of which does not go through the nonsecure call.
19090 Obviously this means that debugging between the push and pop is
19091 not reliable. */
19092 RTX_FRAME_RELATED_P (pop_insn) = 0;
19094 seq = get_insns ();
19095 last = get_last_insn ();
19096 end_sequence ();
19098 emit_insn_after (seq, after);
19100 /* Skip the pop we have just inserted after the nonsecure call; we know
19101 it does not contain a nonsecure call. */
19102 insn = last;
19108 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19109 be useful in the next conditional jump insn. */
19111 static void
19112 thumb1_reorg (void)
19114 basic_block bb;
19116 FOR_EACH_BB_FN (bb, cfun)
19118 rtx dest, src;
19119 rtx cmp, op0, op1, set = NULL;
19120 rtx_insn *prev, *insn = BB_END (bb);
19121 bool insn_clobbered = false;
19123 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19124 insn = PREV_INSN (insn);
19126 /* Find the last cbranchsi4_insn in basic block BB. */
19127 if (insn == BB_HEAD (bb)
19128 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19129 continue;
19131 /* Get the register with which we are comparing. */
19132 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19133 op0 = XEXP (cmp, 0);
19134 op1 = XEXP (cmp, 1);
19136 /* Check that comparison is against ZERO. */
19137 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19138 continue;
19140 /* Find the first flag setting insn before INSN in basic block BB. */
19141 gcc_assert (insn != BB_HEAD (bb));
19142 for (prev = PREV_INSN (insn);
19143 (!insn_clobbered
19144 && prev != BB_HEAD (bb)
19145 && (NOTE_P (prev)
19146 || DEBUG_INSN_P (prev)
19147 || ((set = single_set (prev)) != NULL
19148 && get_attr_conds (prev) == CONDS_NOCOND)));
19149 prev = PREV_INSN (prev))
19151 if (reg_set_p (op0, prev))
19152 insn_clobbered = true;
19155 /* Skip if op0 is clobbered by an insn other than prev. */
19156 if (insn_clobbered)
19157 continue;
19159 if (!set)
19160 continue;
19162 dest = SET_DEST (set);
19163 src = SET_SRC (set);
19164 if (!low_register_operand (dest, SImode)
19165 || !low_register_operand (src, SImode))
19166 continue;
19168 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19169 in INSN. Both src and dest of the move insn are checked. */
19170 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19172 dest = copy_rtx (dest);
19173 src = copy_rtx (src);
19174 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19175 PATTERN (prev) = gen_rtx_SET (dest, src);
19176 INSN_CODE (prev) = -1;
19177 /* Set test register in INSN to dest. */
19178 XEXP (cmp, 0) = copy_rtx (dest);
19179 INSN_CODE (insn) = -1;
19184 /* Convert instructions to their cc-clobbering variant if possible, since
19185 that allows us to use smaller encodings. */
19187 static void
19188 thumb2_reorg (void)
19190 basic_block bb;
19191 regset_head live;
19193 INIT_REG_SET (&live);
19195 /* We are freeing block_for_insn in the toplev to keep compatibility
19196 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19197 compute_bb_for_insn ();
19198 df_analyze ();
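/* SKIP leaves an insn alone, CONV adds a CC clobber so that the 16-bit
flag-setting encoding can be used, and SWAP_CONV additionally swaps the
operands of a commutative operation before converting.  */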
19200 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19202 FOR_EACH_BB_FN (bb, cfun)
19204 if ((current_tune->disparage_flag_setting_t16_encodings
19205 == tune_params::DISPARAGE_FLAGS_ALL)
19206 && optimize_bb_for_speed_p (bb))
19207 continue;
19209 rtx_insn *insn;
19210 Convert_Action action = SKIP;
19211 Convert_Action action_for_partial_flag_setting
19212 = ((current_tune->disparage_flag_setting_t16_encodings
19213 != tune_params::DISPARAGE_FLAGS_NEITHER)
19214 && optimize_bb_for_speed_p (bb))
19215 ? SKIP : CONV;
19217 COPY_REG_SET (&live, DF_LR_OUT (bb));
19218 df_simulate_initialize_backwards (bb, &live);
19219 FOR_BB_INSNS_REVERSE (bb, insn)
19221 if (NONJUMP_INSN_P (insn)
19222 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19223 && GET_CODE (PATTERN (insn)) == SET)
19225 action = SKIP;
19226 rtx pat = PATTERN (insn);
19227 rtx dst = XEXP (pat, 0);
19228 rtx src = XEXP (pat, 1);
19229 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19231 if (UNARY_P (src) || BINARY_P (src))
19232 op0 = XEXP (src, 0);
19234 if (BINARY_P (src))
19235 op1 = XEXP (src, 1);
19237 if (low_register_operand (dst, SImode))
19239 switch (GET_CODE (src))
19241 case PLUS:
19242 /* Adding two registers and storing the result
19243 in the first source is already a 16-bit
19244 operation. */
19245 if (rtx_equal_p (dst, op0)
19246 && register_operand (op1, SImode))
19247 break;
19249 if (low_register_operand (op0, SImode))
19251 /* ADDS <Rd>,<Rn>,<Rm> */
19252 if (low_register_operand (op1, SImode))
19253 action = CONV;
19254 /* ADDS <Rdn>,#<imm8> */
19255 /* SUBS <Rdn>,#<imm8> */
19256 else if (rtx_equal_p (dst, op0)
19257 && CONST_INT_P (op1)
19258 && IN_RANGE (INTVAL (op1), -255, 255))
19259 action = CONV;
19260 /* ADDS <Rd>,<Rn>,#<imm3> */
19261 /* SUBS <Rd>,<Rn>,#<imm3> */
19262 else if (CONST_INT_P (op1)
19263 && IN_RANGE (INTVAL (op1), -7, 7))
19264 action = CONV;
19266 /* ADCS <Rd>, <Rn> */
19267 else if (GET_CODE (XEXP (src, 0)) == PLUS
19268 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19269 && low_register_operand (XEXP (XEXP (src, 0), 1),
19270 SImode)
19271 && COMPARISON_P (op1)
19272 && cc_register (XEXP (op1, 0), VOIDmode)
19273 && maybe_get_arm_condition_code (op1) == ARM_CS
19274 && XEXP (op1, 1) == const0_rtx)
19275 action = CONV;
19276 break;
19278 case MINUS:
19279 /* RSBS <Rd>,<Rn>,#0
19280 Not handled here: see NEG below. */
19281 /* SUBS <Rd>,<Rn>,#<imm3>
19282 SUBS <Rdn>,#<imm8>
19283 Not handled here: see PLUS above. */
19284 /* SUBS <Rd>,<Rn>,<Rm> */
19285 if (low_register_operand (op0, SImode)
19286 && low_register_operand (op1, SImode))
19287 action = CONV;
19288 break;
19290 case MULT:
19291 /* MULS <Rdm>,<Rn>,<Rdm>
19292 As an exception to the rule, this is only used
19293 when optimizing for size since MULS is slow on all
19294 known implementations. We do not even want to use
19295 MULS in cold code, if optimizing for speed, so we
19296 test the global flag here. */
19297 if (!optimize_size)
19298 break;
19299 /* Fall through. */
19300 case AND:
19301 case IOR:
19302 case XOR:
19303 /* ANDS <Rdn>,<Rm> */
19304 if (rtx_equal_p (dst, op0)
19305 && low_register_operand (op1, SImode))
19306 action = action_for_partial_flag_setting;
19307 else if (rtx_equal_p (dst, op1)
19308 && low_register_operand (op0, SImode))
19309 action = action_for_partial_flag_setting == SKIP
19310 ? SKIP : SWAP_CONV;
19311 break;
19313 case ASHIFTRT:
19314 case ASHIFT:
19315 case LSHIFTRT:
19316 /* ASRS <Rdn>,<Rm> */
19317 /* LSRS <Rdn>,<Rm> */
19318 /* LSLS <Rdn>,<Rm> */
19319 if (rtx_equal_p (dst, op0)
19320 && low_register_operand (op1, SImode))
19321 action = action_for_partial_flag_setting;
19322 /* ASRS <Rd>,<Rm>,#<imm5> */
19323 /* LSRS <Rd>,<Rm>,#<imm5> */
19324 /* LSLS <Rd>,<Rm>,#<imm5> */
19325 else if (low_register_operand (op0, SImode)
19326 && CONST_INT_P (op1)
19327 && IN_RANGE (INTVAL (op1), 0, 31))
19328 action = action_for_partial_flag_setting;
19329 break;
19331 case ROTATERT:
19332 /* RORS <Rdn>,<Rm> */
19333 if (rtx_equal_p (dst, op0)
19334 && low_register_operand (op1, SImode))
19335 action = action_for_partial_flag_setting;
19336 break;
19338 case NOT:
19339 /* MVNS <Rd>,<Rm> */
19340 if (low_register_operand (op0, SImode))
19341 action = action_for_partial_flag_setting;
19342 break;
19344 case NEG:
19345 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19346 if (low_register_operand (op0, SImode))
19347 action = CONV;
19348 break;
19350 case CONST_INT:
19351 /* MOVS <Rd>,#<imm8> */
19352 if (CONST_INT_P (src)
19353 && IN_RANGE (INTVAL (src), 0, 255))
19354 action = action_for_partial_flag_setting;
19355 break;
19357 case REG:
19358 /* MOVS and MOV<c> with registers have different
19359 encodings, so are not relevant here. */
19360 break;
19362 default:
19363 break;
19367 if (action != SKIP)
19369 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19370 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19371 rtvec vec;
19373 if (action == SWAP_CONV)
19375 src = copy_rtx (src);
19376 XEXP (src, 0) = op1;
19377 XEXP (src, 1) = op0;
19378 pat = gen_rtx_SET (dst, src);
19379 vec = gen_rtvec (2, pat, clobber);
19381 else /* action == CONV */
19382 vec = gen_rtvec (2, pat, clobber);
19384 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19385 INSN_CODE (insn) = -1;
19389 if (NONDEBUG_INSN_P (insn))
19390 df_simulate_one_insn_backwards (bb, insn, &live);
19394 CLEAR_REG_SET (&live);
19397 /* GCC puts the pool in the wrong place for ARM, since we can only
19398 load addresses a limited distance around the pc. We do some
19399 special munging to move the constant pool values to the correct
19400 point in the code. */
19401 static void
19402 arm_reorg (void)
19404 rtx_insn *insn;
19405 HOST_WIDE_INT address = 0;
19406 Mfix * fix;
19408 if (use_cmse)
19409 cmse_nonsecure_call_inline_register_clear ();
19411 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19412 if (cfun->is_thunk)
19414 else if (TARGET_THUMB1)
19415 thumb1_reorg ();
19416 else if (TARGET_THUMB2)
19417 thumb2_reorg ();
19419 /* Ensure all insns that must be split have been split at this point.
19420 Otherwise, the pool placement code below may compute incorrect
19421 insn lengths. Note that when optimizing, all insns have already
19422 been split at this point. */
19423 if (!optimize)
19424 split_all_insns_noflow ();
19426 /* Make sure we do not attempt to create a literal pool, even though
19427 creating one should no longer be necessary. */
19428 if (arm_disable_literal_pool)
19429 return;
19431 minipool_fix_head = minipool_fix_tail = NULL;
19433 /* The first insn must always be a note, or the code below won't
19434 scan it properly. */
19435 insn = get_insns ();
19436 gcc_assert (NOTE_P (insn));
19437 minipool_pad = 0;
19439 /* Scan all the insns and record the operands that will need fixing. */
19440 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19442 if (BARRIER_P (insn))
19443 push_minipool_barrier (insn, address);
19444 else if (INSN_P (insn))
19446 rtx_jump_table_data *table;
19448 note_invalid_constants (insn, address, true);
19449 address += get_attr_length (insn);
19451 /* If the insn is a vector jump, add the size of the table
19452 and skip the table. */
19453 if (tablejump_p (insn, NULL, &table))
19455 address += get_jump_table_size (table);
19456 insn = table;
19459 else if (LABEL_P (insn))
19460 /* Add the worst-case padding due to alignment. We don't add
19461 the _current_ padding because the minipool insertions
19462 themselves might change it. */
19463 address += get_label_padding (insn);
19466 fix = minipool_fix_head;
19468 /* Now scan the fixups and perform the required changes. */
19469 while (fix)
19471 Mfix * ftmp;
19472 Mfix * fdel;
19473 Mfix * last_added_fix;
19474 Mfix * last_barrier = NULL;
19475 Mfix * this_fix;
19477 /* Skip any further barriers before the next fix. */
19478 while (fix && BARRIER_P (fix->insn))
19479 fix = fix->next;
19481 /* No more fixes. */
19482 if (fix == NULL)
19483 break;
19485 last_added_fix = NULL;
19487 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19489 if (BARRIER_P (ftmp->insn))
19491 if (ftmp->address >= minipool_vector_head->max_address)
19492 break;
19494 last_barrier = ftmp;
19496 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19497 break;
19499 last_added_fix = ftmp; /* Keep track of the last fix added. */
19502 /* If we found a barrier, drop back to that; any fixes that we
19503 could have reached but come after the barrier will now go in
19504 the next mini-pool. */
19505 if (last_barrier != NULL)
19507 /* Reduce the refcount for those fixes that won't go into this
19508 pool after all. */
19509 for (fdel = last_barrier->next;
19510 fdel && fdel != ftmp;
19511 fdel = fdel->next)
19513 fdel->minipool->refcount--;
19514 fdel->minipool = NULL;
19517 ftmp = last_barrier;
19519 else
19521 /* ftmp is the first fix that we can't fit into this pool and
19522 there are no natural barriers that we could use. Insert a
19523 new barrier in the code somewhere between the previous
19524 fix and this one, and arrange to jump around it. */
19525 HOST_WIDE_INT max_address;
19527 /* The last item on the list of fixes must be a barrier, so
19528 we can never run off the end of the list of fixes without
19529 last_barrier being set. */
19530 gcc_assert (ftmp);
19532 max_address = minipool_vector_head->max_address;
19533 /* Check that there isn't another fix that is in range that
19534 we couldn't fit into this pool because the pool was
19535 already too large: we need to put the pool before such an
19536 instruction. The pool itself may come just after the
19537 fix because create_fix_barrier also allows space for a
19538 jump instruction. */
19539 if (ftmp->address < max_address)
19540 max_address = ftmp->address + 1;
19542 last_barrier = create_fix_barrier (last_added_fix, max_address);
19545 assign_minipool_offsets (last_barrier);
19547 while (ftmp)
19549 if (!BARRIER_P (ftmp->insn)
19550 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19551 == NULL))
19552 break;
19554 ftmp = ftmp->next;
19557 /* Scan over the fixes we have identified for this pool, fixing them
19558 up and adding the constants to the pool itself. */
19559 for (this_fix = fix; this_fix && ftmp != this_fix;
19560 this_fix = this_fix->next)
19561 if (!BARRIER_P (this_fix->insn))
19563 rtx addr
19564 = plus_constant (Pmode,
19565 gen_rtx_LABEL_REF (VOIDmode,
19566 minipool_vector_label),
19567 this_fix->minipool->offset);
19568 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19571 dump_minipool (last_barrier->insn);
19572 fix = ftmp;
19575 /* From now on we must synthesize any constants that we can't handle
19576 directly. This can happen if the RTL gets split during final
19577 instruction generation. */
19578 cfun->machine->after_arm_reorg = 1;
19580 /* Free the minipool memory. */
19581 obstack_free (&minipool_obstack, minipool_startobj);
19584 /* Routines to output assembly language. */
19586 /* Return the string representation of the passed-in real value. */
19587 static const char *
19588 fp_const_from_val (REAL_VALUE_TYPE *r)
19590 if (!fp_consts_inited)
19591 init_fp_table ();
19593 gcc_assert (real_equal (r, &value_fp0));
19594 return "0";
19597 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19598 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
19599 is in the list, UPDATE is true iff the list contains an explicit
19600 update of the base register. */
19601 void
19602 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19603 bool update)
19605 int i;
19606 char pattern[100];
19607 int offset;
19608 const char *conditional;
19609 int num_saves = XVECLEN (operands[0], 0);
19610 unsigned int regno;
19611 unsigned int regno_base = REGNO (operands[1]);
19612 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19614 offset = 0;
19615 offset += update ? 1 : 0;
19616 offset += return_pc ? 1 : 0;
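/* OFFSET is the number of leading elements of the PARALLEL that are not
register loads (the optional base-register update and/or the return);
the destination registers start at index OFFSET.  */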
19618 /* Is the base register in the list? */
19619 for (i = offset; i < num_saves; i++)
19621 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19622 /* If SP is in the list, then the base register must be SP. */
19623 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19624 /* If base register is in the list, there must be no explicit update. */
19625 if (regno == regno_base)
19626 gcc_assert (!update);
19629 conditional = reverse ? "%?%D0" : "%?%d0";
19630 /* Can't use POP if returning from an interrupt. */
19631 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19632 sprintf (pattern, "pop%s\t{", conditional);
19633 else
19635 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19636 It's just a convention, their semantics are identical. */
19637 if (regno_base == SP_REGNUM)
19638 sprintf (pattern, "ldmfd%s\t", conditional);
19639 else if (update)
19640 sprintf (pattern, "ldmia%s\t", conditional);
19641 else
19642 sprintf (pattern, "ldm%s\t", conditional);
19644 strcat (pattern, reg_names[regno_base]);
19645 if (update)
19646 strcat (pattern, "!, {");
19647 else
19648 strcat (pattern, ", {");
19651 /* Output the first destination register. */
19652 strcat (pattern,
19653 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19655 /* Output the rest of the destination registers. */
19656 for (i = offset + 1; i < num_saves; i++)
19658 strcat (pattern, ", ");
19659 strcat (pattern,
19660 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19663 strcat (pattern, "}");
19665 if (interrupt_p && return_pc)
19666 strcat (pattern, "^");
19668 output_asm_insn (pattern, &cond);
19672 /* Output the assembly for a store multiple. */
19674 const char *
19675 vfp_output_vstmd (rtx * operands)
19677 char pattern[100];
19678 int p;
19679 int base;
19680 int i;
19681 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19682 ? XEXP (operands[0], 0)
19683 : XEXP (XEXP (operands[0], 0), 0);
19684 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19686 if (push_p)
19687 strcpy (pattern, "vpush%?.64\t{%P1");
19688 else
19689 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19691 p = strlen (pattern);
19693 gcc_assert (REG_P (operands[1]));
19695 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
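/* Internal VFP register numbers count single-precision registers, so halving
the offset from FIRST_VFP_REGNUM gives the index of the first D register.  */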
19696 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19698 p += sprintf (&pattern[p], ", d%d", base + i);
19700 strcpy (&pattern[p], "}");
19702 output_asm_insn (pattern, operands);
19703 return "";
19707 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19708 number of bytes pushed. */
19710 static int
19711 vfp_emit_fstmd (int base_reg, int count)
19713 rtx par;
19714 rtx dwarf;
19715 rtx tmp, reg;
19716 int i;
19718 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19719 register pairs are stored by a store multiple insn. We avoid this
19720 by pushing an extra pair. */
19721 if (count == 2 && !arm_arch6)
19723 if (base_reg == LAST_VFP_REGNUM - 3)
19724 base_reg -= 2;
19725 count++;
19728 /* FSTMD may not store more than 16 doubleword registers at once. Split
19729 larger stores into multiple parts (up to a maximum of two, in
19730 practice). */
19731 if (count > 16)
19733 int saved;
19734 /* NOTE: base_reg is an internal register number, so each D register
19735 counts as 2. */
19736 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19737 saved += vfp_emit_fstmd (base_reg, 16);
19738 return saved;
19741 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19742 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
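/* PAR becomes the store-multiple insn itself, while DWARF describes the
equivalent SP adjustment followed by the individual register stores; DWARF
is attached below as a REG_FRAME_RELATED_EXPR note so that the unwinder
sees the effect of the push.  */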
19744 reg = gen_rtx_REG (DFmode, base_reg);
19745 base_reg += 2;
19747 XVECEXP (par, 0, 0)
19748 = gen_rtx_SET (gen_frame_mem
19749 (BLKmode,
19750 gen_rtx_PRE_MODIFY (Pmode,
19751 stack_pointer_rtx,
19752 plus_constant
19753 (Pmode, stack_pointer_rtx,
19754 - (count * 8)))
19756 gen_rtx_UNSPEC (BLKmode,
19757 gen_rtvec (1, reg),
19758 UNSPEC_PUSH_MULT));
19760 tmp = gen_rtx_SET (stack_pointer_rtx,
19761 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19762 RTX_FRAME_RELATED_P (tmp) = 1;
19763 XVECEXP (dwarf, 0, 0) = tmp;
19765 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19766 RTX_FRAME_RELATED_P (tmp) = 1;
19767 XVECEXP (dwarf, 0, 1) = tmp;
19769 for (i = 1; i < count; i++)
19771 reg = gen_rtx_REG (DFmode, base_reg);
19772 base_reg += 2;
19773 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19775 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19776 plus_constant (Pmode,
19777 stack_pointer_rtx,
19778 i * 8)),
19779 reg);
19780 RTX_FRAME_RELATED_P (tmp) = 1;
19781 XVECEXP (dwarf, 0, i + 1) = tmp;
19784 par = emit_insn (par);
19785 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19786 RTX_FRAME_RELATED_P (par) = 1;
19788 return count * 8;
19791 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
19792 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
19794 bool
19795 detect_cmse_nonsecure_call (tree addr)
19797 if (!addr)
19798 return FALSE;
19800 tree fntype = TREE_TYPE (addr);
19801 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19802 TYPE_ATTRIBUTES (fntype)))
19803 return TRUE;
19804 return FALSE;
19808 /* Emit a call instruction with pattern PAT. ADDR is the address of
19809 the call target. */
19811 void
19812 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19814 rtx insn;
19816 insn = emit_call_insn (pat);
19818 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19819 If the call might use such an entry, add a use of the PIC register
19820 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19821 if (TARGET_VXWORKS_RTP
19822 && flag_pic
19823 && !sibcall
19824 && SYMBOL_REF_P (addr)
19825 && (SYMBOL_REF_DECL (addr)
19826 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19827 : !SYMBOL_REF_LOCAL_P (addr)))
19829 require_pic_register (NULL_RTX, false /*compute_now*/);
19830 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19833 if (TARGET_FDPIC)
19835 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19836 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19839 if (TARGET_AAPCS_BASED)
19841 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19842 linker. We need to add an IP clobber to allow setting
19843 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19844 is not needed since it's a fixed register. */
19845 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19846 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19850 /* Output a 'call' insn. */
19851 const char *
19852 output_call (rtx *operands)
19854 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19856 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19857 if (REGNO (operands[0]) == LR_REGNUM)
19859 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19860 output_asm_insn ("mov%?\t%0, %|lr", operands);
19863 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19865 if (TARGET_INTERWORK || arm_arch4t)
19866 output_asm_insn ("bx%?\t%0", operands);
19867 else
19868 output_asm_insn ("mov%?\t%|pc, %0", operands);
19870 return "";
19873 /* Output a move from arm registers to arm registers of a long double.
19874 OPERANDS[0] is the destination.
19875 OPERANDS[1] is the source. */
19876 const char *
19877 output_mov_long_double_arm_from_arm (rtx *operands)
19879 /* We have to be careful here because the two might overlap. */
19880 int dest_start = REGNO (operands[0]);
19881 int src_start = REGNO (operands[1]);
19882 rtx ops[2];
19883 int i;
19885 if (dest_start < src_start)
19887 for (i = 0; i < 3; i++)
19889 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19890 ops[1] = gen_rtx_REG (SImode, src_start + i);
19891 output_asm_insn ("mov%?\t%0, %1", ops);
19894 else
19896 for (i = 2; i >= 0; i--)
19898 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19899 ops[1] = gen_rtx_REG (SImode, src_start + i);
19900 output_asm_insn ("mov%?\t%0, %1", ops);
19904 return "";
19907 void
19908 arm_emit_movpair (rtx dest, rtx src)
19910 /* If the src is an immediate, simplify it. */
19911 if (CONST_INT_P (src))
19913 HOST_WIDE_INT val = INTVAL (src);
19914 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
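/* On targets with MOVW/MOVT the set above typically becomes a movw of the
low 16 bits, and the ZERO_EXTRACT set below becomes a movt filling in the
high 16 bits when they are nonzero.  */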
19915 if ((val >> 16) & 0x0000ffff)
19917 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19918 GEN_INT (16)),
19919 GEN_INT ((val >> 16) & 0x0000ffff));
19920 rtx_insn *insn = get_last_insn ();
19921 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19923 return;
19925 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19926 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19927 rtx_insn *insn = get_last_insn ();
19928 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19931 /* Output a move between double words. It must be REG<-MEM
19932 or MEM<-REG. */
19933 const char *
19934 output_move_double (rtx *operands, bool emit, int *count)
19936 enum rtx_code code0 = GET_CODE (operands[0]);
19937 enum rtx_code code1 = GET_CODE (operands[1]);
19938 rtx otherops[3];
19939 if (count)
19940 *count = 1;
19942 /* The only case when this might happen is when
19943 you are looking at the length of a DImode instruction
19944 that has an invalid constant in it. */
19945 if (code0 == REG && code1 != MEM)
19947 gcc_assert (!emit);
19948 *count = 2;
19949 return "";
19952 if (code0 == REG)
19954 unsigned int reg0 = REGNO (operands[0]);
19955 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19957 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19959 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19961 switch (GET_CODE (XEXP (operands[1], 0)))
19963 case REG:
19965 if (emit)
19967 if (can_ldrd
19968 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19969 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19970 else
19971 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19973 break;
19975 case PRE_INC:
19976 gcc_assert (can_ldrd);
19977 if (emit)
19978 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19979 break;
19981 case PRE_DEC:
19982 if (emit)
19984 if (can_ldrd)
19985 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19986 else
19987 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19989 break;
19991 case POST_INC:
19992 if (emit)
19994 if (can_ldrd)
19995 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19996 else
19997 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19999 break;
20001 case POST_DEC:
20002 gcc_assert (can_ldrd);
20003 if (emit)
20004 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20005 break;
20007 case PRE_MODIFY:
20008 case POST_MODIFY:
20009 /* Auto-increment addressing modes should never have overlapping
20010 base and destination registers, and overlapping index registers
20011 are already prohibited, so this doesn't need to worry about
20012 fix_cm3_ldrd. */
20013 otherops[0] = operands[0];
20014 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20015 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20017 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20019 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20021 /* Registers overlap so split out the increment. */
20022 if (emit)
20024 gcc_assert (can_ldrd);
20025 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20026 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20028 if (count)
20029 *count = 2;
20031 else
20033 /* Use a single insn if we can.
20034 FIXME: IWMMXT allows offsets larger than ldrd can
20035 handle, fix these up with a pair of ldr. */
20036 if (can_ldrd
20037 && (TARGET_THUMB2
20038 || !CONST_INT_P (otherops[2])
20039 || (INTVAL (otherops[2]) > -256
20040 && INTVAL (otherops[2]) < 256)))
20042 if (emit)
20043 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20045 else
20047 if (emit)
20049 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20050 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20052 if (count)
20053 *count = 2;
20058 else
20060 /* Use a single insn if we can.
20061 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20062 fix these up with a pair of ldr. */
20063 if (can_ldrd
20064 && (TARGET_THUMB2
20065 || !CONST_INT_P (otherops[2])
20066 || (INTVAL (otherops[2]) > -256
20067 && INTVAL (otherops[2]) < 256)))
20069 if (emit)
20070 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20072 else
20074 if (emit)
20076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20077 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20079 if (count)
20080 *count = 2;
20083 break;
20085 case LABEL_REF:
20086 case CONST:
20087 /* We might be able to use ldrd %0, %1 here. However the range is
20088 different to ldr/adr, and it is broken on some ARMv7-M
20089 implementations. */
20090 /* Use the second register of the pair to avoid problematic
20091 overlap. */
20092 otherops[1] = operands[1];
20093 if (emit)
20094 output_asm_insn ("adr%?\t%0, %1", otherops);
20095 operands[1] = otherops[0];
20096 if (emit)
20098 if (can_ldrd)
20099 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20100 else
20101 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20104 if (count)
20105 *count = 2;
20106 break;
20108 /* ??? This needs checking for thumb2. */
20109 default:
20110 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20111 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20113 otherops[0] = operands[0];
20114 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20115 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20117 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20119 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20121 switch ((int) INTVAL (otherops[2]))
20123 case -8:
20124 if (emit)
20125 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20126 return "";
20127 case -4:
20128 if (TARGET_THUMB2)
20129 break;
20130 if (emit)
20131 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20132 return "";
20133 case 4:
20134 if (TARGET_THUMB2)
20135 break;
20136 if (emit)
20137 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20138 return "";
20141 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20142 operands[1] = otherops[0];
20143 if (can_ldrd
20144 && (REG_P (otherops[2])
20145 || TARGET_THUMB2
20146 || (CONST_INT_P (otherops[2])
20147 && INTVAL (otherops[2]) > -256
20148 && INTVAL (otherops[2]) < 256)))
20150 if (reg_overlap_mentioned_p (operands[0],
20151 otherops[2]))
20153 /* Swap base and index registers over to
20154 avoid a conflict. */
20155 std::swap (otherops[1], otherops[2]);
20157 /* If both registers conflict, it will usually
20158 have been fixed by a splitter. */
20159 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20160 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20162 if (emit)
20164 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20165 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20167 if (count)
20168 *count = 2;
20170 else
20172 otherops[0] = operands[0];
20173 if (emit)
20174 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20176 return "";
20179 if (CONST_INT_P (otherops[2]))
20181 if (emit)
20183 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20184 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20185 else
20186 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20189 else
20191 if (emit)
20192 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20195 else
20197 if (emit)
20198 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20201 if (count)
20202 *count = 2;
20204 if (can_ldrd)
20205 return "ldrd%?\t%0, [%1]";
20207 return "ldmia%?\t%1, %M0";
20209 else
20211 otherops[1] = adjust_address (operands[1], SImode, 4);
20212 /* Take care of overlapping base/data reg. */
20213 if (reg_mentioned_p (operands[0], operands[1]))
20215 if (emit)
20217 output_asm_insn ("ldr%?\t%0, %1", otherops);
20218 output_asm_insn ("ldr%?\t%0, %1", operands);
20220 if (count)
20221 *count = 2;
20224 else
20226 if (emit)
20228 output_asm_insn ("ldr%?\t%0, %1", operands);
20229 output_asm_insn ("ldr%?\t%0, %1", otherops);
20231 if (count)
20232 *count = 2;
20237 else
20239 /* Constraints should ensure this. */
20240 gcc_assert (code0 == MEM && code1 == REG);
20241 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20242 || (TARGET_ARM && TARGET_LDRD));
20244 /* For TARGET_ARM the first source register of an STRD
20245 must be even. This is usually the case for double-word
20246 values but user assembly constraints can force an odd
20247 starting register. */
20248 bool allow_strd = TARGET_LDRD
20249 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20250 switch (GET_CODE (XEXP (operands[0], 0)))
20252 case REG:
20253 if (emit)
20255 if (allow_strd)
20256 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20257 else
20258 output_asm_insn ("stm%?\t%m0, %M1", operands);
20260 break;
20262 case PRE_INC:
20263 gcc_assert (allow_strd);
20264 if (emit)
20265 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20266 break;
20268 case PRE_DEC:
20269 if (emit)
20271 if (allow_strd)
20272 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20273 else
20274 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20276 break;
20278 case POST_INC:
20279 if (emit)
20281 if (allow_strd)
20282 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20283 else
20284 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20286 break;
20288 case POST_DEC:
20289 gcc_assert (allow_strd);
20290 if (emit)
20291 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20292 break;
20294 case PRE_MODIFY:
20295 case POST_MODIFY:
20296 otherops[0] = operands[1];
20297 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20298 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20300 /* IWMMXT allows offsets larger than strd can handle,
20301 fix these up with a pair of str. */
20302 if (!TARGET_THUMB2
20303 && CONST_INT_P (otherops[2])
20304 && (INTVAL(otherops[2]) <= -256
20305 || INTVAL(otherops[2]) >= 256))
20307 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20309 if (emit)
20311 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20312 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20314 if (count)
20315 *count = 2;
20317 else
20319 if (emit)
20321 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20322 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20324 if (count)
20325 *count = 2;
20328 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20330 if (emit)
20331 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20333 else
20335 if (emit)
20336 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20338 break;
20340 case PLUS:
20341 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20342 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20344 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20346 case -8:
20347 if (emit)
20348 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20349 return "";
20351 case -4:
20352 if (TARGET_THUMB2)
20353 break;
20354 if (emit)
20355 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20356 return "";
20358 case 4:
20359 if (TARGET_THUMB2)
20360 break;
20361 if (emit)
20362 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20363 return "";
20366 if (allow_strd
20367 && (REG_P (otherops[2])
20368 || TARGET_THUMB2
20369 || (CONST_INT_P (otherops[2])
20370 && INTVAL (otherops[2]) > -256
20371 && INTVAL (otherops[2]) < 256)))
20373 otherops[0] = operands[1];
20374 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20375 if (emit)
20376 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20377 return "";
20379 /* Fall through */
20381 default:
20382 otherops[0] = adjust_address (operands[0], SImode, 4);
20383 otherops[1] = operands[1];
20384 if (emit)
20386 output_asm_insn ("str%?\t%1, %0", operands);
20387 output_asm_insn ("str%?\t%H1, %0", otherops);
20389 if (count)
20390 *count = 2;
20394 return "";
20397 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20398 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20400 const char *
20401 output_move_quad (rtx *operands)
20403 if (REG_P (operands[0]))
20405 /* Load, or reg->reg move. */
20407 if (MEM_P (operands[1]))
20409 switch (GET_CODE (XEXP (operands[1], 0)))
20411 case REG:
20412 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20413 break;
20415 case LABEL_REF:
20416 case CONST:
20417 output_asm_insn ("adr%?\t%0, %1", operands);
20418 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20419 break;
20421 default:
20422 gcc_unreachable ();
20425 else
20427 rtx ops[2];
20428 int dest, src, i;
20430 gcc_assert (REG_P (operands[1]));
20432 dest = REGNO (operands[0]);
20433 src = REGNO (operands[1]);
20435 /* This seems pretty dumb, but hopefully GCC won't try to do it
20436 very often. */
20437 if (dest < src)
20438 for (i = 0; i < 4; i++)
20440 ops[0] = gen_rtx_REG (SImode, dest + i);
20441 ops[1] = gen_rtx_REG (SImode, src + i);
20442 output_asm_insn ("mov%?\t%0, %1", ops);
20444 else
20445 for (i = 3; i >= 0; i--)
20447 ops[0] = gen_rtx_REG (SImode, dest + i);
20448 ops[1] = gen_rtx_REG (SImode, src + i);
20449 output_asm_insn ("mov%?\t%0, %1", ops);
20453 else
20455 gcc_assert (MEM_P (operands[0]));
20456 gcc_assert (REG_P (operands[1]));
20457 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20459 switch (GET_CODE (XEXP (operands[0], 0)))
20461 case REG:
20462 output_asm_insn ("stm%?\t%m0, %M1", operands);
20463 break;
20465 default:
20466 gcc_unreachable ();
20470 return "";
20473 /* Output a VFP load or store instruction. */
20475 const char *
20476 output_move_vfp (rtx *operands)
20478 rtx reg, mem, addr, ops[2];
20479 int load = REG_P (operands[0]);
20480 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20481 int sp = (!TARGET_VFP_FP16INST
20482 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20483 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20484 const char *templ;
20485 char buff[50];
20486 machine_mode mode;
20488 reg = operands[!load];
20489 mem = operands[load];
20491 mode = GET_MODE (reg);
20493 gcc_assert (REG_P (reg));
20494 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20495 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20496 || mode == SFmode
20497 || mode == DFmode
20498 || mode == HImode
20499 || mode == SImode
20500 || mode == DImode
20501 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20502 gcc_assert (MEM_P (mem));
20504 addr = XEXP (mem, 0);
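/* TEMPL is filled in below (see the sprintf) with the operation (ld or st),
the access size (16, 32 or 64), an optional "P" operand modifier for
D registers, and an optional "@ int" assembler comment for integer modes.  */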
20506 switch (GET_CODE (addr))
20508 case PRE_DEC:
20509 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20510 ops[0] = XEXP (addr, 0);
20511 ops[1] = reg;
20512 break;
20514 case POST_INC:
20515 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20516 ops[0] = XEXP (addr, 0);
20517 ops[1] = reg;
20518 break;
20520 default:
20521 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20522 ops[0] = reg;
20523 ops[1] = mem;
20524 break;
20527 sprintf (buff, templ,
20528 load ? "ld" : "st",
20529 dp ? "64" : sp ? "32" : "16",
20530 dp ? "P" : "",
20531 integer_p ? "\t%@ int" : "");
20532 output_asm_insn (buff, ops);
20534 return "";
20537 /* Output a Neon double-word or quad-word load or store, or a load
20538 or store for larger structure modes.
20540 WARNING: The ordering of elements is weird in big-endian mode,
20541 because the EABI requires that vectors stored in memory appear
20542 as though they were stored by a VSTM instruction.
20543 GCC RTL defines element ordering based on in-memory order.
20544 This can be different from the architectural ordering of elements
20545 within a NEON register. The intrinsics defined in arm_neon.h use the
20546 NEON register element ordering, not the GCC RTL element ordering.
20548 For example, the in-memory ordering of a big-endian quadword
20549 vector with 16-bit elements when stored from register pair {d0,d1}
20550 will be (lowest address first, d0[N] is NEON register element N):
20552 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20554 When necessary, quadword registers (dN, dN+1) are moved to ARM
20555 registers from rN in the order:
20557 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20559 So that STM/LDM can be used on vectors in ARM registers, and the
20560 same memory layout will result as if VSTM/VLDM were used.
20562 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20563 possible, which allows use of appropriate alignment tags.
20564 Note that the choice of "64" is independent of the actual vector
20565 element size; this size simply ensures that the behavior is
20566 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20568 Due to limitations of those instructions, use of VST1.64/VLD1.64
20569 is not possible if:
20570 - the address contains PRE_DEC, or
20571 - the mode refers to more than 4 double-word registers
20573 In those cases, it would be possible to replace VSTM/VLDM by a
20574 sequence of instructions; this is not currently implemented since
20575 this is not certain to actually improve performance. */
20577 const char *
20578 output_move_neon (rtx *operands)
20580 rtx reg, mem, addr, ops[2];
20581 int regno, nregs, load = REG_P (operands[0]);
20582 const char *templ;
20583 char buff[50];
20584 machine_mode mode;
20586 reg = operands[!load];
20587 mem = operands[load];
20589 mode = GET_MODE (reg);
20591 gcc_assert (REG_P (reg));
20592 regno = REGNO (reg);
20593 nregs = REG_NREGS (reg) / 2;
20594 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20595 || NEON_REGNO_OK_FOR_QUAD (regno));
20596 gcc_assert (VALID_NEON_DREG_MODE (mode)
20597 || VALID_NEON_QREG_MODE (mode)
20598 || VALID_NEON_STRUCT_MODE (mode));
20599 gcc_assert (MEM_P (mem));
20601 addr = XEXP (mem, 0);
20603 /* Strip off const from addresses like (const (plus (...))). */
20604 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20605 addr = XEXP (addr, 0);
20607 switch (GET_CODE (addr))
20609 case POST_INC:
20610 /* We have to use vldm / vstm for too-large modes. */
20611 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20613 templ = "v%smia%%?\t%%0!, %%h1";
20614 ops[0] = XEXP (addr, 0);
20616 else
20618 templ = "v%s1.64\t%%h1, %%A0";
20619 ops[0] = mem;
20621 ops[1] = reg;
20622 break;
20624 case PRE_DEC:
20625 /* We have to use vldm / vstm in this case, since there is no
20626 pre-decrement form of the vld1 / vst1 instructions. */
20627 templ = "v%smdb%%?\t%%0!, %%h1";
20628 ops[0] = XEXP (addr, 0);
20629 ops[1] = reg;
20630 break;
20632 case POST_MODIFY:
20633 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20634 gcc_unreachable ();
20636 case REG:
20637 /* We have to use vldm / vstm for too-large modes. */
20638 if (nregs > 1)
20640 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20641 templ = "v%smia%%?\t%%m0, %%h1";
20642 else
20643 templ = "v%s1.64\t%%h1, %%A0";
20645 ops[0] = mem;
20646 ops[1] = reg;
20647 break;
20649 /* Fall through. */
20650 case PLUS:
20651 if (GET_CODE (addr) == PLUS)
20652 addr = XEXP (addr, 0);
20653 /* Fall through. */
20654 case LABEL_REF:
20656 int i;
20657 int overlap = -1;
20658 for (i = 0; i < nregs; i++)
20660 /* We're only using DImode here because it's a convenient
20661 size. */
20662 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20663 ops[1] = adjust_address (mem, DImode, 8 * i);
20664 if (reg_overlap_mentioned_p (ops[0], mem))
20666 gcc_assert (overlap == -1);
20667 overlap = i;
20669 else
20671 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20672 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20673 else
20674 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20675 output_asm_insn (buff, ops);
20678 if (overlap != -1)
20680 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20681 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20682 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20683 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20684 else
20685 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20686 output_asm_insn (buff, ops);
20689 return "";
20692 default:
20693 gcc_unreachable ();
20696 sprintf (buff, templ, load ? "ld" : "st");
20697 output_asm_insn (buff, ops);
20699 return "";
20702 /* Compute and return the length of neon_mov<mode>, where <mode> is
20703 one of VSTRUCT modes: EI, OI, CI or XI. */
20705 arm_attr_length_move_neon (rtx_insn *insn)
20707 rtx reg, mem, addr;
20708 int load;
20709 machine_mode mode;
20711 extract_insn_cached (insn);
20713 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20715 mode = GET_MODE (recog_data.operand[0]);
20716 switch (mode)
20718 case E_EImode:
20719 case E_OImode:
20720 return 8;
20721 case E_CImode:
20722 return 12;
20723 case E_XImode:
20724 return 16;
20725 default:
20726 gcc_unreachable ();
20730 load = REG_P (recog_data.operand[0]);
20731 reg = recog_data.operand[!load];
20732 mem = recog_data.operand[load];
20734 gcc_assert (MEM_P (mem));
20736 addr = XEXP (mem, 0);
20738 /* Strip off const from addresses like (const (plus (...))). */
20739 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20740 addr = XEXP (addr, 0);
20742 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20744 int insns = REG_NREGS (reg) / 2;
20745 return insns * 4;
20747 else
20748 return 4;
20751 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20752 return zero. */
20755 arm_address_offset_is_imm (rtx_insn *insn)
20757 rtx mem, addr;
20759 extract_insn_cached (insn);
20761 if (REG_P (recog_data.operand[0]))
20762 return 0;
20764 mem = recog_data.operand[0];
20766 gcc_assert (MEM_P (mem));
20768 addr = XEXP (mem, 0);
20770 if (REG_P (addr)
20771 || (GET_CODE (addr) == PLUS
20772 && REG_P (XEXP (addr, 0))
20773 && CONST_INT_P (XEXP (addr, 1))))
20774 return 1;
20775 else
20776 return 0;
20779 /* Output an ADD r, s, #n where n may be too big for one instruction.
20780 If n is zero and the destination register is the same as the source, output nothing. */
20781 const char *
20782 output_add_immediate (rtx *operands)
20784 HOST_WIDE_INT n = INTVAL (operands[2]);
20786 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20788 if (n < 0)
20789 output_multi_immediate (operands,
20790 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20791 -n);
20792 else
20793 output_multi_immediate (operands,
20794 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20798 return "";
20801 /* Output a multiple immediate operation.
20802 OPERANDS is the vector of operands referred to in the output patterns.
20803 INSTR1 is the output pattern to use for the first constant.
20804 INSTR2 is the output pattern to use for subsequent constants.
20805 IMMED_OP is the index of the constant slot in OPERANDS.
20806 N is the constant value. */
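/* A worked example (derived from the loop below): N = 0x10001 is emitted as
   two instructions using the immediates #1 and #0x10000.  Each pass extracts
   an 8-bit chunk starting at an even bit position, so every chunk is a valid
   ARM data-processing immediate.  */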
20807 static const char *
20808 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20809 int immed_op, HOST_WIDE_INT n)
20811 #if HOST_BITS_PER_WIDE_INT > 32
20812 n &= 0xffffffff;
20813 #endif
20815 if (n == 0)
20817 /* Quick and easy output. */
20818 operands[immed_op] = const0_rtx;
20819 output_asm_insn (instr1, operands);
20821 else
20823 int i;
20824 const char * instr = instr1;
20826 /* Note that n is never zero here (which would give no output). */
20827 for (i = 0; i < 32; i += 2)
20829 if (n & (3 << i))
20831 operands[immed_op] = GEN_INT (n & (255 << i));
20832 output_asm_insn (instr, operands);
20833 instr = instr2;
20834 i += 6;
20839 return "";
20842 /* Return the name of a shifter operation. */
20843 static const char *
20844 arm_shift_nmem(enum rtx_code code)
20846 switch (code)
20848 case ASHIFT:
20849 return ARM_LSL_NAME;
20851 case ASHIFTRT:
20852 return "asr";
20854 case LSHIFTRT:
20855 return "lsr";
20857 case ROTATERT:
20858 return "ror";
20860 default:
20861 abort();
20865 /* Return the appropriate ARM instruction for the operation code.
20866 The returned result should not be overwritten. OP is the rtx of the
20867 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20868 was shifted. */
20869 const char *
20870 arithmetic_instr (rtx op, int shift_first_arg)
20872 switch (GET_CODE (op))
20874 case PLUS:
20875 return "add";
20877 case MINUS:
20878 return shift_first_arg ? "rsb" : "sub";
20880 case IOR:
20881 return "orr";
20883 case XOR:
20884 return "eor";
20886 case AND:
20887 return "and";
20889 case ASHIFT:
20890 case ASHIFTRT:
20891 case LSHIFTRT:
20892 case ROTATERT:
20893 return arm_shift_nmem(GET_CODE(op));
20895 default:
20896 gcc_unreachable ();
20900 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20901 for the operation code. The returned result should not be overwritten.
20902 OP is the rtx code of the shift.
20903 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20904 constant shift amount otherwise. */
20905 static const char *
20906 shift_op (rtx op, HOST_WIDE_INT *amountp)
20908 const char * mnem;
20909 enum rtx_code code = GET_CODE (op);
20911 switch (code)
20913 case ROTATE:
20914 if (!CONST_INT_P (XEXP (op, 1)))
20916 output_operand_lossage ("invalid shift operand");
20917 return NULL;
20920 code = ROTATERT;
20921 *amountp = 32 - INTVAL (XEXP (op, 1));
20922 mnem = "ror";
20923 break;
20925 case ASHIFT:
20926 case ASHIFTRT:
20927 case LSHIFTRT:
20928 case ROTATERT:
20929 mnem = arm_shift_nmem(code);
20930 if (CONST_INT_P (XEXP (op, 1)))
20932 *amountp = INTVAL (XEXP (op, 1));
20934 else if (REG_P (XEXP (op, 1)))
20936 *amountp = -1;
20937 return mnem;
20939 else
20941 output_operand_lossage ("invalid shift operand");
20942 return NULL;
20944 break;
20946 case MULT:
20947 /* We never have to worry about the amount being other than a
20948 power of 2, since this case can never be reloaded from a reg. */
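/* For example, (mult x 8) is output as LSL with *AMOUNTP set to 3.  */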
20949 if (!CONST_INT_P (XEXP (op, 1)))
20951 output_operand_lossage ("invalid shift operand");
20952 return NULL;
20955 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20957 /* Amount must be a power of two. */
20958 if (*amountp & (*amountp - 1))
20960 output_operand_lossage ("invalid shift operand");
20961 return NULL;
20964 *amountp = exact_log2 (*amountp);
20965 gcc_assert (IN_RANGE (*amountp, 0, 31));
20966 return ARM_LSL_NAME;
20968 default:
20969 output_operand_lossage ("invalid shift operand");
20970 return NULL;
20973 /* This is not 100% correct, but follows from the desire to merge
20974 multiplication by a power of 2 with the recognizer for a
20975 shift. >=32 is not a valid shift for "lsl", so we must try and
20976 output a shift that produces the correct arithmetical result.
20977 Using lsr #32 is identical except for the fact that the carry bit
20978 is not set correctly if we set the flags; but we never use the
20979 carry bit from such an operation, so we can ignore that. */
20980 if (code == ROTATERT)
20981 /* Rotate is just modulo 32. */
20982 *amountp &= 31;
20983 else if (*amountp != (*amountp & 31))
20985 if (code == ASHIFT)
20986 mnem = "lsr";
20987 *amountp = 32;
20990 /* Shifts of 0 are no-ops. */
20991 if (*amountp == 0)
20992 return NULL;
20994 return mnem;
20997 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20998 because /bin/as is horribly restrictive. The judgement about
20999 whether or not each character is 'printable' (and can be output as
21000 is) or not (and must be printed with an octal escape) must be made
21001 with reference to the *host* character set -- the situation is
21002 similar to that discussed in the comments above pp_c_char in
21003 c-pretty-print.cc. */
21005 #define MAX_ASCII_LEN 51
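/* Note: the count below is of output characters, and an escaped character
   contributes four of them, so each .ascii directive carries at most about
   54 characters of string payload.  */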
21007 void
21008 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21010 int i;
21011 int len_so_far = 0;
21013 fputs ("\t.ascii\t\"", stream);
21015 for (i = 0; i < len; i++)
21017 int c = p[i];
21019 if (len_so_far >= MAX_ASCII_LEN)
21021 fputs ("\"\n\t.ascii\t\"", stream);
21022 len_so_far = 0;
21025 if (ISPRINT (c))
21027 if (c == '\\' || c == '\"')
21029 putc ('\\', stream);
21030 len_so_far++;
21032 putc (c, stream);
21033 len_so_far++;
21035 else
21037 fprintf (stream, "\\%03o", c);
21038 len_so_far += 4;
21042 fputs ("\"\n", stream);
21046 /* Compute the register save mask for registers 0 through 12
21047 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21049 static unsigned long
21050 arm_compute_save_reg0_reg12_mask (void)
21052 unsigned long func_type = arm_current_func_type ();
21053 unsigned long save_reg_mask = 0;
21054 unsigned int reg;
21056 if (IS_INTERRUPT (func_type))
21058 unsigned int max_reg;
21059 /* Interrupt functions must not corrupt any registers,
21060 even call clobbered ones. If this is a leaf function
21061 we can just examine the registers used by the RTL, but
21062 otherwise we have to assume that whatever function is
21063 called might clobber anything, and so we have to save
21064 all the call-clobbered registers as well. */
21065 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21066 /* FIQ handlers have registers r8 - r12 banked, so
21067 we only need to check r0 - r7.  Normal ISRs only
21068 bank r14 and r15, so we must check up to r12.
21069 r13 is the stack pointer which is always preserved,
21070 so we do not need to consider it here. */
21071 max_reg = 7;
21072 else
21073 max_reg = 12;
21075 for (reg = 0; reg <= max_reg; reg++)
21076 if (reg_needs_saving_p (reg))
21077 save_reg_mask |= (1 << reg);
21079 /* Also save the pic base register if necessary. */
21080 if (PIC_REGISTER_MAY_NEED_SAVING
21081 && crtl->uses_pic_offset_table)
21082 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21084 else if (IS_VOLATILE(func_type))
21086 /* For noreturn functions we historically omitted register saves
21087 altogether. However this really messes up debugging. As a
21088 compromise save just the frame pointers. Combined with the link
21089 register saved elsewhere this should be sufficient to get
21090 a backtrace. */
21091 if (frame_pointer_needed)
21092 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21093 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21094 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21095 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21096 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21098 else
21100 /* In the normal case we only need to save those registers
21101 which are call saved and which are used by this function. */
21102 for (reg = 0; reg <= 11; reg++)
21103 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21104 save_reg_mask |= (1 << reg);
21106 /* Handle the frame pointer as a special case. */
21107 if (frame_pointer_needed)
21108 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21110 /* If we aren't loading the PIC register,
21111 don't stack it even though it may be live. */
21112 if (PIC_REGISTER_MAY_NEED_SAVING
21113 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21114 || crtl->uses_pic_offset_table))
21115 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21117 /* The prologue will copy SP into R0, so save it. */
21118 if (IS_STACKALIGN (func_type))
21119 save_reg_mask |= 1;
21122 /* Save registers so the exception handler can modify them. */
21123 if (crtl->calls_eh_return)
21125 unsigned int i;
21127 for (i = 0; ; i++)
21129 reg = EH_RETURN_DATA_REGNO (i);
21130 if (reg == INVALID_REGNUM)
21131 break;
21132 save_reg_mask |= 1 << reg;
21136 return save_reg_mask;
21139 /* Return true if r3 is live at the start of the function. */
21141 static bool
21142 arm_r3_live_at_start_p (void)
21144 /* Just look at cfg info, which is still close enough to correct at this
21145 point. This gives false positives for broken functions that might use
21146 uninitialized data that happens to be allocated in r3, but who cares? */
21147 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21150 /* Compute the number of bytes used to store the static chain register on the
21151 stack, above the stack frame. We need to know this accurately to get the
21152 alignment of the rest of the stack frame correct. */
21154 static int
21155 arm_compute_static_chain_stack_bytes (void)
21157 /* Once the value is updated from the init value of -1, do not
21158 re-compute. */
21159 if (cfun->machine->static_chain_stack_bytes != -1)
21160 return cfun->machine->static_chain_stack_bytes;
21162 /* See the defining assertion in arm_expand_prologue. */
21163 if (IS_NESTED (arm_current_func_type ())
21164 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21165 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21166 || flag_stack_clash_protection)
21167 && !df_regs_ever_live_p (LR_REGNUM)))
21168 && arm_r3_live_at_start_p ()
21169 && crtl->args.pretend_args_size == 0)
21170 return 4;
21172 return 0;
21175 /* Compute a bit mask of which core registers need to be
21176 saved on the stack for the current function.
21177 This is used by arm_compute_frame_layout, which may add extra registers. */
21179 static unsigned long
21180 arm_compute_save_core_reg_mask (void)
21182 unsigned int save_reg_mask = 0;
21183 unsigned long func_type = arm_current_func_type ();
21184 unsigned int reg;
21186 if (IS_NAKED (func_type))
21187 /* This should never really happen. */
21188 return 0;
21190 /* If we are creating a stack frame, then we must save the frame pointer,
21191 IP (which will hold the old stack pointer), LR and the PC. */
21192 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21193 save_reg_mask |=
21194 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21195 | (1 << IP_REGNUM)
21196 | (1 << LR_REGNUM)
21197 | (1 << PC_REGNUM);
21199 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21201 /* Decide if we need to save the link register.
21202 Interrupt routines have their own banked link register,
21203 so they never need to save it.
21204 Otherwise if we do not use the link register we do not need to save
21205 it. If we are pushing other registers onto the stack however, we
21206 can save an instruction in the epilogue by pushing the link register
21207 now and then popping it back into the PC. This incurs extra memory
21208 accesses though, so we only do it when optimizing for size, and only
21209 if we know that we will not need a fancy return sequence. */
21210 if (df_regs_ever_live_p (LR_REGNUM)
21211 || (save_reg_mask
21212 && optimize_size
21213 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21214 && !crtl->tail_call_emit
21215 && !crtl->calls_eh_return))
21216 save_reg_mask |= 1 << LR_REGNUM;
21218 if (cfun->machine->lr_save_eliminated)
21219 save_reg_mask &= ~ (1 << LR_REGNUM);
21221 if (TARGET_REALLY_IWMMXT
21222 && ((bit_count (save_reg_mask)
21223 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21224 arm_compute_static_chain_stack_bytes())
21225 ) % 2) != 0)
21227 /* The total number of registers that are going to be pushed
21228 onto the stack is odd. We need to ensure that the stack
21229 is 64-bit aligned before we start to save iWMMXt registers,
21230 and also before we start to create locals. (A local variable
21231 might be a double or long long which we will load/store using
21232 an iWMMXt instruction). Therefore we need to push another
21233 ARM register, so that the stack will be 64-bit aligned. We
21234 try to avoid using the arg registers (r0 - r3) as they might be
21235 used to pass values in a tail call. */
21236 for (reg = 4; reg <= 12; reg++)
21237 if ((save_reg_mask & (1 << reg)) == 0)
21238 break;
21240 if (reg <= 12)
21241 save_reg_mask |= (1 << reg);
21242 else
21244 cfun->machine->sibcall_blocked = 1;
21245 save_reg_mask |= (1 << 3);
21249 /* We may need to push an additional register for use initializing the
21250 PIC base register. */
21251 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21252 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21254 reg = thumb_find_work_register (1 << 4);
21255 if (!call_used_or_fixed_reg_p (reg))
21256 save_reg_mask |= (1 << reg);
21259 return save_reg_mask;
21262 /* Compute a bit mask of which core registers need to be
21263 saved on the stack for the current function. */
21264 static unsigned long
21265 thumb1_compute_save_core_reg_mask (void)
21267 unsigned long mask;
21268 unsigned reg;
21270 mask = 0;
21271 for (reg = 0; reg < 12; reg ++)
21272 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21273 mask |= 1 << reg;
21275 /* Handle the frame pointer as a special case. */
21276 if (frame_pointer_needed)
21277 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21279 if (flag_pic
21280 && !TARGET_SINGLE_PIC_BASE
21281 && arm_pic_register != INVALID_REGNUM
21282 && crtl->uses_pic_offset_table)
21283 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21285 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21286 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21287 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21289 /* LR will also be pushed if any lo regs are pushed. */
21290 if (mask & 0xff || thumb_force_lr_save ())
21291 mask |= (1 << LR_REGNUM);
21293 bool call_clobbered_scratch
21294 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21295 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21297 /* Make sure we have a low work register if we need one. We will
21298 need one if we are going to push a high register, but we are not
21299 currently intending to push a low register. However if both the
21300 prologue and epilogue have a spare call-clobbered low register,
21301 then we won't need to find an additional work register. It does
21302 not need to be the same register in the prologue and
21303 epilogue. */
21304 if ((mask & 0xff) == 0
21305 && !call_clobbered_scratch
21306 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21308 /* Use thumb_find_work_register to choose which register
21309 we will use. If the register is live then we will
21310 have to push it. Use LAST_LO_REGNUM as our fallback
21311 choice for the register to select. */
21312 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21313 /* Make sure the register returned by thumb_find_work_register is
21314 not part of the return value. */
21315 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21316 reg = LAST_LO_REGNUM;
21318 if (callee_saved_reg_p (reg))
21319 mask |= 1 << reg;
21322 /* The 504 below is 8 bytes less than 512 because there are two possible
21323 alignment words. We can't tell here if they will be present or not so we
21324 have to play it safe and assume that they are. */
21325 if ((CALLER_INTERWORKING_SLOT_SIZE +
21326 ROUND_UP_WORD (get_frame_size ()) +
21327 crtl->outgoing_args_size) >= 504)
21329 /* This is the same as the code in thumb1_expand_prologue() which
21330 determines which register to use for stack decrement. */
21331 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21332 if (mask & (1 << reg))
21333 break;
21335 if (reg > LAST_LO_REGNUM)
21337 /* Make sure we have a register available for stack decrement. */
21338 mask |= 1 << LAST_LO_REGNUM;
21342 return mask;
21345 /* Return the number of bytes required to save VFP registers. */
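/* The loop below walks the S registers two at a time, i.e. one D register
   per step; each run of consecutive D registers that must be saved
   contributes 8 bytes per D register.  */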
21346 static int
21347 arm_get_vfp_saved_size (void)
21349 unsigned int regno;
21350 int count;
21351 int saved;
21353 saved = 0;
21354 /* Space for saved VFP registers. */
21355 if (TARGET_VFP_BASE)
21357 count = 0;
21358 for (regno = FIRST_VFP_REGNUM;
21359 regno < LAST_VFP_REGNUM;
21360 regno += 2)
21362 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21364 if (count > 0)
21366 /* Workaround ARM10 VFPr1 bug. */
21367 if (count == 2 && !arm_arch6)
21368 count++;
21369 saved += count * 8;
21371 count = 0;
21373 else
21374 count++;
21376 if (count > 0)
21378 if (count == 2 && !arm_arch6)
21379 count++;
21380 saved += count * 8;
21383 return saved;
21387 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21388 everything bar the final return instruction. If simple_return is true,
21389 then do not output epilogue, because it has already been emitted in RTL.
21391 Note: do not forget to update length attribute of corresponding insn pattern
21392 when changing assembly output (eg. length attribute of
21393 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21394 register clearing sequences). */
21395 const char *
21396 output_return_instruction (rtx operand, bool really_return, bool reverse,
21397 bool simple_return)
21399 char conditional[10];
21400 char instr[100];
21401 unsigned reg;
21402 unsigned long live_regs_mask;
21403 unsigned long func_type;
21404 arm_stack_offsets *offsets;
21406 func_type = arm_current_func_type ();
21408 if (IS_NAKED (func_type))
21409 return "";
21411 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21413 /* If this function was declared non-returning, and we have
21414 found a tail call, then we have to trust that the called
21415 function won't return. */
21416 if (really_return)
21418 rtx ops[2];
21420 /* Otherwise, trap an attempted return by aborting. */
21421 ops[0] = operand;
21422 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21423 : "abort");
21424 assemble_external_libcall (ops[1]);
21425 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21428 return "";
21431 gcc_assert (!cfun->calls_alloca || really_return);
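/* This builds the string "%?%d0" (or "%?%D0" when the sense of the condition
   is reversed), which the output routines expand to the instruction's
   condition-code suffix.  */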
21433 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21435 cfun->machine->return_used_this_function = 1;
21437 offsets = arm_get_frame_offsets ();
21438 live_regs_mask = offsets->saved_regs_mask;
21440 if (!simple_return && live_regs_mask)
21442 const char * return_reg;
21444 /* If we do not have any special requirements for function exit
21445 (e.g. interworking) then we can load the return address
21446 directly into the PC. Otherwise we must load it into LR. */
21447 if (really_return
21448 && !IS_CMSE_ENTRY (func_type)
21449 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21450 return_reg = reg_names[PC_REGNUM];
21451 else
21452 return_reg = reg_names[LR_REGNUM];
21454 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21456 /* There are three possible reasons for the IP register
21457 being saved. 1) a stack frame was created, in which case
21458 IP contains the old stack pointer, or 2) an ISR routine
21459 corrupted it, or 3) it was saved to align the stack on
21460 iWMMXt. In case 1, restore IP into SP, otherwise just
21461 restore IP. */
21462 if (frame_pointer_needed)
21464 live_regs_mask &= ~ (1 << IP_REGNUM);
21465 live_regs_mask |= (1 << SP_REGNUM);
21467 else
21468 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21471 /* On some ARM architectures it is faster to use LDR rather than
21472 LDM to load a single register. On other architectures, the
21473 cost is the same. In 26 bit mode, or for exception handlers,
21474 we have to use LDM to load the PC so that the CPSR is also
21475 restored. */
21476 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21477 if (live_regs_mask == (1U << reg))
21478 break;
21480 if (reg <= LAST_ARM_REGNUM
21481 && (reg != LR_REGNUM
21482 || ! really_return
21483 || ! IS_INTERRUPT (func_type)))
21485 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21486 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21488 else
21490 char *p;
21491 int first = 1;
21493 /* Generate the load multiple instruction to restore the
21494 registers. Note we can get here, even if
21495 frame_pointer_needed is true, but only if sp already
21496 points to the base of the saved core registers. */
21497 if (live_regs_mask & (1 << SP_REGNUM))
21499 unsigned HOST_WIDE_INT stack_adjust;
21501 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21502 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21504 if (stack_adjust && arm_arch5t && TARGET_ARM)
21505 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21506 else
21508 /* If we can't use ldmib (SA110 bug),
21509 then try to pop r3 instead. */
21510 if (stack_adjust)
21511 live_regs_mask |= 1 << 3;
21513 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21516 /* For interrupt returns we have to use an LDM rather than
21517 a POP so that we can use the exception return variant. */
21518 else if (IS_INTERRUPT (func_type))
21519 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21520 else
21521 sprintf (instr, "pop%s\t{", conditional);
21523 p = instr + strlen (instr);
21525 for (reg = 0; reg <= SP_REGNUM; reg++)
21526 if (live_regs_mask & (1 << reg))
21528 int l = strlen (reg_names[reg]);
21530 if (first)
21531 first = 0;
21532 else
21534 memcpy (p, ", ", 2);
21535 p += 2;
21538 memcpy (p, "%|", 2);
21539 memcpy (p + 2, reg_names[reg], l);
21540 p += l + 2;
21543 if (live_regs_mask & (1 << LR_REGNUM))
21545 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21546 /* If returning from an interrupt, restore the CPSR. */
21547 if (IS_INTERRUPT (func_type))
21548 strcat (p, "^");
21550 else
21551 strcpy (p, "}");
21554 output_asm_insn (instr, & operand);
21556 /* See if we need to generate an extra instruction to
21557 perform the actual function return. */
21558 if (really_return
21559 && func_type != ARM_FT_INTERWORKED
21560 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21562 /* The return has already been handled
21563 by loading the LR into the PC. */
21564 return "";
21568 if (really_return)
21570 switch ((int) ARM_FUNC_TYPE (func_type))
21572 case ARM_FT_ISR:
21573 case ARM_FT_FIQ:
21574 /* ??? This is wrong for unified assembly syntax. */
21575 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21576 break;
21578 case ARM_FT_INTERWORKED:
21579 gcc_assert (arm_arch5t || arm_arch4t);
21580 sprintf (instr, "bx%s\t%%|lr", conditional);
21581 break;
21583 case ARM_FT_EXCEPTION:
21584 /* ??? This is wrong for unified assembly syntax. */
21585 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21586 break;
21588 default:
21589 if (IS_CMSE_ENTRY (func_type))
21591 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21592 emitted by cmse_nonsecure_entry_clear_before_return () and the
21593 VSTR/VLDR instructions in the prologue and epilogue. */
21594 if (!TARGET_HAVE_FPCXT_CMSE)
21596 /* Check if we have to clear the 'GE bits' which are only used if
21597 parallel add and subtraction instructions are available. */
21598 if (TARGET_INT_SIMD)
21599 snprintf (instr, sizeof (instr),
21600 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21601 else
21602 snprintf (instr, sizeof (instr),
21603 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21605 output_asm_insn (instr, & operand);
21606 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21607 care of it. */
21608 if (TARGET_HARD_FLOAT)
21610 /* Clear the cumulative exception-status bits (0-4,7) and
21611 the condition code bits (28-31) of the FPSCR. We need
21612 to remember to clear the first scratch register used
21613 (IP) and save and restore the second (r4).
21615 Important note: the length of the
21616 thumb2_cmse_entry_return insn pattern must account for
21617 the size of the below instructions. */
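/* 65376 is 0xff60 and 4095 is 0x0fff, so r4 holds the mask 0x0fffff60:
   ANDing the FPSCR value with it clears bits 0-4, 7 and 28-31 while leaving
   the remaining bits untouched.  */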
21618 output_asm_insn ("push\t{%|r4}", & operand);
21619 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21620 output_asm_insn ("movw\t%|r4, #65376", & operand);
21621 output_asm_insn ("movt\t%|r4, #4095", & operand);
21622 output_asm_insn ("and\t%|ip, %|r4", & operand);
21623 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21624 output_asm_insn ("pop\t{%|r4}", & operand);
21625 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21628 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21630 /* Use bx if it's available. */
21631 else if (arm_arch5t || arm_arch4t)
21632 sprintf (instr, "bx%s\t%%|lr", conditional);
21633 else
21634 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21635 break;
21638 output_asm_insn (instr, & operand);
21641 return "";
21644 /* Output in FILE asm statements needed to declare the NAME of the function
21645 defined by its DECL node. */
21647 void
21648 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21650 size_t cmse_name_len;
21651 char *cmse_name = 0;
21652 char cmse_prefix[] = "__acle_se_";
21654 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21655 extra function label for each function with the 'cmse_nonsecure_entry'
21656 attribute. This extra function label should be prepended with
21657 '__acle_se_', telling the linker that it needs to create secure gateway
21658 veneers for this function. */
21659 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21660 DECL_ATTRIBUTES (decl)))
21662 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21663 cmse_name = XALLOCAVEC (char, cmse_name_len);
21664 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21665 targetm.asm_out.globalize_label (file, cmse_name);
21667 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21668 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21671 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21672 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21673 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21674 ASM_OUTPUT_LABEL (file, name);
21676 if (cmse_name)
21677 ASM_OUTPUT_LABEL (file, cmse_name);
21679 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21682 /* Write the function name into the code section, directly preceding
21683 the function prologue.
21685 Code will be output similar to this:
21687 .ascii "arm_poke_function_name", 0
21688 .align
21690 .word 0xff000000 + (t1 - t0)
21691 arm_poke_function_name
21692 mov ip, sp
21693 stmfd sp!, {fp, ip, lr, pc}
21694 sub fp, ip, #4
21696 When performing a stack backtrace, code can inspect the value
21697 of 'pc' stored at 'fp' + 0. If the trace function then looks
21698 at location pc - 12 and the top 8 bits are set, then we know
21699 that there is a function name embedded immediately preceding this
21700 location, whose length is (pc[-3] & 0x00ffffff).
21702 We assume that pc is declared as a pointer to an unsigned long.
21704 It is of no benefit to output the function name if we are assembling
21705 a leaf function. These function types will not contain a stack
21706 backtrace structure, therefore it is not possible to determine the
21707 function name. */
21708 void
21709 arm_poke_function_name (FILE *stream, const char *name)
21711 unsigned long alignlength;
21712 unsigned long length;
21713 rtx x;
21715 length = strlen (name) + 1;
21716 alignlength = ROUND_UP_WORD (length);
21718 ASM_OUTPUT_ASCII (stream, name, length);
21719 ASM_OUTPUT_ALIGN (stream, 2);
21720 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21721 assemble_aligned_integer (UNITS_PER_WORD, x);
21724 /* Place some comments into the assembler stream
21725 describing the current function. */
21726 static void
21727 arm_output_function_prologue (FILE *f)
21729 unsigned long func_type;
21731 /* Sanity check. */
21732 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21734 func_type = arm_current_func_type ();
21736 switch ((int) ARM_FUNC_TYPE (func_type))
21738 default:
21739 case ARM_FT_NORMAL:
21740 break;
21741 case ARM_FT_INTERWORKED:
21742 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21743 break;
21744 case ARM_FT_ISR:
21745 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21746 break;
21747 case ARM_FT_FIQ:
21748 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21749 break;
21750 case ARM_FT_EXCEPTION:
21751 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21752 break;
21755 if (IS_NAKED (func_type))
21756 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21758 if (IS_VOLATILE (func_type))
21759 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21761 if (IS_NESTED (func_type))
21762 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21763 if (IS_STACKALIGN (func_type))
21764 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21765 if (IS_CMSE_ENTRY (func_type))
21766 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21768 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21769 (HOST_WIDE_INT) crtl->args.size,
21770 crtl->args.pretend_args_size,
21771 (HOST_WIDE_INT) get_frame_size ());
21773 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21774 frame_pointer_needed,
21775 cfun->machine->uses_anonymous_args);
21777 if (cfun->machine->lr_save_eliminated)
21778 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21780 if (crtl->calls_eh_return)
21781 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21785 static void
21786 arm_output_function_epilogue (FILE *)
21788 arm_stack_offsets *offsets;
21790 if (TARGET_THUMB1)
21792 int regno;
21794 /* Emit any call-via-reg trampolines that are needed for v4t support
21795 of call_reg and call_value_reg type insns. */
21796 for (regno = 0; regno < LR_REGNUM; regno++)
21798 rtx label = cfun->machine->call_via[regno];
21800 if (label != NULL)
21802 switch_to_section (function_section (current_function_decl));
21803 targetm.asm_out.internal_label (asm_out_file, "L",
21804 CODE_LABEL_NUMBER (label));
21805 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21809 /* ??? Probably not safe to set this here, since it assumes that a
21810 function will be emitted as assembly immediately after we generate
21811 RTL for it. This does not happen for inline functions. */
21812 cfun->machine->return_used_this_function = 0;
21814 else /* TARGET_32BIT */
21816 /* We need to take into account any stack-frame rounding. */
21817 offsets = arm_get_frame_offsets ();
21819 gcc_assert (!use_return_insn (FALSE, NULL)
21820 || (cfun->machine->return_used_this_function != 0)
21821 || offsets->saved_regs == offsets->outgoing_args
21822 || frame_pointer_needed);
21826 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21827 STR and STRD. If an even number of registers are being pushed, one
21828 or more STRD patterns are created for each register pair. If an
21829 odd number of registers are pushed, emit an initial STR followed by
21830 as many STRD instructions as are needed. This works best when the
21831 stack is initially 64-bit aligned (the normal case), since it
21832 ensures that each STRD is also 64-bit aligned. */
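/* For example, pushing {r4, r5, r6} emits roughly
   str r4, [sp, #-12]! followed by strd r5, r6, [sp, #4].  */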
21833 static void
21834 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21836 int num_regs = 0;
21837 int i;
21838 int regno;
21839 rtx par = NULL_RTX;
21840 rtx dwarf = NULL_RTX;
21841 rtx tmp;
21842 bool first = true;
21844 num_regs = bit_count (saved_regs_mask);
21846 /* Must be at least one register to save, and can't save SP or PC. */
21847 gcc_assert (num_regs > 0 && num_regs <= 14);
21848 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21849 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21851 /* Create sequence for DWARF info. All the frame-related data for
21852 debugging is held in this wrapper. */
21853 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21855 /* Describe the stack adjustment. */
21856 tmp = gen_rtx_SET (stack_pointer_rtx,
21857 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21858 RTX_FRAME_RELATED_P (tmp) = 1;
21859 XVECEXP (dwarf, 0, 0) = tmp;
21861 /* Find the first register. */
21862 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21865 i = 0;
21867 /* If there's an odd number of registers to push, start off by
21868 pushing a single register. This ensures that subsequent strd
21869 operations are dword aligned (assuming that SP was originally
21870 64-bit aligned). */
21871 if ((num_regs & 1) != 0)
21873 rtx reg, mem, insn;
21875 reg = gen_rtx_REG (SImode, regno);
21876 if (num_regs == 1)
21877 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21878 stack_pointer_rtx));
21879 else
21880 mem = gen_frame_mem (Pmode,
21881 gen_rtx_PRE_MODIFY
21882 (Pmode, stack_pointer_rtx,
21883 plus_constant (Pmode, stack_pointer_rtx,
21884 -4 * num_regs)));
21886 tmp = gen_rtx_SET (mem, reg);
21887 RTX_FRAME_RELATED_P (tmp) = 1;
21888 insn = emit_insn (tmp);
21889 RTX_FRAME_RELATED_P (insn) = 1;
21890 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21891 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21892 RTX_FRAME_RELATED_P (tmp) = 1;
21893 i++;
21894 regno++;
21895 XVECEXP (dwarf, 0, i) = tmp;
21896 first = false;
21899 while (i < num_regs)
21900 if (saved_regs_mask & (1 << regno))
21902 rtx reg1, reg2, mem1, mem2;
21903 rtx tmp0, tmp1, tmp2;
21904 int regno2;
21906 /* Find the register to pair with this one. */
21907 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21908 regno2++)
21911 reg1 = gen_rtx_REG (SImode, regno);
21912 reg2 = gen_rtx_REG (SImode, regno2);
21914 if (first)
21916 rtx insn;
21918 first = false;
21919 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21920 stack_pointer_rtx,
21921 -4 * num_regs));
21922 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21923 stack_pointer_rtx,
21924 -4 * (num_regs - 1)));
21925 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21926 plus_constant (Pmode, stack_pointer_rtx,
21927 -4 * (num_regs)));
21928 tmp1 = gen_rtx_SET (mem1, reg1);
21929 tmp2 = gen_rtx_SET (mem2, reg2);
21930 RTX_FRAME_RELATED_P (tmp0) = 1;
21931 RTX_FRAME_RELATED_P (tmp1) = 1;
21932 RTX_FRAME_RELATED_P (tmp2) = 1;
21933 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21934 XVECEXP (par, 0, 0) = tmp0;
21935 XVECEXP (par, 0, 1) = tmp1;
21936 XVECEXP (par, 0, 2) = tmp2;
21937 insn = emit_insn (par);
21938 RTX_FRAME_RELATED_P (insn) = 1;
21939 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21941 else
21943 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21944 stack_pointer_rtx,
21945 4 * i));
21946 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21947 stack_pointer_rtx,
21948 4 * (i + 1)));
21949 tmp1 = gen_rtx_SET (mem1, reg1);
21950 tmp2 = gen_rtx_SET (mem2, reg2);
21951 RTX_FRAME_RELATED_P (tmp1) = 1;
21952 RTX_FRAME_RELATED_P (tmp2) = 1;
21953 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21954 XVECEXP (par, 0, 0) = tmp1;
21955 XVECEXP (par, 0, 1) = tmp2;
21956 emit_insn (par);
21959 /* Create unwind information. This is an approximation. */
21960 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21961 plus_constant (Pmode,
21962 stack_pointer_rtx,
21963 4 * i)),
21964 reg1);
21965 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21966 plus_constant (Pmode,
21967 stack_pointer_rtx,
21968 4 * (i + 1))),
21969 reg2);
21971 RTX_FRAME_RELATED_P (tmp1) = 1;
21972 RTX_FRAME_RELATED_P (tmp2) = 1;
21973 XVECEXP (dwarf, 0, i + 1) = tmp1;
21974 XVECEXP (dwarf, 0, i + 2) = tmp2;
21975 i += 2;
21976 regno = regno2 + 1;
21978 else
21979 regno++;
21981 return;
21984 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21985 whenever possible, otherwise it emits single-word stores. The first store
21986 also allocates stack space for all saved registers, using writeback with
21987 post-addressing mode. All other stores use offset addressing. If no STRD
21988 can be emitted, this function emits a sequence of single-word stores,
21989 and not an STM as before, because single-word stores provide more
21990 scheduling freedom and can be turned into an STM by peephole optimizations. */
21991 static void
21992 arm_emit_strd_push (unsigned long saved_regs_mask)
21994 int num_regs = 0;
21995 int i, j, dwarf_index = 0;
21996 int offset = 0;
21997 rtx dwarf = NULL_RTX;
21998 rtx insn = NULL_RTX;
21999 rtx tmp, mem;
22001 /* TODO: More efficient code can be emitted by changing the
22002 layout, e.g., first push all pairs that can use STRD to keep the
22003 stack aligned, and then push all other registers. */
22004 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22005 if (saved_regs_mask & (1 << i))
22006 num_regs++;
22008 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22009 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22010 gcc_assert (num_regs > 0);
22012 /* Create sequence for DWARF info. */
22013 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22015 /* For dwarf info, we generate explicit stack update. */
22016 tmp = gen_rtx_SET (stack_pointer_rtx,
22017 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22018 RTX_FRAME_RELATED_P (tmp) = 1;
22019 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22021 /* Save registers. */
22022 offset = - 4 * num_regs;
22023 j = 0;
22024 while (j <= LAST_ARM_REGNUM)
22025 if (saved_regs_mask & (1 << j))
22027 if ((j % 2 == 0)
22028 && (saved_regs_mask & (1 << (j + 1))))
22030 /* Current register and next register form a register pair for
22031 which STRD can be generated. */
22032 if (offset < 0)
22034 /* Allocate stack space for all saved registers. */
22035 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22036 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22037 mem = gen_frame_mem (DImode, tmp);
22038 offset = 0;
22040 else if (offset > 0)
22041 mem = gen_frame_mem (DImode,
22042 plus_constant (Pmode,
22043 stack_pointer_rtx,
22044 offset));
22045 else
22046 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22048 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22049 RTX_FRAME_RELATED_P (tmp) = 1;
22050 tmp = emit_insn (tmp);
22052 /* Record the first store insn. */
22053 if (dwarf_index == 1)
22054 insn = tmp;
22056 /* Generate dwarf info. */
22057 mem = gen_frame_mem (SImode,
22058 plus_constant (Pmode,
22059 stack_pointer_rtx,
22060 offset));
22061 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22062 RTX_FRAME_RELATED_P (tmp) = 1;
22063 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22065 mem = gen_frame_mem (SImode,
22066 plus_constant (Pmode,
22067 stack_pointer_rtx,
22068 offset + 4));
22069 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22070 RTX_FRAME_RELATED_P (tmp) = 1;
22071 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22073 offset += 8;
22074 j += 2;
22076 else
22078 /* Emit a single word store. */
22079 if (offset < 0)
22081 /* Allocate stack space for all saved registers. */
22082 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22083 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22084 mem = gen_frame_mem (SImode, tmp);
22085 offset = 0;
22087 else if (offset > 0)
22088 mem = gen_frame_mem (SImode,
22089 plus_constant (Pmode,
22090 stack_pointer_rtx,
22091 offset));
22092 else
22093 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22095 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22096 RTX_FRAME_RELATED_P (tmp) = 1;
22097 tmp = emit_insn (tmp);
22099 /* Record the first store insn. */
22100 if (dwarf_index == 1)
22101 insn = tmp;
22103 /* Generate dwarf info. */
22104 mem = gen_frame_mem (SImode,
22105 plus_constant(Pmode,
22106 stack_pointer_rtx,
22107 offset));
22108 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22109 RTX_FRAME_RELATED_P (tmp) = 1;
22110 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22112 offset += 4;
22113 j += 1;
22116 else
22117 j++;
22119 /* Attach dwarf info to the first insn we generate. */
22120 gcc_assert (insn != NULL_RTX);
22121 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22122 RTX_FRAME_RELATED_P (insn) = 1;
22125 /* Generate and emit an insn that we will recognize as a push_multi.
22126 Unfortunately, since this insn does not reflect very well the actual
22127 semantics of the operation, we need to annotate the insn for the benefit
22128 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22129 MASK for registers that should be annotated for DWARF2 frame unwind
22130 information. */
22131 static rtx
22132 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22134 int num_regs = 0;
22135 int num_dwarf_regs = 0;
22136 int i, j;
22137 rtx par;
22138 rtx dwarf;
22139 int dwarf_par_index;
22140 rtx tmp, reg;
22142 /* We don't record the PC in the dwarf frame information. */
22143 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22145 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22147 if (mask & (1 << i))
22148 num_regs++;
22149 if (dwarf_regs_mask & (1 << i))
22150 num_dwarf_regs++;
22153 gcc_assert (num_regs && num_regs <= 16);
22154 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22156 /* For the body of the insn we are going to generate an UNSPEC in
22157 parallel with several USEs. This allows the insn to be recognized
22158 by the push_multi pattern in the arm.md file.
22160 The body of the insn looks something like this:
22162 (parallel [
22163 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22164 (const_int:SI <num>)))
22165 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22166 (use (reg:SI XX))
22167 (use (reg:SI YY))
22171 For the frame note however, we try to be more explicit and actually
22172 show each register being stored into the stack frame, plus a (single)
22173 decrement of the stack pointer. We do it this way in order to be
22174 friendly to the stack unwinding code, which only wants to see a single
22175 stack decrement per instruction. The RTL we generate for the note looks
22176 something like this:
22178 (sequence [
22179 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22180 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22181 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22182 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22186 FIXME: In an ideal world the PRE_MODIFY would not exist and
22187 instead we'd have a parallel expression detailing all
22188 the stores to the various memory addresses so that debug
22189 information is more up-to-date. Remember however while writing
22190 this to take care of the constraints with the push instruction.
22192 Note also that this has to be taken care of for the VFP registers.
22194 For more see PR43399. */
22196 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22197 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22198 dwarf_par_index = 1;
22200 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22202 if (mask & (1 << i))
22204 reg = gen_rtx_REG (SImode, i);
22206 XVECEXP (par, 0, 0)
22207 = gen_rtx_SET (gen_frame_mem
22208 (BLKmode,
22209 gen_rtx_PRE_MODIFY (Pmode,
22210 stack_pointer_rtx,
22211 plus_constant
22212 (Pmode, stack_pointer_rtx,
22213 -4 * num_regs))
22215 gen_rtx_UNSPEC (BLKmode,
22216 gen_rtvec (1, reg),
22217 UNSPEC_PUSH_MULT));
22219 if (dwarf_regs_mask & (1 << i))
22221 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22222 reg);
22223 RTX_FRAME_RELATED_P (tmp) = 1;
22224 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22227 break;
22231 for (j = 1, i++; j < num_regs; i++)
22233 if (mask & (1 << i))
22235 reg = gen_rtx_REG (SImode, i);
22237 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22239 if (dwarf_regs_mask & (1 << i))
22242 = gen_rtx_SET (gen_frame_mem
22243 (SImode,
22244 plus_constant (Pmode, stack_pointer_rtx,
22245 4 * j)),
22246 reg);
22247 RTX_FRAME_RELATED_P (tmp) = 1;
22248 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22251 j++;
22255 par = emit_insn (par);
22257 tmp = gen_rtx_SET (stack_pointer_rtx,
22258 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22259 RTX_FRAME_RELATED_P (tmp) = 1;
22260 XVECEXP (dwarf, 0, 0) = tmp;
22262 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22264 return par;
22267 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22268 SIZE is the offset to be adjusted.
22269 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22270 static void
22271 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22273 rtx dwarf;
22275 RTX_FRAME_RELATED_P (insn) = 1;
22276 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22277 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22280 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22281 SAVED_REGS_MASK shows which registers need to be restored.
22283 Unfortunately, since this insn does not reflect very well the actual
22284 semantics of the operation, we need to annotate the insn for the benefit
22285 of DWARF2 frame unwind information. */
22286 static void
22287 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22289 int num_regs = 0;
22290 int i, j;
22291 rtx par;
22292 rtx dwarf = NULL_RTX;
22293 rtx tmp, reg;
22294 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22295 int offset_adj;
22296 int emit_update;
22298 offset_adj = return_in_pc ? 1 : 0;
22299 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22300 if (saved_regs_mask & (1 << i))
22301 num_regs++;
22303 gcc_assert (num_regs && num_regs <= 16);
22305 /* If SP is in reglist, then we don't emit SP update insn. */
22306 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22308 /* The parallel needs to hold num_regs SETs
22309 and one SET for the stack update. */
22310 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22312 if (return_in_pc)
22313 XVECEXP (par, 0, 0) = ret_rtx;
22315 if (emit_update)
22317 /* Increment the stack pointer, based on there being
22318 num_regs 4-byte registers to restore. */
22319 tmp = gen_rtx_SET (stack_pointer_rtx,
22320 plus_constant (Pmode,
22321 stack_pointer_rtx,
22322 4 * num_regs));
22323 RTX_FRAME_RELATED_P (tmp) = 1;
22324 XVECEXP (par, 0, offset_adj) = tmp;
22327 /* Now restore every reg, which may include PC. */
22328 for (j = 0, i = 0; j < num_regs; i++)
22329 if (saved_regs_mask & (1 << i))
22331 reg = gen_rtx_REG (SImode, i);
22332 if ((num_regs == 1) && emit_update && !return_in_pc)
22334 /* Emit single load with writeback. */
22335 tmp = gen_frame_mem (SImode,
22336 gen_rtx_POST_INC (Pmode,
22337 stack_pointer_rtx));
22338 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22339 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22340 return;
22343 tmp = gen_rtx_SET (reg,
22344 gen_frame_mem
22345 (SImode,
22346 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22347 RTX_FRAME_RELATED_P (tmp) = 1;
22348 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22350 /* We need to maintain a sequence for DWARF info too. As dwarf info
22351 should not have PC, skip PC. */
22352 if (i != PC_REGNUM)
22353 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22355 j++;
22358 if (return_in_pc)
22359 par = emit_jump_insn (par);
22360 else
22361 par = emit_insn (par);
22363 REG_NOTES (par) = dwarf;
22364 if (!return_in_pc)
22365 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22366 stack_pointer_rtx, stack_pointer_rtx);
22369 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22370 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22372 Unfortunately, since this insn does not reflect very well the actual
22373 semantics of the operation, we need to annotate the insn for the benefit
22374 of DWARF2 frame unwind information. */
22375 static void
22376 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22378 int i, j;
22379 rtx par;
22380 rtx dwarf = NULL_RTX;
22381 rtx tmp, reg;
22383 gcc_assert (num_regs && num_regs <= 32);
22385 /* Workaround ARM10 VFPr1 bug. */
22386 if (num_regs == 2 && !arm_arch6)
22388 if (first_reg == 15)
22389 first_reg--;
22391 num_regs++;
22394 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22395 there could be up to 32 D-registers to restore.
22396 If there are more than 16 D-registers, make two recursive calls,
22397 each of which emits one pop_multi instruction. */
22398 if (num_regs > 16)
22400 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22401 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22402 return;
22405 /* The parallel needs to hold num_regs SETs
22406 and one SET for the stack update. */
22407 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22409 /* Increment the stack pointer, based on there being
22410 num_regs 8-byte registers to restore. */
22411 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22412 RTX_FRAME_RELATED_P (tmp) = 1;
22413 XVECEXP (par, 0, 0) = tmp;
22415 /* Now show every reg that will be restored, using a SET for each. */
22416 for (j = 0, i=first_reg; j < num_regs; i += 2)
22418 reg = gen_rtx_REG (DFmode, i);
22420 tmp = gen_rtx_SET (reg,
22421 gen_frame_mem
22422 (DFmode,
22423 plus_constant (Pmode, base_reg, 8 * j)));
22424 RTX_FRAME_RELATED_P (tmp) = 1;
22425 XVECEXP (par, 0, j + 1) = tmp;
22427 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22429 j++;
22432 par = emit_insn (par);
22433 REG_NOTES (par) = dwarf;
22435 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22436 if (REGNO (base_reg) == IP_REGNUM)
22438 RTX_FRAME_RELATED_P (par) = 1;
22439 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22441 else
22442 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22443 base_reg, base_reg);
22446 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22447 an even number of registers is being popped, multiple LDRD patterns are
22448 created for all register pairs. If an odd number of registers is popped,
22449 the last register is loaded using an LDR pattern. */
22450 static void
22451 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22453 int num_regs = 0;
22454 int i, j;
22455 rtx par = NULL_RTX;
22456 rtx dwarf = NULL_RTX;
22457 rtx tmp, reg, tmp1;
22458 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22460 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22461 if (saved_regs_mask & (1 << i))
22462 num_regs++;
22464 gcc_assert (num_regs && num_regs <= 16);
22466 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22467 to be popped. So, if num_regs is even, now it will become odd,
22468 and we can generate pop with PC. If num_regs is odd, it will be
22469 even now, and ldr with return can be generated for PC. */
22470 if (return_in_pc)
22471 num_regs--;
22473 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22475 /* Var j iterates over all the registers to gather all the registers in
22476 saved_regs_mask. Var i gives index of saved registers in stack frame.
22477 A PARALLEL RTX of register-pair is created here, so that pattern for
22478 LDRD can be matched. As PC is always last register to be popped, and
22479 we have already decremented num_regs if PC, we don't have to worry
22480 about PC in this loop. */
22481 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22482 if (saved_regs_mask & (1 << j))
22484 /* Create RTX for memory load. */
22485 reg = gen_rtx_REG (SImode, j);
22486 tmp = gen_rtx_SET (reg,
22487 gen_frame_mem (SImode,
22488 plus_constant (Pmode,
22489 stack_pointer_rtx, 4 * i)));
22490 RTX_FRAME_RELATED_P (tmp) = 1;
22492 if (i % 2 == 0)
22494 /* When saved-register index (i) is even, the RTX to be emitted is
22495 yet to be created. Hence create it first. The LDRD pattern we
22496 are generating is :
22497 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22498 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22499 where target registers need not be consecutive. */
22500 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22501 dwarf = NULL_RTX;
22504 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22505 added as 0th element and if i is odd, reg_i is added as 1st element
22506 of LDRD pattern shown above. */
22507 XVECEXP (par, 0, (i % 2)) = tmp;
22508 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22510 if ((i % 2) == 1)
22512 /* When saved-register index (i) is odd, RTXs for both the registers
22513 to be loaded are generated in above given LDRD pattern, and the
22514 pattern can be emitted now. */
22515 par = emit_insn (par);
22516 REG_NOTES (par) = dwarf;
22517 RTX_FRAME_RELATED_P (par) = 1;
22520 i++;
22523 /* If the number of registers pushed is odd and return_in_pc is false, or the
22524 number of registers is even and return_in_pc is true, the last register is
22525 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22526 then use LDR with post-increment.
22528 /* Increment the stack pointer, based on there being
22529 num_regs 4-byte registers to restore. */
22530 tmp = gen_rtx_SET (stack_pointer_rtx,
22531 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22532 RTX_FRAME_RELATED_P (tmp) = 1;
22533 tmp = emit_insn (tmp);
22534 if (!return_in_pc)
22536 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22537 stack_pointer_rtx, stack_pointer_rtx);
22540 dwarf = NULL_RTX;
22542 if (((num_regs % 2) == 1 && !return_in_pc)
22543 || ((num_regs % 2) == 0 && return_in_pc))
22545 /* Scan for the single register to be popped. Skip until the saved
22546 register is found. */
22547 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22549 /* Gen LDR with post increment here. */
22550 tmp1 = gen_rtx_MEM (SImode,
22551 gen_rtx_POST_INC (SImode,
22552 stack_pointer_rtx));
22553 set_mem_alias_set (tmp1, get_frame_alias_set ());
22555 reg = gen_rtx_REG (SImode, j);
22556 tmp = gen_rtx_SET (reg, tmp1);
22557 RTX_FRAME_RELATED_P (tmp) = 1;
22558 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22560 if (return_in_pc)
22562 /* If return_in_pc, j must be PC_REGNUM. */
22563 gcc_assert (j == PC_REGNUM);
22564 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22565 XVECEXP (par, 0, 0) = ret_rtx;
22566 XVECEXP (par, 0, 1) = tmp;
22567 par = emit_jump_insn (par);
22569 else
22571 par = emit_insn (tmp);
22572 REG_NOTES (par) = dwarf;
22573 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22574 stack_pointer_rtx, stack_pointer_rtx);
22578 else if ((num_regs % 2) == 1 && return_in_pc)
22580 /* There are 2 registers to be popped. So, generate the pattern
22581 pop_multiple_with_stack_update_and_return to pop in PC. */
22582 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22585 return;
22588 /* LDRD in ARM mode needs consecutive registers as operands. This function
22589 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22590 offset addressing and then generates one separate stack update. This provides
22591 more scheduling freedom, compared to writeback on every load. However,
22592 if the function returns using load into PC directly
22593 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22594 before the last load. TODO: Add a peephole optimization to recognize
22595 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22596 peephole optimization to merge the load at stack-offset zero
22597 with the stack update instruction using load with writeback
22598 in post-index addressing mode. */
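/* As an illustrative sketch (actual output depends on SAVED_REGS_MASK), an
   epilogue restoring r4-r7 and returning through PC corresponds to:

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16
	ldr	pc, [sp], #4  */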
22599 static void
22600 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22602 int j = 0;
22603 int offset = 0;
22604 rtx par = NULL_RTX;
22605 rtx dwarf = NULL_RTX;
22606 rtx tmp, mem;
22608 /* Restore saved registers. */
22609 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22610 j = 0;
22611 while (j <= LAST_ARM_REGNUM)
22612 if (saved_regs_mask & (1 << j))
22614 if ((j % 2) == 0
22615 && (saved_regs_mask & (1 << (j + 1)))
22616 && (j + 1) != PC_REGNUM)
22618 /* Current register and next register form register pair for which
22619 LDRD can be generated. PC is always the last register popped, and
22620 we handle it separately. */
22621 if (offset > 0)
22622 mem = gen_frame_mem (DImode,
22623 plus_constant (Pmode,
22624 stack_pointer_rtx,
22625 offset));
22626 else
22627 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22629 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22630 tmp = emit_insn (tmp);
22631 RTX_FRAME_RELATED_P (tmp) = 1;
22633 /* Generate dwarf info. */
22635 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22636 gen_rtx_REG (SImode, j),
22637 NULL_RTX);
22638 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22639 gen_rtx_REG (SImode, j + 1),
22640 dwarf);
22642 REG_NOTES (tmp) = dwarf;
22644 offset += 8;
22645 j += 2;
22647 else if (j != PC_REGNUM)
22649 /* Emit a single word load. */
22650 if (offset > 0)
22651 mem = gen_frame_mem (SImode,
22652 plus_constant (Pmode,
22653 stack_pointer_rtx,
22654 offset));
22655 else
22656 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22658 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22659 tmp = emit_insn (tmp);
22660 RTX_FRAME_RELATED_P (tmp) = 1;
22662 /* Generate dwarf info. */
22663 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22664 gen_rtx_REG (SImode, j),
22665 NULL_RTX);
22667 offset += 4;
22668 j += 1;
22670 else /* j == PC_REGNUM */
22671 j++;
22673 else
22674 j++;
22676 /* Update the stack. */
22677 if (offset > 0)
22679 tmp = gen_rtx_SET (stack_pointer_rtx,
22680 plus_constant (Pmode,
22681 stack_pointer_rtx,
22682 offset));
22683 tmp = emit_insn (tmp);
22684 arm_add_cfa_adjust_cfa_note (tmp, offset,
22685 stack_pointer_rtx, stack_pointer_rtx);
22686 offset = 0;
22689 if (saved_regs_mask & (1 << PC_REGNUM))
22691 /* Only PC is to be popped. */
22692 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22693 XVECEXP (par, 0, 0) = ret_rtx;
22694 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22695 gen_frame_mem (SImode,
22696 gen_rtx_POST_INC (SImode,
22697 stack_pointer_rtx)));
22698 RTX_FRAME_RELATED_P (tmp) = 1;
22699 XVECEXP (par, 0, 1) = tmp;
22700 par = emit_jump_insn (par);
22702 /* Generate dwarf info. */
22703 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22704 gen_rtx_REG (SImode, PC_REGNUM),
22705 NULL_RTX);
22706 REG_NOTES (par) = dwarf;
22707 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22708 stack_pointer_rtx, stack_pointer_rtx);
22712 /* Calculate the size of the return value that is passed in registers. */
22713 static unsigned
22714 arm_size_return_regs (void)
22716 machine_mode mode;
22718 if (crtl->return_rtx != 0)
22719 mode = GET_MODE (crtl->return_rtx);
22720 else
22721 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22723 return GET_MODE_SIZE (mode);
22726 /* Return true if the current function needs to save/restore LR. */
22727 static bool
22728 thumb_force_lr_save (void)
22730 return !cfun->machine->lr_save_eliminated
22731 && (!crtl->is_leaf
22732 || thumb_far_jump_used_p ()
22733 || df_regs_ever_live_p (LR_REGNUM));
22736 /* We do not know whether r3 will be available, because there is
22737 an indirect tail call happening in this
22738 particular case. */
22739 static bool
22740 is_indirect_tailcall_p (rtx call)
22742 rtx pat = PATTERN (call);
22744 /* Indirect tail call. */
22745 pat = XVECEXP (pat, 0, 0);
22746 if (GET_CODE (pat) == SET)
22747 pat = SET_SRC (pat);
22749 pat = XEXP (XEXP (pat, 0), 0);
22750 return REG_P (pat);
22753 /* Return true if r3 is used by any of the tail call insns in the
22754 current function. */
22755 static bool
22756 any_sibcall_could_use_r3 (void)
22758 edge_iterator ei;
22759 edge e;
22761 if (!crtl->tail_call_emit)
22762 return false;
22763 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22764 if (e->flags & EDGE_SIBCALL)
22766 rtx_insn *call = BB_END (e->src);
22767 if (!CALL_P (call))
22768 call = prev_nonnote_nondebug_insn (call);
22769 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22770 if (find_regno_fusage (call, USE, 3)
22771 || is_indirect_tailcall_p (call))
22772 return true;
22774 return false;
22778 /* Compute the distance from register FROM to register TO.
22779 These can be the arg pointer (26), the soft frame pointer (25),
22780 the stack pointer (13) or the hard frame pointer (11).
22781 In thumb mode r7 is used as the soft frame pointer, if needed.
22782 Typical stack layout looks like this:
22784 old stack pointer -> | |
22785 ----
22786 | | \
22787 | | saved arguments for
22788 | | vararg functions
22789 | | /
22791 hard FP & arg pointer -> | | \
22792 | | stack
22793 | | frame
22794 | | /
22796 | | \
22797 | | call saved
22798 | | registers
22799 soft frame pointer -> | | /
22801 | | \
22802 | | local
22803 | | variables
22804 locals base pointer -> | | /
22806 | | \
22807 | | outgoing
22808 | | arguments
22809 current stack pointer -> | | /
22812 For a given function some or all of these stack components
22813 may not be needed, giving rise to the possibility of
22814 eliminating some of the registers.
22816 The values returned by this function must reflect the behavior
22817 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22819 The sign of the number returned reflects the direction of stack
22820 growth, so the values are positive for all eliminations except
22821 from the soft frame pointer to the hard frame pointer.
22823 SFP may point just inside the local variables block to ensure correct
22824 alignment. */
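/* As a purely hypothetical example (no varargs, no static chain, no
   interworking slot, no padding): with r4-r8 and LR saved (24 bytes),
   16 bytes of locals and 8 bytes of outgoing arguments, the offsets are
   saved_args = 0, saved_regs = 24, soft_frame = 24, locals_base = 40 and
   outgoing_args = 48, so eliminating ARG_POINTER into STACK_POINTER
   yields 48 - (0 + 4) = 44, and FRAME_POINTER into STACK_POINTER yields
   48 - 24 = 24.  */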
22827 /* Return cached stack offsets. */
22829 static arm_stack_offsets *
22830 arm_get_frame_offsets (void)
22832 struct arm_stack_offsets *offsets;
22834 offsets = &cfun->machine->stack_offsets;
22836 return offsets;
22840 /* Calculate stack offsets. These are used to calculate register elimination
22841 offsets and in prologue/epilogue code. Also calculates which registers
22842 should be saved. */
22844 static void
22845 arm_compute_frame_layout (void)
22847 struct arm_stack_offsets *offsets;
22848 unsigned long func_type;
22849 int saved;
22850 int core_saved;
22851 HOST_WIDE_INT frame_size;
22852 int i;
22854 offsets = &cfun->machine->stack_offsets;
22856 /* Initially this is the size of the local variables. It will be translated
22857 into an offset once we have determined the size of preceding data. */
22858 frame_size = ROUND_UP_WORD (get_frame_size ());
22860 /* Space for variadic functions. */
22861 offsets->saved_args = crtl->args.pretend_args_size;
22863 /* In Thumb mode this is incorrect, but never used. */
22864 offsets->frame
22865 = (offsets->saved_args
22866 + arm_compute_static_chain_stack_bytes ()
22867 + (frame_pointer_needed ? 4 : 0));
22869 if (TARGET_32BIT)
22871 unsigned int regno;
22873 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22874 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22875 saved = core_saved;
22877 /* We know that SP will be doubleword aligned on entry, and we must
22878 preserve that condition at any subroutine call. We also require the
22879 soft frame pointer to be doubleword aligned. */
22881 if (TARGET_REALLY_IWMMXT)
22883 /* Check for the call-saved iWMMXt registers. */
22884 for (regno = FIRST_IWMMXT_REGNUM;
22885 regno <= LAST_IWMMXT_REGNUM;
22886 regno++)
22887 if (reg_needs_saving_p (regno))
22888 saved += 8;
22891 func_type = arm_current_func_type ();
22892 /* Space for saved VFP registers. */
22893 if (! IS_VOLATILE (func_type)
22894 && TARGET_VFP_BASE)
22895 saved += arm_get_vfp_saved_size ();
22897 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22898 nonsecure entry functions with VSTR/VLDR. */
22899 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22900 saved += 4;
22902 else /* TARGET_THUMB1 */
22904 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22905 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22906 saved = core_saved;
22907 if (TARGET_BACKTRACE)
22908 saved += 16;
22911 /* Saved registers include the stack frame. */
22912 offsets->saved_regs
22913 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22914 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22916 /* A leaf function does not need any stack alignment if it has nothing
22917 on the stack. */
22918 if (crtl->is_leaf && frame_size == 0
22919 /* However if it calls alloca(), we have a dynamically allocated
22920 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22921 && ! cfun->calls_alloca)
22923 offsets->outgoing_args = offsets->soft_frame;
22924 offsets->locals_base = offsets->soft_frame;
22925 return;
22928 /* Ensure SFP has the correct alignment. */
22929 if (ARM_DOUBLEWORD_ALIGN
22930 && (offsets->soft_frame & 7))
22932 offsets->soft_frame += 4;
22933 /* Try to align stack by pushing an extra reg. Don't bother doing this
22934 when there is a stack frame as the alignment will be rolled into
22935 the normal stack adjustment. */
22936 if (frame_size + crtl->outgoing_args_size == 0)
22938 int reg = -1;
22940 /* Register r3 is caller-saved. Normally it does not need to be
22941 saved on entry by the prologue. However if we choose to save
22942 it for padding then we may confuse the compiler into thinking
22943 a prologue sequence is required when in fact it is not. This
22944 will occur when shrink-wrapping if r3 is used as a scratch
22945 register and there are no other callee-saved writes.
22947 This situation can be avoided when other callee-saved registers
22948 are available and r3 is not mandatory if we choose a callee-saved
22949 register for padding. */
22950 bool prefer_callee_reg_p = false;
22952 /* If it is safe to use r3, then do so. This sometimes
22953 generates better code on Thumb-2 by avoiding the need to
22954 use 32-bit push/pop instructions. */
22955 if (! any_sibcall_could_use_r3 ()
22956 && arm_size_return_regs () <= 12
22957 && (offsets->saved_regs_mask & (1 << 3)) == 0
22958 && (TARGET_THUMB2
22959 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22961 reg = 3;
22962 if (!TARGET_THUMB2)
22963 prefer_callee_reg_p = true;
22965 if (reg == -1
22966 || prefer_callee_reg_p)
22968 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22970 /* Avoid fixed registers; they may be changed at
22971 arbitrary times so it's unsafe to restore them
22972 during the epilogue. */
22973 if (!fixed_regs[i]
22974 && (offsets->saved_regs_mask & (1 << i)) == 0)
22976 reg = i;
22977 break;
22982 if (reg != -1)
22984 offsets->saved_regs += 4;
22985 offsets->saved_regs_mask |= (1 << reg);
22990 offsets->locals_base = offsets->soft_frame + frame_size;
22991 offsets->outgoing_args = (offsets->locals_base
22992 + crtl->outgoing_args_size);
22994 if (ARM_DOUBLEWORD_ALIGN)
22996 /* Ensure SP remains doubleword aligned. */
22997 if (offsets->outgoing_args & 7)
22998 offsets->outgoing_args += 4;
22999 gcc_assert (!(offsets->outgoing_args & 7));
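/* A minimal, guarded-out sketch of the offset arithmetic performed above,
   ignoring alignment padding, the static chain and the interworking slot;
   the function and parameter names are illustrative only and the example
   values match the hypothetical layout described before
   arm_get_frame_offsets.  */
#if 0
static void
sketch_arm_frame_offsets (int pretend_args, int core_saved_bytes,
			  int frame_size, int outgoing_size)
{
  int saved_args = pretend_args;		    /* e.g. 0  */
  int saved_regs = saved_args + core_saved_bytes;   /* e.g. 24 */
  int soft_frame = saved_regs;			    /* e.g. 24 */
  int locals_base = soft_frame + frame_size;	    /* e.g. 40 */
  int outgoing_args = locals_base + outgoing_size;  /* e.g. 48 */
  (void) outgoing_args;
}
#endif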
23004 /* Calculate the relative offsets for the different stack pointers. Positive
23005 offsets are in the direction of stack growth. */
23007 HOST_WIDE_INT
23008 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23010 arm_stack_offsets *offsets;
23012 offsets = arm_get_frame_offsets ();
23014 /* OK, now we have enough information to compute the distances.
23015 There must be an entry in these switch tables for each pair
23016 of registers in ELIMINABLE_REGS, even if some of the entries
23017 seem to be redundant or useless. */
23018 switch (from)
23020 case ARG_POINTER_REGNUM:
23021 switch (to)
23023 case THUMB_HARD_FRAME_POINTER_REGNUM:
23024 return 0;
23026 case FRAME_POINTER_REGNUM:
23027 /* This is the reverse of the soft frame pointer
23028 to hard frame pointer elimination below. */
23029 return offsets->soft_frame - offsets->saved_args;
23031 case ARM_HARD_FRAME_POINTER_REGNUM:
23032 /* This is only non-zero in the case where the static chain register
23033 is stored above the frame. */
23034 return offsets->frame - offsets->saved_args - 4;
23036 case STACK_POINTER_REGNUM:
23037 /* If nothing has been pushed on the stack at all
23038 then this will return -4. This *is* correct! */
23039 return offsets->outgoing_args - (offsets->saved_args + 4);
23041 default:
23042 gcc_unreachable ();
23044 gcc_unreachable ();
23046 case FRAME_POINTER_REGNUM:
23047 switch (to)
23049 case THUMB_HARD_FRAME_POINTER_REGNUM:
23050 return 0;
23052 case ARM_HARD_FRAME_POINTER_REGNUM:
23053 /* The hard frame pointer points to the top entry in the
23054 stack frame. The soft frame pointer to the bottom entry
23055 in the stack frame. If there is no stack frame at all,
23056 then they are identical. */
23058 return offsets->frame - offsets->soft_frame;
23060 case STACK_POINTER_REGNUM:
23061 return offsets->outgoing_args - offsets->soft_frame;
23063 default:
23064 gcc_unreachable ();
23066 gcc_unreachable ();
23068 default:
23069 /* You cannot eliminate from the stack pointer.
23070 In theory you could eliminate from the hard frame
23071 pointer to the stack pointer, but this will never
23072 happen, since if a stack frame is not needed the
23073 hard frame pointer will never be used. */
23074 gcc_unreachable ();
23078 /* Given FROM and TO register numbers, say whether this elimination is
23079 allowed. Frame pointer elimination is automatically handled.
23081 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23082 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23083 pointer, we must eliminate FRAME_POINTER_REGNUM into
23084 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23085 ARG_POINTER_REGNUM. */
23087 bool
23088 arm_can_eliminate (const int from, const int to)
23090 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23091 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23092 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23093 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23094 true);
23097 /* Emit RTL to save coprocessor registers on function entry. Returns the
23098 number of bytes pushed. */
23100 static int
23101 arm_save_coproc_regs(void)
23103 int saved_size = 0;
23104 unsigned reg;
23105 unsigned start_reg;
23106 rtx insn;
23108 if (TARGET_REALLY_IWMMXT)
23109 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23110 if (reg_needs_saving_p (reg))
23112 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23113 insn = gen_rtx_MEM (V2SImode, insn);
23114 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23115 RTX_FRAME_RELATED_P (insn) = 1;
23116 saved_size += 8;
23119 if (TARGET_VFP_BASE)
23121 start_reg = FIRST_VFP_REGNUM;
23123 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23125 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23127 if (start_reg != reg)
23128 saved_size += vfp_emit_fstmd (start_reg,
23129 (reg - start_reg) / 2);
23130 start_reg = reg + 2;
23133 if (start_reg != reg)
23134 saved_size += vfp_emit_fstmd (start_reg,
23135 (reg - start_reg) / 2);
23137 return saved_size;
23141 /* Set the Thumb frame pointer from the stack pointer. */
23143 static void
23144 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23146 HOST_WIDE_INT amount;
23147 rtx insn, dwarf;
23149 amount = offsets->outgoing_args - offsets->locals_base;
23150 if (amount < 1024)
23151 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23152 stack_pointer_rtx, GEN_INT (amount)));
23153 else
23155 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23156 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23157 expects the first two operands to be the same. */
23158 if (TARGET_THUMB2)
23160 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23161 stack_pointer_rtx,
23162 hard_frame_pointer_rtx));
23164 else
23166 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23167 hard_frame_pointer_rtx,
23168 stack_pointer_rtx));
23170 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23171 plus_constant (Pmode, stack_pointer_rtx, amount));
23172 RTX_FRAME_RELATED_P (dwarf) = 1;
23173 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23176 RTX_FRAME_RELATED_P (insn) = 1;
23179 struct scratch_reg {
23180 rtx reg;
23181 bool saved;
23184 /* Return a short-lived scratch register for use as a 2nd scratch register on
23185 function entry after the registers are saved in the prologue. This register
23186 must be released by means of release_scratch_register_on_entry. IP is not
23187 considered since it is always used as the 1st scratch register if available.
23189 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23190 mask of live registers. */
23192 static void
23193 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23194 unsigned long live_regs)
23196 int regno = -1;
23198 sr->saved = false;
23200 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23201 regno = LR_REGNUM;
23202 else
23204 unsigned int i;
23206 for (i = 4; i < 11; i++)
23207 if (regno1 != i && (live_regs & (1 << i)) != 0)
23209 regno = i;
23210 break;
23213 if (regno < 0)
23215 /* If IP is used as the 1st scratch register for a nested function,
23216 then either r3 wasn't available or is used to preserve IP. */
23217 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23218 regno1 = 3;
23219 regno = (regno1 == 3 ? 2 : 3);
23220 sr->saved
23221 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23222 regno);
23226 sr->reg = gen_rtx_REG (SImode, regno);
23227 if (sr->saved)
23229 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23230 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23231 rtx x = gen_rtx_SET (stack_pointer_rtx,
23232 plus_constant (Pmode, stack_pointer_rtx, -4));
23233 RTX_FRAME_RELATED_P (insn) = 1;
23234 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23238 /* Release a scratch register obtained from the preceding function. */
23240 static void
23241 release_scratch_register_on_entry (struct scratch_reg *sr)
23243 if (sr->saved)
23245 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23246 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23247 rtx x = gen_rtx_SET (stack_pointer_rtx,
23248 plus_constant (Pmode, stack_pointer_rtx, 4));
23249 RTX_FRAME_RELATED_P (insn) = 1;
23250 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23254 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23256 #if PROBE_INTERVAL > 4096
23257 #error Cannot use indexed addressing mode for stack probing
23258 #endif
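/* STACK_CHECK_PROBE_INTERVAL_EXP defaults to 12, so PROBE_INTERVAL is
   normally 4096 bytes (one page).  */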
23260 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23261 inclusive. These are offsets from the current stack pointer. REGNO1
23262 is the index number of the 1st scratch register and LIVE_REGS is the
23263 mask of live registers. */
23265 static void
23266 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23267 unsigned int regno1, unsigned long live_regs)
23269 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23271 /* See if we have a constant small number of probes to generate. If so,
23272 that's the easy case. */
23273 if (size <= PROBE_INTERVAL)
23275 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23276 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23277 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23280 /* The run-time loop is made up of 10 insns in the generic case while the
23281 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23282 else if (size <= 5 * PROBE_INTERVAL)
23284 HOST_WIDE_INT i, rem;
23286 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23287 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23288 emit_stack_probe (reg1);
23290 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23291 it exceeds SIZE. If only two probes are needed, this will not
23292 generate any code. Then probe at FIRST + SIZE. */
23293 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23295 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23296 emit_stack_probe (reg1);
23299 rem = size - (i - PROBE_INTERVAL);
23300 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23302 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23303 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23305 else
23306 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23309 /* Otherwise, do the same as above, but in a loop. Note that we must be
23310 extra careful with variables wrapping around because we might be at
23311 the very top (or the very bottom) of the address space and we have
23312 to be able to handle this case properly; in particular, we use an
23313 equality test for the loop condition. */
23314 else
23316 HOST_WIDE_INT rounded_size;
23317 struct scratch_reg sr;
23319 get_scratch_register_on_entry (&sr, regno1, live_regs);
23321 emit_move_insn (reg1, GEN_INT (first));
23324 /* Step 1: round SIZE to the previous multiple of the interval. */
23326 rounded_size = size & -PROBE_INTERVAL;
23327 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23330 /* Step 2: compute initial and final value of the loop counter. */
23332 /* TEST_ADDR = SP + FIRST. */
23333 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23335 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23336 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23339 /* Step 3: the loop
23343 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23344 probe at TEST_ADDR
23346 while (TEST_ADDR != LAST_ADDR)
23348 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23349 until it is equal to ROUNDED_SIZE. */
23351 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23354 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23355 that SIZE is equal to ROUNDED_SIZE. */
23357 if (size != rounded_size)
23359 HOST_WIDE_INT rem = size - rounded_size;
23361 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23363 emit_set_insn (sr.reg,
23364 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23365 emit_stack_probe (plus_constant (Pmode, sr.reg,
23366 PROBE_INTERVAL - rem));
23368 else
23369 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23372 release_scratch_register_on_entry (&sr);
23375 /* Make sure nothing is scheduled before we are done. */
23376 emit_insn (gen_blockage ());
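/* As an illustrative example for ARM state with PROBE_INTERVAL == 4096:
   probing FIRST == 4096, SIZE == 10240 takes the second branch above and
   touches SP - 8192, SP - 12288 and finally SP - 14336 (i.e. FIRST + SIZE
   below the incoming stack pointer).  */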
23379 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23380 absolute addresses. */
23382 const char *
23383 output_probe_stack_range (rtx reg1, rtx reg2)
23385 static int labelno = 0;
23386 char loop_lab[32];
23387 rtx xops[2];
23389 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23391 /* Loop. */
23392 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23394 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23395 xops[0] = reg1;
23396 xops[1] = GEN_INT (PROBE_INTERVAL);
23397 output_asm_insn ("sub\t%0, %0, %1", xops);
23399 /* Probe at TEST_ADDR. */
23400 output_asm_insn ("str\tr0, [%0, #0]", xops);
23402 /* Test if TEST_ADDR == LAST_ADDR. */
23403 xops[1] = reg2;
23404 output_asm_insn ("cmp\t%0, %1", xops);
23406 /* Branch. */
23407 fputs ("\tbne\t", asm_out_file);
23408 assemble_name_raw (asm_out_file, loop_lab);
23409 fputc ('\n', asm_out_file);
23411 return "";
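/* With PROBE_INTERVAL == 4096 and, say, r4/r5 as REG1/REG2, the emitted
   loop looks roughly like:

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0  */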
23414 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23415 function. */
23416 void
23417 arm_expand_prologue (void)
23419 rtx amount;
23420 rtx insn;
23421 rtx ip_rtx;
23422 unsigned long live_regs_mask;
23423 unsigned long func_type;
23424 int fp_offset = 0;
23425 int saved_pretend_args = 0;
23426 int saved_regs = 0;
23427 unsigned HOST_WIDE_INT args_to_push;
23428 HOST_WIDE_INT size;
23429 arm_stack_offsets *offsets;
23430 bool clobber_ip;
23432 func_type = arm_current_func_type ();
23434 /* Naked functions don't have prologues. */
23435 if (IS_NAKED (func_type))
23437 if (flag_stack_usage_info)
23438 current_function_static_stack_size = 0;
23439 return;
23442 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23443 args_to_push = crtl->args.pretend_args_size;
23445 /* Compute which register we will have to save onto the stack. */
23446 offsets = arm_get_frame_offsets ();
23447 live_regs_mask = offsets->saved_regs_mask;
23449 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23451 if (IS_STACKALIGN (func_type))
23453 rtx r0, r1;
23455 /* Handle a word-aligned stack pointer. We generate the following:
23457 mov r0, sp
23458 bic r1, r0, #7
23459 mov sp, r1
23460 <save and restore r0 in normal prologue/epilogue>
23461 mov sp, r0
23462 bx lr
23464 The unwinder doesn't need to know about the stack realignment.
23465 Just tell it we saved SP in r0. */
23466 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23468 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23469 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23471 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23472 RTX_FRAME_RELATED_P (insn) = 1;
23473 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23475 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23477 /* ??? The CFA changes here, which may cause GDB to conclude that it
23478 has entered a different function. That said, the unwind info is
23479 correct, individually, before and after this instruction because
23480 we've described the save of SP, which will override the default
23481 handling of SP as restoring from the CFA. */
23482 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23485 /* Let's compute the static_chain_stack_bytes required and store it. Right
23486 now the value must be -1 as stored by arm_init_machine_status (). */
23487 cfun->machine->static_chain_stack_bytes
23488 = arm_compute_static_chain_stack_bytes ();
23490 /* The static chain register is the same as the IP register. If it is
23491 clobbered when creating the frame, we need to save and restore it. */
23492 clobber_ip = IS_NESTED (func_type)
23493 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23494 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23495 || flag_stack_clash_protection)
23496 && !df_regs_ever_live_p (LR_REGNUM)
23497 && arm_r3_live_at_start_p ()));
23499 /* Find somewhere to store IP whilst the frame is being created.
23500 We try the following places in order:
23502 1. The last argument register r3 if it is available.
23503 2. A slot on the stack above the frame if there are no
23504 arguments to push onto the stack.
23505 3. Register r3 again, after pushing the argument registers
23506 onto the stack, if this is a varargs function.
23507 4. The last slot on the stack created for the arguments to
23508 push, if this isn't a varargs function.
23510 Note - we only need to tell the dwarf2 backend about the SP
23511 adjustment in the second variant; the static chain register
23512 doesn't need to be unwound, as it doesn't contain a value
23513 inherited from the caller. */
23514 if (clobber_ip)
23516 if (!arm_r3_live_at_start_p ())
23517 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23518 else if (args_to_push == 0)
23520 rtx addr, dwarf;
23522 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23523 saved_regs += 4;
23525 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23526 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23527 fp_offset = 4;
23529 /* Just tell the dwarf backend that we adjusted SP. */
23530 dwarf = gen_rtx_SET (stack_pointer_rtx,
23531 plus_constant (Pmode, stack_pointer_rtx,
23532 -fp_offset));
23533 RTX_FRAME_RELATED_P (insn) = 1;
23534 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23536 else
23538 /* Store the args on the stack. */
23539 if (cfun->machine->uses_anonymous_args)
23541 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23542 (0xf0 >> (args_to_push / 4)) & 0xf);
23543 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23544 saved_pretend_args = 1;
23546 else
23548 rtx addr, dwarf;
23550 if (args_to_push == 4)
23551 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23552 else
23553 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23554 plus_constant (Pmode,
23555 stack_pointer_rtx,
23556 -args_to_push));
23558 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23560 /* Just tell the dwarf backend that we adjusted SP. */
23561 dwarf = gen_rtx_SET (stack_pointer_rtx,
23562 plus_constant (Pmode, stack_pointer_rtx,
23563 -args_to_push));
23564 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23567 RTX_FRAME_RELATED_P (insn) = 1;
23568 fp_offset = args_to_push;
23569 args_to_push = 0;
23573 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23575 if (IS_INTERRUPT (func_type))
23577 /* Interrupt functions must not corrupt any registers.
23578 Creating a frame pointer, however, corrupts the IP
23579 register, so we must push it first. */
23580 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23582 /* Do not set RTX_FRAME_RELATED_P on this insn.
23583 The dwarf stack unwinding code only wants to see one
23584 stack decrement per function, and this is not it. If
23585 this instruction is labeled as being part of the frame
23586 creation sequence then dwarf2out_frame_debug_expr will
23587 die when it encounters the assignment of IP to FP
23588 later on, since the use of SP here establishes SP as
23589 the CFA register and not IP.
23591 Anyway this instruction is not really part of the stack
23592 frame creation although it is part of the prologue. */
23595 insn = emit_set_insn (ip_rtx,
23596 plus_constant (Pmode, stack_pointer_rtx,
23597 fp_offset));
23598 RTX_FRAME_RELATED_P (insn) = 1;
23601 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23602 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23604 saved_regs += 4;
23605 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23606 GEN_INT (FPCXTNS_ENUM)));
23607 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23608 plus_constant (Pmode, stack_pointer_rtx, -4));
23609 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23610 RTX_FRAME_RELATED_P (insn) = 1;
23613 if (args_to_push)
23615 /* Push the argument registers, or reserve space for them. */
23616 if (cfun->machine->uses_anonymous_args)
23617 insn = emit_multi_reg_push
23618 ((0xf0 >> (args_to_push / 4)) & 0xf,
23619 (0xf0 >> (args_to_push / 4)) & 0xf);
23620 else
23621 insn = emit_insn
23622 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23623 GEN_INT (- args_to_push)));
23624 RTX_FRAME_RELATED_P (insn) = 1;
23627 /* If this is an interrupt service routine, and the link register
23628 is going to be pushed, and we're not generating an extra
23629 push of IP (needed when a frame is needed and the frame layout is APCS),
23630 subtracting four from LR now will mean that the function return
23631 can be done with a single instruction. */
23632 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23633 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23634 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23635 && TARGET_ARM)
23637 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23639 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23642 if (live_regs_mask)
23644 unsigned long dwarf_regs_mask = live_regs_mask;
23646 saved_regs += bit_count (live_regs_mask) * 4;
23647 if (optimize_size && !frame_pointer_needed
23648 && saved_regs == offsets->saved_regs - offsets->saved_args)
23650 /* If no coprocessor registers are being pushed and we don't have
23651 to worry about a frame pointer then push extra registers to
23652 create the stack frame. This is done in a way that does not
23653 alter the frame layout, so is independent of the epilogue. */
23654 int n;
23655 int frame;
23656 n = 0;
23657 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23658 n++;
23659 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23660 if (frame && n * 4 >= frame)
23662 n = frame / 4;
23663 live_regs_mask |= (1 << n) - 1;
23664 saved_regs += frame;
23668 if (TARGET_LDRD
23669 && current_tune->prefer_ldrd_strd
23670 && !optimize_function_for_size_p (cfun))
23672 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23673 if (TARGET_THUMB2)
23674 thumb2_emit_strd_push (live_regs_mask);
23675 else if (TARGET_ARM
23676 && !TARGET_APCS_FRAME
23677 && !IS_INTERRUPT (func_type))
23678 arm_emit_strd_push (live_regs_mask);
23679 else
23681 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23682 RTX_FRAME_RELATED_P (insn) = 1;
23685 else
23687 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23688 RTX_FRAME_RELATED_P (insn) = 1;
23692 if (! IS_VOLATILE (func_type))
23693 saved_regs += arm_save_coproc_regs ();
23695 if (frame_pointer_needed && TARGET_ARM)
23697 /* Create the new frame pointer. */
23698 if (TARGET_APCS_FRAME)
23700 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23701 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23702 RTX_FRAME_RELATED_P (insn) = 1;
23704 else
23706 insn = GEN_INT (saved_regs - (4 + fp_offset));
23707 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23708 stack_pointer_rtx, insn));
23709 RTX_FRAME_RELATED_P (insn) = 1;
23713 size = offsets->outgoing_args - offsets->saved_args;
23714 if (flag_stack_usage_info)
23715 current_function_static_stack_size = size;
23717 /* If this isn't an interrupt service routine and we have a frame, then do
23718 stack checking. We use IP as the first scratch register, except for the
23719 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23720 if (!IS_INTERRUPT (func_type)
23721 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23722 || flag_stack_clash_protection))
23724 unsigned int regno;
23726 if (!IS_NESTED (func_type) || clobber_ip)
23727 regno = IP_REGNUM;
23728 else if (df_regs_ever_live_p (LR_REGNUM))
23729 regno = LR_REGNUM;
23730 else
23731 regno = 3;
23733 if (crtl->is_leaf && !cfun->calls_alloca)
23735 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23736 arm_emit_probe_stack_range (get_stack_check_protect (),
23737 size - get_stack_check_protect (),
23738 regno, live_regs_mask);
23740 else if (size > 0)
23741 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23742 regno, live_regs_mask);
23745 /* Recover the static chain register. */
23746 if (clobber_ip)
23748 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23749 insn = gen_rtx_REG (SImode, 3);
23750 else
23752 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23753 insn = gen_frame_mem (SImode, insn);
23755 emit_set_insn (ip_rtx, insn);
23756 emit_insn (gen_force_register_use (ip_rtx));
23759 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23761 /* This add can produce multiple insns for a large constant, so we
23762 need to get tricky. */
23763 rtx_insn *last = get_last_insn ();
23765 amount = GEN_INT (offsets->saved_args + saved_regs
23766 - offsets->outgoing_args);
23768 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23769 amount));
23772 last = last ? NEXT_INSN (last) : get_insns ();
23773 RTX_FRAME_RELATED_P (last) = 1;
23775 while (last != insn);
23777 /* If the frame pointer is needed, emit a special barrier that
23778 will prevent the scheduler from moving stores to the frame
23779 before the stack adjustment. */
23780 if (frame_pointer_needed)
23781 emit_insn (gen_stack_tie (stack_pointer_rtx,
23782 hard_frame_pointer_rtx));
23786 if (frame_pointer_needed && TARGET_THUMB2)
23787 thumb_set_frame_pointer (offsets);
23789 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23791 unsigned long mask;
23793 mask = live_regs_mask;
23794 mask &= THUMB2_WORK_REGS;
23795 if (!IS_NESTED (func_type))
23796 mask |= (1 << IP_REGNUM);
23797 arm_load_pic_register (mask, NULL_RTX);
23800 /* If we are profiling, make sure no instructions are scheduled before
23801 the call to mcount. Similarly if the user has requested no
23802 scheduling in the prolog. Similarly if we want non-call exceptions
23803 using the EABI unwinder, to prevent faulting instructions from being
23804 swapped with a stack adjustment. */
23805 if (crtl->profile || !TARGET_SCHED_PROLOG
23806 || (arm_except_unwind_info (&global_options) == UI_TARGET
23807 && cfun->can_throw_non_call_exceptions))
23808 emit_insn (gen_blockage ());
23810 /* If the link register is being kept alive, with the return address in it,
23811 then make sure that it does not get reused by the ce2 pass. */
23812 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23813 cfun->machine->lr_save_eliminated = 1;
23816 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23817 static void
23818 arm_print_condition (FILE *stream)
23820 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23822 /* Branch conversion is not implemented for Thumb-2. */
23823 if (TARGET_THUMB)
23825 output_operand_lossage ("predicated Thumb instruction");
23826 return;
23828 if (current_insn_predicate != NULL)
23830 output_operand_lossage
23831 ("predicated instruction in conditional sequence");
23832 return;
23835 fputs (arm_condition_codes[arm_current_cc], stream);
23837 else if (current_insn_predicate)
23839 enum arm_cond_code code;
23841 if (TARGET_THUMB1)
23843 output_operand_lossage ("predicated Thumb instruction");
23844 return;
23847 code = get_arm_condition_code (current_insn_predicate);
23848 fputs (arm_condition_codes[code], stream);
23853 /* Globally reserved letters: acln
23854 Punctuation letters currently used: @_|?().!#
23855 Lower case letters currently used: bcdefhimpqtvwxyz
23856 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23857 Letters previously used, but now deprecated/obsolete: sWXYZ.
23859 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23861 If CODE is 'd', then X is a condition operand and the instruction
23862 should only be executed if the condition is true.
23863 If CODE is 'D', then X is a condition operand and the instruction
23864 should only be executed if the condition is false: however, if the mode
23865 of the comparison is CCFPEmode, then always execute the instruction -- we
23866 do this because in these circumstances !GE does not necessarily imply LT;
23867 in these cases the instruction pattern will take care to make sure that
23868 an instruction containing %d will follow, thereby undoing the effects of
23869 doing this instruction unconditionally.
23870 If CODE is 'N' then X is a floating point operand that must be negated
23871 before output.
23872 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23873 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23874 If CODE is 'V', then the operand must be a CONST_INT representing
23875 the bits to preserve in the modified register (Rd) of a BFI or BFC
23876 instruction: print out both the width and lsb (shift) fields. */
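/* A few illustrative expansions on hypothetical operands: %c on
   (const_int 10) prints "10"; %B on (const_int 0xffffff00) prints "255";
   %b on (const_int 0x80) prints "#7"; %L on (const_int 0x12345678)
   prints "22136" (0x5678).  */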
23877 static void
23878 arm_print_operand (FILE *stream, rtx x, int code)
23880 switch (code)
23882 case '@':
23883 fputs (ASM_COMMENT_START, stream);
23884 return;
23886 case '_':
23887 fputs (user_label_prefix, stream);
23888 return;
23890 case '|':
23891 fputs (REGISTER_PREFIX, stream);
23892 return;
23894 case '?':
23895 arm_print_condition (stream);
23896 return;
23898 case '.':
23899 /* The current condition code for a condition code setting instruction.
23900 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23901 fputc('s', stream);
23902 arm_print_condition (stream);
23903 return;
23905 case '!':
23906 /* If the instruction is conditionally executed then print
23907 the current condition code, otherwise print 's'. */
23908 gcc_assert (TARGET_THUMB2);
23909 if (current_insn_predicate)
23910 arm_print_condition (stream);
23911 else
23912 fputc('s', stream);
23913 break;
23915 /* %# is a "break" sequence. It doesn't output anything, but is used to
23916 separate e.g. operand numbers from following text, if that text consists
23917 of further digits which we don't want to be part of the operand
23918 number. */
23919 case '#':
23920 return;
23922 case 'N':
23924 REAL_VALUE_TYPE r;
23925 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23926 fprintf (stream, "%s", fp_const_from_val (&r));
23928 return;
23930 /* An integer or symbol address without a preceding # sign. */
23931 case 'c':
23932 switch (GET_CODE (x))
23934 case CONST_INT:
23935 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23936 break;
23938 case SYMBOL_REF:
23939 output_addr_const (stream, x);
23940 break;
23942 case CONST:
23943 if (GET_CODE (XEXP (x, 0)) == PLUS
23944 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23946 output_addr_const (stream, x);
23947 break;
23949 /* Fall through. */
23951 default:
23952 output_operand_lossage ("Unsupported operand for code '%c'", code);
23954 return;
23956 /* An integer that we want to print in HEX. */
23957 case 'x':
23958 switch (GET_CODE (x))
23960 case CONST_INT:
23961 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23962 break;
23964 default:
23965 output_operand_lossage ("Unsupported operand for code '%c'", code);
23967 return;
23969 case 'B':
23970 if (CONST_INT_P (x))
23972 HOST_WIDE_INT val;
23973 val = ARM_SIGN_EXTEND (~INTVAL (x));
23974 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23976 else
23978 putc ('~', stream);
23979 output_addr_const (stream, x);
23981 return;
23983 case 'b':
23984 /* Print the log2 of a CONST_INT. */
23986 HOST_WIDE_INT val;
23988 if (!CONST_INT_P (x)
23989 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23990 output_operand_lossage ("Unsupported operand for code '%c'", code);
23991 else
23992 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23994 return;
23996 case 'L':
23997 /* The low 16 bits of an immediate constant. */
23998 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
23999 return;
24001 case 'i':
24002 fprintf (stream, "%s", arithmetic_instr (x, 1));
24003 return;
24005 case 'I':
24006 fprintf (stream, "%s", arithmetic_instr (x, 0));
24007 return;
24009 case 'S':
24011 HOST_WIDE_INT val;
24012 const char *shift;
24014 shift = shift_op (x, &val);
24016 if (shift)
24018 fprintf (stream, ", %s ", shift);
24019 if (val == -1)
24020 arm_print_operand (stream, XEXP (x, 1), 0);
24021 else
24022 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24025 return;
24027 /* An explanation of the 'Q', 'R' and 'H' register operands:
24029 In a pair of registers containing a DI or DF value the 'Q'
24030 operand returns the register number of the register containing
24031 the least significant part of the value. The 'R' operand returns
24032 the register number of the register containing the most
24033 significant part of the value.
24035 The 'H' operand returns the higher of the two register numbers.
24036 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24037 same as the 'Q' operand, since the most significant part of the
24038 value is held in the lower number register. The reverse is true
24039 on systems where WORDS_BIG_ENDIAN is false.
24041 The purpose of these operands is to distinguish between cases
24042 where the endian-ness of the values is important (for example
24043 when they are added together), and cases where the endian-ness
24044 is irrelevant, but the order of register operations is important.
24045 For example when loading a value from memory into a register
24046 pair, the endian-ness does not matter. Provided that the value
24047 from the lower memory address is put into the lower numbered
24048 register, and the value from the higher address is put into the
24049 higher numbered register, the load will work regardless of whether
24050 the value being loaded is big-wordian or little-wordian. The
24051 order of the two register loads can matter however, if the address
24052 of the memory location is actually held in one of the registers
24053 being overwritten by the load.
24055 The 'Q' and 'R' constraints are also available for 64-bit
24056 constants. */
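/* For example, for a DImode value held in r0/r1 with little-endian word
   order, %Q prints r0, %R prints r1 and %H prints r1; with big-endian
   word order %Q prints r1, %R prints r0 and %H still prints r1.  */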
24057 case 'Q':
24058 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24060 rtx part = gen_lowpart (SImode, x);
24061 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24062 return;
24065 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24067 output_operand_lossage ("invalid operand for code '%c'", code);
24068 return;
24071 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24072 return;
24074 case 'R':
24075 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24077 machine_mode mode = GET_MODE (x);
24078 rtx part;
24080 if (mode == VOIDmode)
24081 mode = DImode;
24082 part = gen_highpart_mode (SImode, mode, x);
24083 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24084 return;
24087 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24089 output_operand_lossage ("invalid operand for code '%c'", code);
24090 return;
24093 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24094 return;
24096 case 'H':
24097 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24099 output_operand_lossage ("invalid operand for code '%c'", code);
24100 return;
24103 asm_fprintf (stream, "%r", REGNO (x) + 1);
24104 return;
24106 case 'J':
24107 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24109 output_operand_lossage ("invalid operand for code '%c'", code);
24110 return;
24113 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24114 return;
24116 case 'K':
24117 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24119 output_operand_lossage ("invalid operand for code '%c'", code);
24120 return;
24123 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24124 return;
24126 case 'm':
24127 asm_fprintf (stream, "%r",
24128 REG_P (XEXP (x, 0))
24129 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24130 return;
24132 case 'M':
24133 asm_fprintf (stream, "{%r-%r}",
24134 REGNO (x),
24135 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24136 return;
24138 /* Like 'M', but writing doubleword vector registers, for use by Neon
24139 insns. */
24140 case 'h':
24142 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24143 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24144 if (numregs == 1)
24145 asm_fprintf (stream, "{d%d}", regno);
24146 else
24147 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24149 return;
24151 case 'd':
24152 /* CONST_TRUE_RTX means always -- that's the default. */
24153 if (x == const_true_rtx)
24154 return;
24156 if (!COMPARISON_P (x))
24158 output_operand_lossage ("invalid operand for code '%c'", code);
24159 return;
24162 fputs (arm_condition_codes[get_arm_condition_code (x)],
24163 stream);
24164 return;
24166 case 'D':
24167 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24168 want to do that. */
24169 if (x == const_true_rtx)
24171 output_operand_lossage ("instruction never executed");
24172 return;
24174 if (!COMPARISON_P (x))
24176 output_operand_lossage ("invalid operand for code '%c'", code);
24177 return;
24180 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24181 (get_arm_condition_code (x))],
24182 stream);
24183 return;
24185 case 'V':
24187 /* Output the LSB (shift) and width for a bitmask instruction
24188 based on a literal mask. The LSB is printed first,
24189 followed by the width.
24191 Eg. For 0b1...1110001, the result is #1, #3. */
24192 if (!CONST_INT_P (x))
24194 output_operand_lossage ("invalid operand for code '%c'", code);
24195 return;
24198 unsigned HOST_WIDE_INT val
24199 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24200 int lsb = exact_log2 (val & -val);
24201 asm_fprintf (stream, "#%d, #%d", lsb,
24202 (exact_log2 (val + (val & -val)) - lsb));
24204 return;
24206 case 's':
24207 case 'W':
24208 case 'X':
24209 case 'Y':
24210 case 'Z':
24211 /* Former Maverick support, removed after GCC-4.7. */
24212 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24213 return;
24215 case 'U':
24216 if (!REG_P (x)
24217 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24218 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24219 /* Bad value for wCG register number. */
24221 output_operand_lossage ("invalid operand for code '%c'", code);
24222 return;
24225 else
24226 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24227 return;
24229 /* Print an iWMMXt control register name. */
24230 case 'w':
24231 if (!CONST_INT_P (x)
24232 || INTVAL (x) < 0
24233 || INTVAL (x) >= 16)
24234 /* Bad value for wC register number. */
24236 output_operand_lossage ("invalid operand for code '%c'", code);
24237 return;
24240 else
24242 static const char * wc_reg_names [16] =
24244 "wCID", "wCon", "wCSSF", "wCASF",
24245 "wC4", "wC5", "wC6", "wC7",
24246 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24247 "wC12", "wC13", "wC14", "wC15"
24250 fputs (wc_reg_names [INTVAL (x)], stream);
24252 return;
24254 /* Print the high single-precision register of a VFP double-precision
24255 register. */
24256 case 'p':
24258 machine_mode mode = GET_MODE (x);
24259 int regno;
24261 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24263 output_operand_lossage ("invalid operand for code '%c'", code);
24264 return;
24267 regno = REGNO (x);
24268 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24270 output_operand_lossage ("invalid operand for code '%c'", code);
24271 return;
24274 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24276 return;
24278 /* Print a VFP/Neon double precision or quad precision register name. */
24279 case 'P':
24280 case 'q':
24282 machine_mode mode = GET_MODE (x);
24283 int is_quad = (code == 'q');
24284 int regno;
24286 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24288 output_operand_lossage ("invalid operand for code '%c'", code);
24289 return;
24292 if (!REG_P (x)
24293 || !IS_VFP_REGNUM (REGNO (x)))
24295 output_operand_lossage ("invalid operand for code '%c'", code);
24296 return;
24299 regno = REGNO (x);
24300 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24301 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24303 output_operand_lossage ("invalid operand for code '%c'", code);
24304 return;
24307 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24308 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24310 return;
24312 /* These two codes print the low/high doubleword register of a Neon quad
24313 register, respectively. For pair-structure types, they can also print
24314 low/high quadword registers. */
24315 case 'e':
24316 case 'f':
24318 machine_mode mode = GET_MODE (x);
24319 int regno;
24321 if ((GET_MODE_SIZE (mode) != 16
24322 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24324 output_operand_lossage ("invalid operand for code '%c'", code);
24325 return;
24328 regno = REGNO (x);
24329 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24331 output_operand_lossage ("invalid operand for code '%c'", code);
24332 return;
24335 if (GET_MODE_SIZE (mode) == 16)
24336 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24337 + (code == 'f' ? 1 : 0));
24338 else
24339 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24340 + (code == 'f' ? 1 : 0));
24342 return;
24344 /* Print a VFPv3 floating-point constant, represented as an integer
24345 index. */
24346 case 'G':
24348 int index = vfp3_const_double_index (x);
24349 gcc_assert (index != -1);
24350 fprintf (stream, "%d", index);
24352 return;
24354 /* Print bits representing opcode features for Neon.
24356 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24357 and polynomials as unsigned.
24359 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24361 Bit 2 is 1 for rounding functions, 0 otherwise. */
24363 /* Identify the type as 's', 'u', 'p' or 'f'. */
24364 case 'T':
24366 HOST_WIDE_INT bits = INTVAL (x);
24367 fputc ("uspf"[bits & 3], stream);
24369 return;
24371 /* Likewise, but signed and unsigned integers are both 'i'. */
24372 case 'F':
24374 HOST_WIDE_INT bits = INTVAL (x);
24375 fputc ("iipf"[bits & 3], stream);
24377 return;
24379 /* As for 'T', but emit 'u' instead of 'p'. */
24380 case 't':
24382 HOST_WIDE_INT bits = INTVAL (x);
24383 fputc ("usuf"[bits & 3], stream);
24385 return;
24387 /* Bit 2: rounding (vs none). */
24388 case 'O':
24390 HOST_WIDE_INT bits = INTVAL (x);
24391 fputs ((bits & 4) != 0 ? "r" : "", stream);
24393 return;
24395 /* Memory operand for vld1/vst1 instruction. */
24396 case 'A':
24398 rtx addr;
24399 bool postinc = FALSE;
24400 rtx postinc_reg = NULL;
24401 unsigned align, memsize, align_bits;
24403 gcc_assert (MEM_P (x));
24404 addr = XEXP (x, 0);
24405 if (GET_CODE (addr) == POST_INC)
24407 postinc = 1;
24408 addr = XEXP (addr, 0);
24410 if (GET_CODE (addr) == POST_MODIFY)
24412 postinc_reg = XEXP( XEXP (addr, 1), 1);
24413 addr = XEXP (addr, 0);
24415 asm_fprintf (stream, "[%r", REGNO (addr));
24417 /* We know the alignment of this access, so we can emit a hint in the
24418 instruction (for some alignments) as an aid to the memory subsystem
24419 of the target. */
24420 align = MEM_ALIGN (x) >> 3;
24421 memsize = MEM_SIZE (x);
24423 /* Only certain alignment specifiers are supported by the hardware. */
24424 if (memsize == 32 && (align % 32) == 0)
24425 align_bits = 256;
24426 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24427 align_bits = 128;
24428 else if (memsize >= 8 && (align % 8) == 0)
24429 align_bits = 64;
24430 else
24431 align_bits = 0;
24433 if (align_bits != 0)
24434 asm_fprintf (stream, ":%d", align_bits);
24436 asm_fprintf (stream, "]");
24438 if (postinc)
24439 fputs("!", stream);
24440 if (postinc_reg)
24441 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24443 return;
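/* For example, a 16-byte access whose MEM_ALIGN is 128 bits on base
   register r0 is printed as "[r0:128]", or "[r0:128]!" when the address
   is post-incremented (the register used here is illustrative).  */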
24445 /* To print the memory operand with "Ux" or "Uj" constraint. Based on the
24446 rtx_code, the memory operand's output looks like one of the following.
24447 1. [Rn], #+/-<imm>
24448 2. [Rn, #+/-<imm>]!
24449 3. [Rn, #+/-<imm>]
24450 4. [Rn]. */
24451 case 'E':
24453 rtx addr;
24454 rtx postinc_reg = NULL;
24455 unsigned inc_val = 0;
24456 enum rtx_code code;
24458 gcc_assert (MEM_P (x));
24459 addr = XEXP (x, 0);
24460 code = GET_CODE (addr);
24461 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24462 || code == PRE_DEC)
24464 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24465 inc_val = GET_MODE_SIZE (GET_MODE (x));
24466 if (code == POST_INC || code == POST_DEC)
24467 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24468 ? "": "-", inc_val);
24469 else
24470 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24471 ? "": "-", inc_val);
24473 else if (code == POST_MODIFY || code == PRE_MODIFY)
24475 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24476 postinc_reg = XEXP (XEXP (addr, 1), 1);
24477 if (postinc_reg && CONST_INT_P (postinc_reg))
24479 if (code == POST_MODIFY)
24480 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24481 else
24482 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24485 else if (code == PLUS)
24487 rtx base = XEXP (addr, 0);
24488 rtx index = XEXP (addr, 1);
24490 gcc_assert (REG_P (base) && CONST_INT_P (index));
24492 HOST_WIDE_INT offset = INTVAL (index);
24493 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24495 else
24497 gcc_assert (REG_P (addr));
24498 asm_fprintf (stream, "[%r]",REGNO (addr));
24501 return;
24503 case 'C':
24505 rtx addr;
24507 gcc_assert (MEM_P (x));
24508 addr = XEXP (x, 0);
24509 gcc_assert (REG_P (addr));
24510 asm_fprintf (stream, "[%r]", REGNO (addr));
24512 return;
24514 /* Translate an S register number into a D register number and element index. */
24515 case 'y':
24517 machine_mode mode = GET_MODE (x);
24518 int regno;
24520 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24522 output_operand_lossage ("invalid operand for code '%c'", code);
24523 return;
24526 regno = REGNO (x);
24527 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24529 output_operand_lossage ("invalid operand for code '%c'", code);
24530 return;
24533 regno = regno - FIRST_VFP_REGNUM;
24534 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24536 return;
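/* For example, %y applied to s5 prints "d2[1]" (s5 is the odd half of d2).  */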
24538 case 'v':
24539 gcc_assert (CONST_DOUBLE_P (x));
24540 int result;
24541 result = vfp3_const_double_for_fract_bits (x);
24542 if (result == 0)
24543 result = vfp3_const_double_for_bits (x);
24544 fprintf (stream, "#%d", result);
24545 return;
24547 /* Register specifier for vld1.16/vst1.16. Translate the S register
24548 number into a D register number and element index. */
24549 case 'z':
24551 machine_mode mode = GET_MODE (x);
24552 int regno;
24554 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24556 output_operand_lossage ("invalid operand for code '%c'", code);
24557 return;
24560 regno = REGNO (x);
24561 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24563 output_operand_lossage ("invalid operand for code '%c'", code);
24564 return;
24567 regno = regno - FIRST_VFP_REGNUM;
24568 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24570 return;
24572 default:
24573 if (x == 0)
24575 output_operand_lossage ("missing operand");
24576 return;
24579 switch (GET_CODE (x))
24581 case REG:
24582 asm_fprintf (stream, "%r", REGNO (x));
24583 break;
24585 case MEM:
24586 output_address (GET_MODE (x), XEXP (x, 0));
24587 break;
24589 case CONST_DOUBLE:
24591 char fpstr[20];
24592 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24593 sizeof (fpstr), 0, 1);
24594 fprintf (stream, "#%s", fpstr);
24596 break;
24598 default:
24599 gcc_assert (GET_CODE (x) != NEG);
24600 fputc ('#', stream);
24601 if (GET_CODE (x) == HIGH)
24603 fputs (":lower16:", stream);
24604 x = XEXP (x, 0);
24607 output_addr_const (stream, x);
24608 break;
24613 /* Target hook for printing a memory address. */
24614 static void
24615 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24617 if (TARGET_32BIT)
24619 int is_minus = GET_CODE (x) == MINUS;
24621 if (REG_P (x))
24622 asm_fprintf (stream, "[%r]", REGNO (x));
24623 else if (GET_CODE (x) == PLUS || is_minus)
24625 rtx base = XEXP (x, 0);
24626 rtx index = XEXP (x, 1);
24627 HOST_WIDE_INT offset = 0;
24628 if (!REG_P (base)
24629 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24631 /* Ensure that BASE is a register. */
24632 /* (one of them must be). */
24633 /* Also ensure the SP is not used as an index register. */
24634 std::swap (base, index);
24636 switch (GET_CODE (index))
24638 case CONST_INT:
24639 offset = INTVAL (index);
24640 if (is_minus)
24641 offset = -offset;
24642 asm_fprintf (stream, "[%r, #%wd]",
24643 REGNO (base), offset);
24644 break;
24646 case REG:
24647 asm_fprintf (stream, "[%r, %s%r]",
24648 REGNO (base), is_minus ? "-" : "",
24649 REGNO (index));
24650 break;
24652 case MULT:
24653 case ASHIFTRT:
24654 case LSHIFTRT:
24655 case ASHIFT:
24656 case ROTATERT:
24658 asm_fprintf (stream, "[%r, %s%r",
24659 REGNO (base), is_minus ? "-" : "",
24660 REGNO (XEXP (index, 0)));
24661 arm_print_operand (stream, index, 'S');
24662 fputs ("]", stream);
24663 break;
24666 default:
24667 gcc_unreachable ();
24670 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24671 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24673 gcc_assert (REG_P (XEXP (x, 0)));
24675 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24676 asm_fprintf (stream, "[%r, #%s%d]!",
24677 REGNO (XEXP (x, 0)),
24678 GET_CODE (x) == PRE_DEC ? "-" : "",
24679 GET_MODE_SIZE (mode));
24680 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24681 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24682 else
24683 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24684 GET_CODE (x) == POST_DEC ? "-" : "",
24685 GET_MODE_SIZE (mode));
24687 else if (GET_CODE (x) == PRE_MODIFY)
24689 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24690 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24691 asm_fprintf (stream, "#%wd]!",
24692 INTVAL (XEXP (XEXP (x, 1), 1)));
24693 else
24694 asm_fprintf (stream, "%r]!",
24695 REGNO (XEXP (XEXP (x, 1), 1)));
24697 else if (GET_CODE (x) == POST_MODIFY)
24699 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24700 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24701 asm_fprintf (stream, "#%wd",
24702 INTVAL (XEXP (XEXP (x, 1), 1)));
24703 else
24704 asm_fprintf (stream, "%r",
24705 REGNO (XEXP (XEXP (x, 1), 1)));
24707 else output_addr_const (stream, x);
24709 else
24711 if (REG_P (x))
24712 asm_fprintf (stream, "[%r]", REGNO (x));
24713 else if (GET_CODE (x) == POST_INC)
24714 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24715 else if (GET_CODE (x) == PLUS)
24717 gcc_assert (REG_P (XEXP (x, 0)));
24718 if (CONST_INT_P (XEXP (x, 1)))
24719 asm_fprintf (stream, "[%r, #%wd]",
24720 REGNO (XEXP (x, 0)),
24721 INTVAL (XEXP (x, 1)));
24722 else
24723 asm_fprintf (stream, "[%r, %r]",
24724 REGNO (XEXP (x, 0)),
24725 REGNO (XEXP (x, 1)));
24727 else
24728 output_addr_const (stream, x);
24732 /* Target hook for indicating whether a punctuation character for
24733 TARGET_PRINT_OPERAND is valid. */
24734 static bool
24735 arm_print_operand_punct_valid_p (unsigned char code)
24737 return (code == '@' || code == '|' || code == '.'
24738 || code == '(' || code == ')' || code == '#'
24739 || (TARGET_32BIT && (code == '?'))
24740 || (TARGET_THUMB2 && (code == '!'))
24741 || (TARGET_THUMB && (code == '_')));
24744 /* Target hook for assembling integer objects. The ARM version needs to
24745 handle word-sized values specially. */
24746 static bool
24747 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24749 machine_mode mode;
24751 if (size == UNITS_PER_WORD && aligned_p)
24753 fputs ("\t.word\t", asm_out_file);
24754 output_addr_const (asm_out_file, x);
24756 /* Mark symbols as position independent. We only do this in the
24757 .text segment, not in the .data segment. */
24758 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24759 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24761 /* See legitimize_pic_address for an explanation of the
24762 TARGET_VXWORKS_RTP check. */
24763 /* References to weak symbols cannot be resolved locally:
24764 they may be overridden by a non-weak definition at link
24765 time. */
24766 if (!arm_pic_data_is_text_relative
24767 || (SYMBOL_REF_P (x)
24768 && (!SYMBOL_REF_LOCAL_P (x)
24769 || (SYMBOL_REF_DECL (x)
24770 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24771 || (SYMBOL_REF_FUNCTION_P (x)
24772 && !arm_fdpic_local_funcdesc_p (x)))))
24774 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24775 fputs ("(GOTFUNCDESC)", asm_out_file);
24776 else
24777 fputs ("(GOT)", asm_out_file);
24779 else
24781 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24782 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24783 else
24785 bool is_readonly;
24787 if (!TARGET_FDPIC
24788 || arm_is_segment_info_known (x, &is_readonly))
24789 fputs ("(GOTOFF)", asm_out_file);
24790 else
24791 fputs ("(GOT)", asm_out_file);
24796 /* For FDPIC we also have to mark symbol for .data section. */
24797 if (TARGET_FDPIC
24798 && !making_const_table
24799 && SYMBOL_REF_P (x)
24800 && SYMBOL_REF_FUNCTION_P (x))
24801 fputs ("(FUNCDESC)", asm_out_file);
24803 fputc ('\n', asm_out_file);
24804 return true;
24807 mode = GET_MODE (x);
24809 if (arm_vector_mode_supported_p (mode))
24811 int i, units;
24813 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24815 units = CONST_VECTOR_NUNITS (x);
24816 size = GET_MODE_UNIT_SIZE (mode);
24818 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24819 for (i = 0; i < units; i++)
24821 rtx elt = CONST_VECTOR_ELT (x, i);
24822 assemble_integer
24823 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24825 else
24826 for (i = 0; i < units; i++)
24828 rtx elt = CONST_VECTOR_ELT (x, i);
24829 assemble_real
24830 (*CONST_DOUBLE_REAL_VALUE (elt),
24831 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24832 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24835 return true;
24838 return default_assemble_integer (x, size, aligned_p);
24841 static void
24842 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24844 section *s;
24846 if (!TARGET_AAPCS_BASED)
24848 (is_ctor ?
24849 default_named_section_asm_out_constructor
24850 : default_named_section_asm_out_destructor) (symbol, priority);
24851 return;
24854 /* Put these in the .init_array section, using a special relocation. */
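/* Sketch of the expected output (the symbol name "foo" is hypothetical):
   a constructor with priority 65 is placed in a section named
   ".init_array.00065" and emitted as "\t.word\tfoo(target1)", the
   "(target1)" suffix being the special relocation mentioned above.  */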
24855 if (priority != DEFAULT_INIT_PRIORITY)
24857 char buf[18];
24858 sprintf (buf, "%s.%.5u",
24859 is_ctor ? ".init_array" : ".fini_array",
24860 priority);
24861 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24863 else if (is_ctor)
24864 s = ctors_section;
24865 else
24866 s = dtors_section;
24868 switch_to_section (s);
24869 assemble_align (POINTER_SIZE);
24870 fputs ("\t.word\t", asm_out_file);
24871 output_addr_const (asm_out_file, symbol);
24872 fputs ("(target1)\n", asm_out_file);
24875 /* Add a function to the list of static constructors. */
24877 static void
24878 arm_elf_asm_constructor (rtx symbol, int priority)
24880 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24883 /* Add a function to the list of static destructors. */
24885 static void
24886 arm_elf_asm_destructor (rtx symbol, int priority)
24888 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24891 /* A finite state machine takes care of noticing whether or not instructions
24892 can be conditionally executed, and thus decrease execution time and code
24893 size by deleting branch instructions. The fsm is controlled by
24894 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24896 /* The states of the fsm controlling condition codes are:
24897 0: normal, do nothing special
24898 1: make ASM_OUTPUT_OPCODE not output this instruction
24899 2: make ASM_OUTPUT_OPCODE not output this instruction
24900 3: make instructions conditional
24901 4: make instructions conditional
24903 State transitions (state->state by whom under condition):
24904 0 -> 1 final_prescan_insn if the `target' is a label
24905 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24906 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24907 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24908 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24909 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24910 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24911 (the target insn is arm_target_insn).
24913 If the jump clobbers the conditions then we use states 2 and 4.
24915 A similar thing can be done with conditional return insns.
24917 XXX In case the `target' is an unconditional branch, this conditionalising
24918 of the instructions always reduces code size, but not always execution
24919 time. But then, I want to reduce the code size to somewhere near what
24920 /bin/cc produces. */
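/* As a hedged illustration of the overall effect (register names and label
   are arbitrary), the ccfsm turns a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   suppressing the branch and conditionalising the skipped insn.  */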
24922 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24923 instructions. When a COND_EXEC instruction is seen the subsequent
24924 instructions are scanned so that multiple conditional instructions can be
24925 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24926 specify the length and true/false mask for the IT block. These will be
24927 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
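/* For example (a sketch, not verbatim compiler output): three consecutive
   single conditional insns with conditions EQ, EQ and NE give
   arm_condexec_masklen == 3 and arm_condexec_mask == 0b011, which
   thumb2_asm_output_opcode prints as a single "itte eq" covering two
   then-insns and one else-insn.  */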
24929 /* Returns the index of the ARM condition code string in
24930 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24931 COMPARISON should be an rtx like `(eq (...) (...))'. */
24933 enum arm_cond_code
24934 maybe_get_arm_condition_code (rtx comparison)
24936 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24937 enum arm_cond_code code;
24938 enum rtx_code comp_code = GET_CODE (comparison);
24940 if (GET_MODE_CLASS (mode) != MODE_CC)
24941 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24942 XEXP (comparison, 1));
24944 switch (mode)
24946 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24947 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24948 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24949 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24950 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24951 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24952 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24953 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24954 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24955 case E_CC_DLTUmode: code = ARM_CC;
24957 dominance:
24958 if (comp_code == EQ)
24959 return ARM_INVERSE_CONDITION_CODE (code);
24960 if (comp_code == NE)
24961 return code;
24962 return ARM_NV;
24964 case E_CC_NZmode:
24965 switch (comp_code)
24967 case NE: return ARM_NE;
24968 case EQ: return ARM_EQ;
24969 case GE: return ARM_PL;
24970 case LT: return ARM_MI;
24971 default: return ARM_NV;
24974 case E_CC_Zmode:
24975 switch (comp_code)
24977 case NE: return ARM_NE;
24978 case EQ: return ARM_EQ;
24979 default: return ARM_NV;
24982 case E_CC_Nmode:
24983 switch (comp_code)
24985 case NE: return ARM_MI;
24986 case EQ: return ARM_PL;
24987 default: return ARM_NV;
24990 case E_CCFPEmode:
24991 case E_CCFPmode:
24992 /* We can handle all cases except UNEQ and LTGT. */
24993 switch (comp_code)
24995 case GE: return ARM_GE;
24996 case GT: return ARM_GT;
24997 case LE: return ARM_LS;
24998 case LT: return ARM_MI;
24999 case NE: return ARM_NE;
25000 case EQ: return ARM_EQ;
25001 case ORDERED: return ARM_VC;
25002 case UNORDERED: return ARM_VS;
25003 case UNLT: return ARM_LT;
25004 case UNLE: return ARM_LE;
25005 case UNGT: return ARM_HI;
25006 case UNGE: return ARM_PL;
25007 /* UNEQ and LTGT do not have a representation. */
25008 case UNEQ: /* Fall through. */
25009 case LTGT: /* Fall through. */
25010 default: return ARM_NV;
25013 case E_CC_SWPmode:
25014 switch (comp_code)
25016 case NE: return ARM_NE;
25017 case EQ: return ARM_EQ;
25018 case GE: return ARM_LE;
25019 case GT: return ARM_LT;
25020 case LE: return ARM_GE;
25021 case LT: return ARM_GT;
25022 case GEU: return ARM_LS;
25023 case GTU: return ARM_CC;
25024 case LEU: return ARM_CS;
25025 case LTU: return ARM_HI;
25026 default: return ARM_NV;
25029 case E_CC_Cmode:
25030 switch (comp_code)
25032 case LTU: return ARM_CS;
25033 case GEU: return ARM_CC;
25034 default: return ARM_NV;
25037 case E_CC_NVmode:
25038 switch (comp_code)
25040 case GE: return ARM_GE;
25041 case LT: return ARM_LT;
25042 default: return ARM_NV;
25045 case E_CC_Bmode:
25046 switch (comp_code)
25048 case GEU: return ARM_CS;
25049 case LTU: return ARM_CC;
25050 default: return ARM_NV;
25053 case E_CC_Vmode:
25054 switch (comp_code)
25056 case NE: return ARM_VS;
25057 case EQ: return ARM_VC;
25058 default: return ARM_NV;
25061 case E_CC_ADCmode:
25062 switch (comp_code)
25064 case GEU: return ARM_CS;
25065 case LTU: return ARM_CC;
25066 default: return ARM_NV;
25069 case E_CCmode:
25070 case E_CC_RSBmode:
25071 switch (comp_code)
25073 case NE: return ARM_NE;
25074 case EQ: return ARM_EQ;
25075 case GE: return ARM_GE;
25076 case GT: return ARM_GT;
25077 case LE: return ARM_LE;
25078 case LT: return ARM_LT;
25079 case GEU: return ARM_CS;
25080 case GTU: return ARM_HI;
25081 case LEU: return ARM_LS;
25082 case LTU: return ARM_CC;
25083 default: return ARM_NV;
25086 default: gcc_unreachable ();
25090 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25091 static enum arm_cond_code
25092 get_arm_condition_code (rtx comparison)
25094 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25095 gcc_assert (code != ARM_NV);
25096 return code;
25099 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25100 code registers when not targeting Thumb1. The VFP condition register
25101 only exists when generating hard-float code. */
25102 static bool
25103 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25105 if (!TARGET_32BIT)
25106 return false;
25108 *p1 = CC_REGNUM;
25109 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25110 return true;
25113 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25114 instructions. */
25115 void
25116 thumb2_final_prescan_insn (rtx_insn *insn)
25118 rtx_insn *first_insn = insn;
25119 rtx body = PATTERN (insn);
25120 rtx predicate;
25121 enum arm_cond_code code;
25122 int n;
25123 int mask;
25124 int max;
25126 /* max_insns_skipped in the tune was already taken into account in the
25127 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
25128 just emit the IT blocks as large as we can. It does not make sense to split
25129 the IT blocks. */
25130 max = MAX_INSN_PER_IT_BLOCK;
25132 /* Remove the previous insn from the count of insns to be output. */
25133 if (arm_condexec_count)
25134 arm_condexec_count--;
25136 /* Nothing to do if we are already inside a conditional block. */
25137 if (arm_condexec_count)
25138 return;
25140 if (GET_CODE (body) != COND_EXEC)
25141 return;
25143 /* Conditional jumps are implemented directly. */
25144 if (JUMP_P (insn))
25145 return;
25147 predicate = COND_EXEC_TEST (body);
25148 arm_current_cc = get_arm_condition_code (predicate);
25150 n = get_attr_ce_count (insn);
25151 arm_condexec_count = 1;
25152 arm_condexec_mask = (1 << n) - 1;
25153 arm_condexec_masklen = n;
25154 /* See if subsequent instructions can be combined into the same block. */
25155 for (;;)
25157 insn = next_nonnote_insn (insn);
25159 /* Jumping into the middle of an IT block is illegal, so a label or
25160 barrier terminates the block. */
25161 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25162 break;
25164 body = PATTERN (insn);
25165 /* USE and CLOBBER aren't really insns, so just skip them. */
25166 if (GET_CODE (body) == USE
25167 || GET_CODE (body) == CLOBBER)
25168 continue;
25170 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25171 if (GET_CODE (body) != COND_EXEC)
25172 break;
25173 /* Maximum number of conditionally executed instructions in a block. */
25174 n = get_attr_ce_count (insn);
25175 if (arm_condexec_masklen + n > max)
25176 break;
25178 predicate = COND_EXEC_TEST (body);
25179 code = get_arm_condition_code (predicate);
25180 mask = (1 << n) - 1;
25181 if (arm_current_cc == code)
25182 arm_condexec_mask |= (mask << arm_condexec_masklen);
25183 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25184 break;
25186 arm_condexec_count++;
25187 arm_condexec_masklen += n;
25189 /* A jump must be the last instruction in a conditional block. */
25190 if (JUMP_P (insn))
25191 break;
25193 /* Restore recog_data (getting the attributes of other insns can
25194 destroy this array, but final.cc assumes that it remains intact
25195 across this call). */
25196 extract_constrain_insn_cached (first_insn);
25199 void
25200 arm_final_prescan_insn (rtx_insn *insn)
25202 /* BODY will hold the body of INSN. */
25203 rtx body = PATTERN (insn);
25205 /* This will be 1 if trying to repeat the trick, and things need to be
25206 reversed if it appears to fail. */
25207 int reverse = 0;
25209 /* If we start with a return insn, we only succeed if we find another one. */
25210 int seeking_return = 0;
25211 enum rtx_code return_code = UNKNOWN;
25213 /* START_INSN will hold the insn from where we start looking. This is the
25214 first insn after the following code_label if REVERSE is true. */
25215 rtx_insn *start_insn = insn;
25217 /* If in state 4, check if the target branch is reached, in order to
25218 change back to state 0. */
25219 if (arm_ccfsm_state == 4)
25221 if (insn == arm_target_insn)
25223 arm_target_insn = NULL;
25224 arm_ccfsm_state = 0;
25226 return;
25229 /* If in state 3, it is possible to repeat the trick, if this insn is an
25230 unconditional branch to a label, and immediately following this branch
25231 is the previous target label which is only used once, and the label this
25232 branch jumps to is not too far off. */
25233 if (arm_ccfsm_state == 3)
25235 if (simplejump_p (insn))
25237 start_insn = next_nonnote_insn (start_insn);
25238 if (BARRIER_P (start_insn))
25240 /* XXX Isn't this always a barrier? */
25241 start_insn = next_nonnote_insn (start_insn);
25243 if (LABEL_P (start_insn)
25244 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25245 && LABEL_NUSES (start_insn) == 1)
25246 reverse = TRUE;
25247 else
25248 return;
25250 else if (ANY_RETURN_P (body))
25252 start_insn = next_nonnote_insn (start_insn);
25253 if (BARRIER_P (start_insn))
25254 start_insn = next_nonnote_insn (start_insn);
25255 if (LABEL_P (start_insn)
25256 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25257 && LABEL_NUSES (start_insn) == 1)
25259 reverse = TRUE;
25260 seeking_return = 1;
25261 return_code = GET_CODE (body);
25263 else
25264 return;
25266 else
25267 return;
25270 gcc_assert (!arm_ccfsm_state || reverse);
25271 if (!JUMP_P (insn))
25272 return;
25274 /* This jump might be paralleled with a clobber of the condition codes;
25275 the jump should always come first. */
25276 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25277 body = XVECEXP (body, 0, 0);
25279 if (reverse
25280 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25281 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25283 int insns_skipped;
25284 int fail = FALSE, succeed = FALSE;
25285 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25286 int then_not_else = TRUE;
25287 rtx_insn *this_insn = start_insn;
25288 rtx label = 0;
25290 /* Register the insn jumped to. */
25291 if (reverse)
25293 if (!seeking_return)
25294 label = XEXP (SET_SRC (body), 0);
25296 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25297 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25298 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25300 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25301 then_not_else = FALSE;
25303 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25305 seeking_return = 1;
25306 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25308 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25310 seeking_return = 1;
25311 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25312 then_not_else = FALSE;
25314 else
25315 gcc_unreachable ();
25317 /* See how many insns this branch skips, and what kind of insns. If all
25318 insns are okay, and the label or unconditional branch to the same
25319 label is not too far away, succeed. */
25320 for (insns_skipped = 0;
25321 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25323 rtx scanbody;
25325 this_insn = next_nonnote_insn (this_insn);
25326 if (!this_insn)
25327 break;
25329 switch (GET_CODE (this_insn))
25331 case CODE_LABEL:
25332 /* Succeed if it is the target label, otherwise fail since
25333 control falls in from somewhere else. */
25334 if (this_insn == label)
25336 arm_ccfsm_state = 1;
25337 succeed = TRUE;
25339 else
25340 fail = TRUE;
25341 break;
25343 case BARRIER:
25344 /* Succeed if the following insn is the target label.
25345 Otherwise fail.
25346 If return insns are used then the last insn in a function
25347 will be a barrier. */
25348 this_insn = next_nonnote_insn (this_insn);
25349 if (this_insn && this_insn == label)
25351 arm_ccfsm_state = 1;
25352 succeed = TRUE;
25354 else
25355 fail = TRUE;
25356 break;
25358 case CALL_INSN:
25359 /* The AAPCS says that conditional calls should not be
25360 used since they make interworking inefficient (the
25361 linker can't transform BL<cond> into BLX). That's
25362 only a problem if the machine has BLX. */
25363 if (arm_arch5t)
25365 fail = TRUE;
25366 break;
25369 /* Succeed if the following insn is the target label, or
25370 if the following two insns are a barrier and the
25371 target label. */
25372 this_insn = next_nonnote_insn (this_insn);
25373 if (this_insn && BARRIER_P (this_insn))
25374 this_insn = next_nonnote_insn (this_insn);
25376 if (this_insn && this_insn == label
25377 && insns_skipped < max_insns_skipped)
25379 arm_ccfsm_state = 1;
25380 succeed = TRUE;
25382 else
25383 fail = TRUE;
25384 break;
25386 case JUMP_INSN:
25387 /* If this is an unconditional branch to the same label, succeed.
25388 If it is to another label, do nothing. If it is conditional,
25389 fail. */
25390 /* XXX Probably, the tests for SET and the PC are
25391 unnecessary. */
25393 scanbody = PATTERN (this_insn);
25394 if (GET_CODE (scanbody) == SET
25395 && GET_CODE (SET_DEST (scanbody)) == PC)
25397 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25398 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25400 arm_ccfsm_state = 2;
25401 succeed = TRUE;
25403 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25404 fail = TRUE;
25406 /* Fail if a conditional return is undesirable (e.g. on a
25407 StrongARM), but still allow this if optimizing for size. */
25408 else if (GET_CODE (scanbody) == return_code
25409 && !use_return_insn (TRUE, NULL)
25410 && !optimize_size)
25411 fail = TRUE;
25412 else if (GET_CODE (scanbody) == return_code)
25414 arm_ccfsm_state = 2;
25415 succeed = TRUE;
25417 else if (GET_CODE (scanbody) == PARALLEL)
25419 switch (get_attr_conds (this_insn))
25421 case CONDS_NOCOND:
25422 break;
25423 default:
25424 fail = TRUE;
25425 break;
25428 else
25429 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25431 break;
25433 case INSN:
25434 /* Instructions using or affecting the condition codes make it
25435 fail. */
25436 scanbody = PATTERN (this_insn);
25437 if (!(GET_CODE (scanbody) == SET
25438 || GET_CODE (scanbody) == PARALLEL)
25439 || get_attr_conds (this_insn) != CONDS_NOCOND)
25440 fail = TRUE;
25441 break;
25443 default:
25444 break;
25447 if (succeed)
25449 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25450 arm_target_label = CODE_LABEL_NUMBER (label);
25451 else
25453 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25455 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25457 this_insn = next_nonnote_insn (this_insn);
25458 gcc_assert (!this_insn
25459 || (!BARRIER_P (this_insn)
25460 && !LABEL_P (this_insn)));
25462 if (!this_insn)
25464 /* Oh, dear! We ran off the end... give up. */
25465 extract_constrain_insn_cached (insn);
25466 arm_ccfsm_state = 0;
25467 arm_target_insn = NULL;
25468 return;
25470 arm_target_insn = this_insn;
25473 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25474 what it was. */
25475 if (!reverse)
25476 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25478 if (reverse || then_not_else)
25479 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25482 /* Restore recog_data (getting the attributes of other insns can
25483 destroy this array, but final.cc assumes that it remains intact
25484 across this call). */
25485 extract_constrain_insn_cached (insn);
25489 /* Output IT instructions. */
25490 void
25491 thumb2_asm_output_opcode (FILE * stream)
25493 char buff[5];
25494 int n;
25496 if (arm_condexec_mask)
25498 for (n = 0; n < arm_condexec_masklen; n++)
25499 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25500 buff[n] = 0;
25501 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25502 arm_condition_codes[arm_current_cc]);
25503 arm_condexec_mask = 0;
25507 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM, core regs are
25508 UNITS_PER_WORD bytes wide. */
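/* For instance, ARM_NUM_REGS (DImode) is 2, so a DImode value assigned to
   r0 also occupies r1, while SImode and narrower modes need only a single
   core register.  */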
25509 static unsigned int
25510 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25512 if (IS_VPR_REGNUM (regno))
25513 return CEIL (GET_MODE_SIZE (mode), 2);
25515 if (TARGET_32BIT
25516 && regno > PC_REGNUM
25517 && regno != FRAME_POINTER_REGNUM
25518 && regno != ARG_POINTER_REGNUM
25519 && !IS_VFP_REGNUM (regno))
25520 return 1;
25522 return ARM_NUM_REGS (mode);
25525 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25526 static bool
25527 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25529 if (GET_MODE_CLASS (mode) == MODE_CC)
25530 return (regno == CC_REGNUM
25531 || (TARGET_VFP_BASE
25532 && regno == VFPCC_REGNUM));
25534 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25535 return false;
25537 if (IS_VPR_REGNUM (regno))
25538 return mode == HImode
25539 || mode == V16BImode
25540 || mode == V8BImode
25541 || mode == V4BImode;
25543 if (TARGET_THUMB1)
25544 /* For the Thumb we only allow values bigger than SImode in
25545 registers 0 - 6, so that there is always a second low
25546 register available to hold the upper part of the value.
25547 We probably ought to ensure that the register is the
25548 start of an even numbered register pair. */
25549 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25551 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25553 if (mode == DFmode || mode == DImode)
25554 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25556 if (mode == HFmode || mode == BFmode || mode == HImode
25557 || mode == SFmode || mode == SImode)
25558 return VFP_REGNO_OK_FOR_SINGLE (regno);
25560 if (TARGET_NEON)
25561 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25562 || (VALID_NEON_QREG_MODE (mode)
25563 && NEON_REGNO_OK_FOR_QUAD (regno))
25564 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25565 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25566 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25567 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25568 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25569 if (TARGET_HAVE_MVE)
25570 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25571 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25572 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25574 return false;
25577 if (TARGET_REALLY_IWMMXT)
25579 if (IS_IWMMXT_GR_REGNUM (regno))
25580 return mode == SImode;
25582 if (IS_IWMMXT_REGNUM (regno))
25583 return VALID_IWMMXT_REG_MODE (mode);
25586 /* We allow almost any value to be stored in the general registers.
25587 Restrict doubleword quantities to even register pairs in ARM state
25588 so that we can use ldrd. The same restriction applies for MVE
25589 in order to support Armv8.1-M Mainline instructions.
25590 Do not allow very large Neon structure opaque modes in general
25591 registers; they would use too many. */
25592 if (regno <= LAST_ARM_REGNUM)
25594 if (ARM_NUM_REGS (mode) > 4)
25595 return false;
25597 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25598 return true;
25600 return !((TARGET_LDRD || TARGET_CDE)
25601 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25604 if (regno == FRAME_POINTER_REGNUM
25605 || regno == ARG_POINTER_REGNUM)
25606 /* We only allow integers in the fake hard registers. */
25607 return GET_MODE_CLASS (mode) == MODE_INT;
25609 return false;
25612 /* Implement TARGET_MODES_TIEABLE_P. */
25614 static bool
25615 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25617 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25618 return true;
25620 /* We specifically want to allow elements of "structure" modes to
25621 be tieable to the structure. This more general condition allows
25622 other rarer situations too. */
25623 if ((TARGET_NEON
25624 && (VALID_NEON_DREG_MODE (mode1)
25625 || VALID_NEON_QREG_MODE (mode1)
25626 || VALID_NEON_STRUCT_MODE (mode1))
25627 && (VALID_NEON_DREG_MODE (mode2)
25628 || VALID_NEON_QREG_MODE (mode2)
25629 || VALID_NEON_STRUCT_MODE (mode2)))
25630 || (TARGET_HAVE_MVE
25631 && (VALID_MVE_MODE (mode1)
25632 || VALID_MVE_STRUCT_MODE (mode1))
25633 && (VALID_MVE_MODE (mode2)
25634 || VALID_MVE_STRUCT_MODE (mode2))))
25635 return true;
25637 return false;
25640 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25641 not used in arm mode. */
25643 enum reg_class
25644 arm_regno_class (int regno)
25646 if (regno == PC_REGNUM)
25647 return NO_REGS;
25649 if (IS_VPR_REGNUM (regno))
25650 return VPR_REG;
25652 if (TARGET_THUMB1)
25654 if (regno == STACK_POINTER_REGNUM)
25655 return STACK_REG;
25656 if (regno == CC_REGNUM)
25657 return CC_REG;
25658 if (regno < 8)
25659 return LO_REGS;
25660 return HI_REGS;
25663 if (TARGET_THUMB2 && regno < 8)
25664 return LO_REGS;
25666 if ( regno <= LAST_ARM_REGNUM
25667 || regno == FRAME_POINTER_REGNUM
25668 || regno == ARG_POINTER_REGNUM)
25669 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25671 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25672 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25674 if (IS_VFP_REGNUM (regno))
25676 if (regno <= D7_VFP_REGNUM)
25677 return VFP_D0_D7_REGS;
25678 else if (regno <= LAST_LO_VFP_REGNUM)
25679 return VFP_LO_REGS;
25680 else
25681 return VFP_HI_REGS;
25684 if (IS_IWMMXT_REGNUM (regno))
25685 return IWMMXT_REGS;
25687 if (IS_IWMMXT_GR_REGNUM (regno))
25688 return IWMMXT_GR_REGS;
25690 return NO_REGS;
25693 /* Handle a special case when computing the offset
25694 of an argument from the frame pointer. */
25696 arm_debugger_arg_offset (int value, rtx addr)
25698 rtx_insn *insn;
25700 /* We are only interested if dbxout_parms() failed to compute the offset. */
25701 if (value != 0)
25702 return 0;
25704 /* We can only cope with the case where the address is held in a register. */
25705 if (!REG_P (addr))
25706 return 0;
25708 /* If we are using the frame pointer to point at the argument, then
25709 an offset of 0 is correct. */
25710 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25711 return 0;
25713 /* If we are using the stack pointer to point at the
25714 argument, then an offset of 0 is correct. */
25715 /* ??? Check this is consistent with thumb2 frame layout. */
25716 if ((TARGET_THUMB || !frame_pointer_needed)
25717 && REGNO (addr) == SP_REGNUM)
25718 return 0;
25720 /* Oh dear. The argument is pointed to by a register rather
25721 than being held in a register, or being stored at a known
25722 offset from the frame pointer. Since GDB only understands
25723 those two kinds of argument we must translate the address
25724 held in the register into an offset from the frame pointer.
25725 We do this by searching through the insns for the function
25726 looking to see where this register gets its value. If the
25727 register is initialized from the frame pointer plus an offset
25728 then we are in luck and we can continue, otherwise we give up.
25730 This code is exercised by producing debugging information
25731 for a function with arguments like this:
25733 double func (double a, double b, int c, double d) {return d;}
25735 Without this code the stab for parameter 'd' will be set to
25736 an offset of 0 from the frame pointer, rather than 8. */
25738 /* The if() statement says:
25740 If the insn is a normal instruction
25741 and if the insn is setting the value in a register
25742 and if the register being set is the register holding the address of the argument
25743 and if the address is computed by an addition
25744 that involves adding to a register
25745 which is the frame pointer
25746 a constant integer
25748 then... */
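/* Concretely, the loop below looks for an insn of the form
	(set (reg Rn) (plus (reg hard-frame-pointer) (const_int K)))
   where Rn is the register in ADDR; K then becomes the reported offset.  */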
25750 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25752 if ( NONJUMP_INSN_P (insn)
25753 && GET_CODE (PATTERN (insn)) == SET
25754 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25755 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25756 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25757 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25758 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25761 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25763 break;
25767 if (value == 0)
25769 debug_rtx (addr);
25770 warning (0, "unable to compute real location of stacked parameter");
25771 value = 8; /* XXX magic hack */
25774 return value;
25777 /* Implement TARGET_PROMOTED_TYPE. */
25779 static tree
25780 arm_promoted_type (const_tree t)
25782 if (SCALAR_FLOAT_TYPE_P (t)
25783 && TYPE_PRECISION (t) == 16
25784 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25785 return float_type_node;
25786 return NULL_TREE;
25789 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25790 This simply adds HFmode as a supported mode; even though we don't
25791 implement arithmetic on this type directly, it's supported by
25792 optabs conversions, much the way the double-word arithmetic is
25793 special-cased in the default hook. */
25795 static bool
25796 arm_scalar_mode_supported_p (scalar_mode mode)
25798 if (mode == HFmode)
25799 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25800 else if (ALL_FIXED_POINT_MODE_P (mode))
25801 return true;
25802 else
25803 return default_scalar_mode_supported_p (mode);
25806 /* Set the value of FLT_EVAL_METHOD.
25807 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25809 0: evaluate all operations and constants, whose semantic type has at
25810 most the range and precision of type float, to the range and
25811 precision of float; evaluate all other operations and constants to
25812 the range and precision of the semantic type;
25814 N, where _FloatN is a supported interchange floating type
25815 evaluate all operations and constants, whose semantic type has at
25816 most the range and precision of _FloatN type, to the range and
25817 precision of the _FloatN type; evaluate all other operations and
25818 constants to the range and precision of the semantic type;
25820 If we have the ARMv8.2-A extensions then we support _Float16 in native
25821 precision, so we should set this to 16. Otherwise, we support the type,
25822 but want to evaluate expressions in float precision, so set this to
25823 0. */
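/* A rough example of the difference (assuming -mfp16-format=ieee): with the
   ARMv8.2-A FP16 instructions available, "_Float16 c = a + b;" is evaluated
   directly in _Float16; without them the addition is performed in float and
   only the final result is narrowed back to _Float16.  */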
25825 static enum flt_eval_method
25826 arm_excess_precision (enum excess_precision_type type)
25828 switch (type)
25830 case EXCESS_PRECISION_TYPE_FAST:
25831 case EXCESS_PRECISION_TYPE_STANDARD:
25832 /* We can calculate either in 16-bit range and precision or
25833 32-bit range and precision. Make that decision based on whether
25834 we have native support for the ARMv8.2-A 16-bit floating-point
25835 instructions or not. */
25836 return (TARGET_VFP_FP16INST
25837 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25838 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25839 case EXCESS_PRECISION_TYPE_IMPLICIT:
25840 case EXCESS_PRECISION_TYPE_FLOAT16:
25841 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25842 default:
25843 gcc_unreachable ();
25845 return FLT_EVAL_METHOD_UNPREDICTABLE;
25849 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25850 _Float16 if we are using anything other than ieee format for 16-bit
25851 floating point. Otherwise, punt to the default implementation. */
25852 static opt_scalar_float_mode
25853 arm_floatn_mode (int n, bool extended)
25855 if (!extended && n == 16)
25857 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25858 return HFmode;
25859 return opt_scalar_float_mode ();
25862 return default_floatn_mode (n, extended);
25866 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25867 not to early-clobber SRC registers in the process.
25869 We assume that the operands described by SRC and DEST represent a
25870 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25871 number of components into which the copy has been decomposed. */
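/* For example (hypothetical D registers): copying {d1, d2} into {d2, d3}
   overlaps and the destination starts at a higher register, so the
   component moves are emitted in reverse order (d2->d3 first, then d1->d2)
   to avoid clobbering d2 before it has been read.  */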
25872 void
25873 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25875 unsigned int i;
25877 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25878 || REGNO (operands[0]) < REGNO (operands[1]))
25880 for (i = 0; i < count; i++)
25882 operands[2 * i] = dest[i];
25883 operands[2 * i + 1] = src[i];
25886 else
25888 for (i = 0; i < count; i++)
25890 operands[2 * i] = dest[count - i - 1];
25891 operands[2 * i + 1] = src[count - i - 1];
25896 /* Split operands into moves from op[1] + op[2] into op[0]. */
25898 void
25899 neon_split_vcombine (rtx operands[3])
25901 unsigned int dest = REGNO (operands[0]);
25902 unsigned int src1 = REGNO (operands[1]);
25903 unsigned int src2 = REGNO (operands[2]);
25904 machine_mode halfmode = GET_MODE (operands[1]);
25905 unsigned int halfregs = REG_NREGS (operands[1]);
25906 rtx destlo, desthi;
25908 if (src1 == dest && src2 == dest + halfregs)
25910 /* No-op move. Can't split to nothing; emit something. */
25911 emit_note (NOTE_INSN_DELETED);
25912 return;
25915 /* Preserve register attributes for variable tracking. */
25916 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25917 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25918 GET_MODE_SIZE (halfmode));
25920 /* Special case of reversed high/low parts. Use VSWP. */
25921 if (src2 == dest && src1 == dest + halfregs)
25923 rtx x = gen_rtx_SET (destlo, operands[1]);
25924 rtx y = gen_rtx_SET (desthi, operands[2]);
25925 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25926 return;
25929 if (!reg_overlap_mentioned_p (operands[2], destlo))
25931 /* Try to avoid unnecessary moves if part of the result
25932 is in the right place already. */
25933 if (src1 != dest)
25934 emit_move_insn (destlo, operands[1]);
25935 if (src2 != dest + halfregs)
25936 emit_move_insn (desthi, operands[2]);
25938 else
25940 if (src2 != dest + halfregs)
25941 emit_move_insn (desthi, operands[2]);
25942 if (src1 != dest)
25943 emit_move_insn (destlo, operands[1]);
25947 /* Return the number (counting from 0) of
25948 the least significant set bit in MASK. */
25950 inline static int
25951 number_of_first_bit_set (unsigned mask)
25953 return ctz_hwi (mask);
25956 /* Like emit_multi_reg_push, but allowing for a different set of
25957 registers to be described as saved. MASK is the set of registers
25958 to be saved; REAL_REGS is the set of registers to be described as
25959 saved. If REAL_REGS is 0, only describe the stack adjustment. */
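/* As a sketch: with MASK covering {r4, r5, lr} this builds a single
   "push {r4, r5, lr}" (a pre-modify store-multiple dropping SP by 12) and,
   when REAL_REGS is nonzero, attaches a REG_FRAME_RELATED_EXPR note
   describing the SP adjustment and the three saved registers for the
   unwinder.  */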
25961 static rtx_insn *
25962 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25964 unsigned long regno;
25965 rtx par[10], tmp, reg;
25966 rtx_insn *insn;
25967 int i, j;
25969 /* Build the parallel of the registers actually being stored. */
25970 for (i = 0; mask; ++i, mask &= mask - 1)
25972 regno = ctz_hwi (mask);
25973 reg = gen_rtx_REG (SImode, regno);
25975 if (i == 0)
25976 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25977 else
25978 tmp = gen_rtx_USE (VOIDmode, reg);
25980 par[i] = tmp;
25983 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25984 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25985 tmp = gen_frame_mem (BLKmode, tmp);
25986 tmp = gen_rtx_SET (tmp, par[0]);
25987 par[0] = tmp;
25989 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25990 insn = emit_insn (tmp);
25992 /* Always build the stack adjustment note for unwind info. */
25993 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25994 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25995 par[0] = tmp;
25997 /* Build the parallel of the registers recorded as saved for unwind. */
25998 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26000 regno = ctz_hwi (real_regs);
26001 reg = gen_rtx_REG (SImode, regno);
26003 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26004 tmp = gen_frame_mem (SImode, tmp);
26005 tmp = gen_rtx_SET (tmp, reg);
26006 RTX_FRAME_RELATED_P (tmp) = 1;
26007 par[j + 1] = tmp;
26010 if (j == 0)
26011 tmp = par[0];
26012 else
26014 RTX_FRAME_RELATED_P (par[0]) = 1;
26015 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26018 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26020 return insn;
26023 /* Emit code to push or pop registers to or from the stack. F is the
26024 assembly file. MASK is the registers to pop. */
26025 static void
26026 thumb_pop (FILE *f, unsigned long mask)
26028 int regno;
26029 int lo_mask = mask & 0xFF;
26031 gcc_assert (mask);
26033 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26035 /* Special case. Do not generate a POP PC statement here, do it in
26036 thumb_exit (). */
26037 thumb_exit (f, -1);
26038 return;
26041 fprintf (f, "\tpop\t{");
26043 /* Look at the low registers first. */
26044 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26046 if (lo_mask & 1)
26048 asm_fprintf (f, "%r", regno);
26050 if ((lo_mask & ~1) != 0)
26051 fprintf (f, ", ");
26055 if (mask & (1 << PC_REGNUM))
26057 /* Catch popping the PC. */
26058 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26059 || IS_CMSE_ENTRY (arm_current_func_type ()))
26061 /* The PC is never popped directly; instead
26062 it is popped into r3 and then BX is used. */
26063 fprintf (f, "}\n");
26065 thumb_exit (f, -1);
26067 return;
26069 else
26071 if (mask & 0xFF)
26072 fprintf (f, ", ");
26074 asm_fprintf (f, "%r", PC_REGNUM);
26078 fprintf (f, "}\n");
26081 /* Generate code to return from a thumb function.
26082 If 'reg_containing_return_addr' is -1, then the return address is
26083 actually on the stack, at the stack pointer.
26085 Note: do not forget to update length attribute of corresponding insn pattern
26086 when changing assembly output (eg. length attribute of epilogue_insns when
26087 updating Armv8-M Baseline Security Extensions register clearing
26088 sequences). */
26089 static void
26090 thumb_exit (FILE *f, int reg_containing_return_addr)
26092 unsigned regs_available_for_popping;
26093 unsigned regs_to_pop;
26094 int pops_needed;
26095 unsigned available;
26096 unsigned required;
26097 machine_mode mode;
26098 int size;
26099 int restore_a4 = FALSE;
26101 /* Compute the registers we need to pop. */
26102 regs_to_pop = 0;
26103 pops_needed = 0;
26105 if (reg_containing_return_addr == -1)
26107 regs_to_pop |= 1 << LR_REGNUM;
26108 ++pops_needed;
26111 if (TARGET_BACKTRACE)
26113 /* Restore the (ARM) frame pointer and stack pointer. */
26114 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26115 pops_needed += 2;
26118 /* If there is nothing to pop then just emit the BX instruction and
26119 return. */
26120 if (pops_needed == 0)
26122 if (crtl->calls_eh_return)
26123 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26125 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26127 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26128 emitted by cmse_nonsecure_entry_clear_before_return (). */
26129 if (!TARGET_HAVE_FPCXT_CMSE)
26130 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26131 reg_containing_return_addr);
26132 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26134 else
26135 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26136 return;
26138 /* Otherwise if we are not supporting interworking and we have not created
26139 a backtrace structure and the function was not entered in ARM mode then
26140 just pop the return address straight into the PC. */
26141 else if (!TARGET_INTERWORK
26142 && !TARGET_BACKTRACE
26143 && !is_called_in_ARM_mode (current_function_decl)
26144 && !crtl->calls_eh_return
26145 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26147 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26148 return;
26151 /* Find out how many of the (return) argument registers we can corrupt. */
26152 regs_available_for_popping = 0;
26154 /* If returning via __builtin_eh_return, the bottom three registers
26155 all contain information needed for the return. */
26156 if (crtl->calls_eh_return)
26157 size = 12;
26158 else
26160 /* We can deduce the registers used from the function's
26161 return value. This is more reliable than examining
26162 df_regs_ever_live_p () because that will be set if the register is
26163 ever used in the function, not just if the register is used
26164 to hold a return value. */
26166 if (crtl->return_rtx != 0)
26167 mode = GET_MODE (crtl->return_rtx);
26168 else
26169 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26171 size = GET_MODE_SIZE (mode);
26173 if (size == 0)
26175 /* In a void function we can use any argument register.
26176 In a function that returns a structure on the stack
26177 we can use the second and third argument registers. */
26178 if (mode == VOIDmode)
26179 regs_available_for_popping =
26180 (1 << ARG_REGISTER (1))
26181 | (1 << ARG_REGISTER (2))
26182 | (1 << ARG_REGISTER (3));
26183 else
26184 regs_available_for_popping =
26185 (1 << ARG_REGISTER (2))
26186 | (1 << ARG_REGISTER (3));
26188 else if (size <= 4)
26189 regs_available_for_popping =
26190 (1 << ARG_REGISTER (2))
26191 | (1 << ARG_REGISTER (3));
26192 else if (size <= 8)
26193 regs_available_for_popping =
26194 (1 << ARG_REGISTER (3));
26197 /* Match registers to be popped with registers into which we pop them. */
26198 for (available = regs_available_for_popping,
26199 required = regs_to_pop;
26200 required != 0 && available != 0;
26201 available &= ~(available & - available),
26202 required &= ~(required & - required))
26203 -- pops_needed;
26205 /* If we have any popping registers left over, remove them. */
26206 if (available > 0)
26207 regs_available_for_popping &= ~available;
26209 /* Otherwise if we need another popping register we can use
26210 the fourth argument register. */
26211 else if (pops_needed)
26213 /* If we have not found any free argument registers and
26214 reg a4 contains the return address, we must move it. */
26215 if (regs_available_for_popping == 0
26216 && reg_containing_return_addr == LAST_ARG_REGNUM)
26218 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26219 reg_containing_return_addr = LR_REGNUM;
26221 else if (size > 12)
26223 /* Register a4 is being used to hold part of the return value,
26224 but we have dire need of a free, low register. */
26225 restore_a4 = TRUE;
26227 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26230 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26232 /* The fourth argument register is available. */
26233 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26235 --pops_needed;
26239 /* Pop as many registers as we can. */
26240 thumb_pop (f, regs_available_for_popping);
26242 /* Process the registers we popped. */
26243 if (reg_containing_return_addr == -1)
26245 /* The return address was popped into the lowest numbered register. */
26246 regs_to_pop &= ~(1 << LR_REGNUM);
26248 reg_containing_return_addr =
26249 number_of_first_bit_set (regs_available_for_popping);
26251 /* Remove this register from the mask of available registers, so that
26252 the return address will not be corrupted by further pops. */
26253 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26256 /* If we popped other registers then handle them here. */
26257 if (regs_available_for_popping)
26259 int frame_pointer;
26261 /* Work out which register currently contains the frame pointer. */
26262 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26264 /* Move it into the correct place. */
26265 asm_fprintf (f, "\tmov\t%r, %r\n",
26266 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26268 /* (Temporarily) remove it from the mask of popped registers. */
26269 regs_available_for_popping &= ~(1 << frame_pointer);
26270 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26272 if (regs_available_for_popping)
26274 int stack_pointer;
26276 /* We popped the stack pointer as well,
26277 find the register that contains it. */
26278 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26280 /* Move it into the stack register. */
26281 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26283 /* At this point we have popped all necessary registers, so
26284 do not worry about restoring regs_available_for_popping
26285 to its correct value:
26287 assert (pops_needed == 0)
26288 assert (regs_available_for_popping == (1 << frame_pointer))
26289 assert (regs_to_pop == (1 << STACK_POINTER)) */
26291 else
26293 /* Since we have just moved the popped value into the frame
26294 pointer, the popping register is available for reuse, and
26295 we know that we still have the stack pointer left to pop. */
26296 regs_available_for_popping |= (1 << frame_pointer);
26300 /* If we still have registers left on the stack, but we no longer have
26301 any registers into which we can pop them, then we must move the return
26302 address into the link register and make available the register that
26303 contained it. */
26304 if (regs_available_for_popping == 0 && pops_needed > 0)
26306 regs_available_for_popping |= 1 << reg_containing_return_addr;
26308 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26309 reg_containing_return_addr);
26311 reg_containing_return_addr = LR_REGNUM;
26314 /* If we have registers left on the stack then pop some more.
26315 We know that at most we will want to pop FP and SP. */
26316 if (pops_needed > 0)
26318 int popped_into;
26319 int move_to;
26321 thumb_pop (f, regs_available_for_popping);
26323 /* We have popped either FP or SP.
26324 Move whichever one it is into the correct register. */
26325 popped_into = number_of_first_bit_set (regs_available_for_popping);
26326 move_to = number_of_first_bit_set (regs_to_pop);
26328 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26329 --pops_needed;
26332 /* If we still have not popped everything then we must have only
26333 had one register available to us and we are now popping the SP. */
26334 if (pops_needed > 0)
26336 int popped_into;
26338 thumb_pop (f, regs_available_for_popping);
26340 popped_into = number_of_first_bit_set (regs_available_for_popping);
26342 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26344 assert (regs_to_pop == (1 << STACK_POINTER))
26345 assert (pops_needed == 1)
26349 /* If necessary restore the a4 register. */
26350 if (restore_a4)
26352 if (reg_containing_return_addr != LR_REGNUM)
26354 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26355 reg_containing_return_addr = LR_REGNUM;
26358 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26361 if (crtl->calls_eh_return)
26362 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26364 /* Return to caller. */
26365 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26367 /* This is for the cases where LR is not being used to contain the return
26368 address. It may therefore contain information that we might not want
26369 to leak, hence it must be cleared. The value in R0 will never be a
26370 secret at this point, so it is safe to use it, see the clearing code
26371 in cmse_nonsecure_entry_clear_before_return (). */
26372 if (reg_containing_return_addr != LR_REGNUM)
26373 asm_fprintf (f, "\tmov\tlr, r0\n");
26375 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26376 by cmse_nonsecure_entry_clear_before_return (). */
26377 if (!TARGET_HAVE_FPCXT_CMSE)
26378 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26379 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26381 else
26382 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26385 /* Scan INSN just before assembler is output for it.
26386 For Thumb-1, we track the status of the condition codes; this
26387 information is used in the cbranchsi4_insn pattern. */
26388 void
26389 thumb1_final_prescan_insn (rtx_insn *insn)
26391 if (flag_print_asm_name)
26392 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26393 INSN_ADDRESSES (INSN_UID (insn)));
26394 /* Don't overwrite the previous setter when we get to a cbranch. */
26395 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26397 enum attr_conds conds;
26399 if (cfun->machine->thumb1_cc_insn)
26401 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26402 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26403 CC_STATUS_INIT;
26405 conds = get_attr_conds (insn);
26406 if (conds == CONDS_SET)
26408 rtx set = single_set (insn);
26409 cfun->machine->thumb1_cc_insn = insn;
26410 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26411 cfun->machine->thumb1_cc_op1 = const0_rtx;
26412 cfun->machine->thumb1_cc_mode = CC_NZmode;
26413 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26415 rtx src1 = XEXP (SET_SRC (set), 1);
26416 if (src1 == const0_rtx)
26417 cfun->machine->thumb1_cc_mode = CCmode;
26419 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26421 /* Record the src register operand instead of dest because
26422 cprop_hardreg pass propagates src. */
26423 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26426 else if (conds != CONDS_NOCOND)
26427 cfun->machine->thumb1_cc_insn = NULL_RTX;
26430 /* Check if an unexpected far jump is used. */
26431 if (cfun->machine->lr_save_eliminated
26432 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26433 internal_error("Unexpected thumb1 far jump");
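/* Return nonzero if VAL is an 8-bit value shifted left by at most 24 bits
   (0xFF00, for instance); such a constant can typically be materialised in
   Thumb-1 by loading a byte and shifting it into place.  */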
26437 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26439 unsigned HOST_WIDE_INT mask = 0xff;
26440 int i;
26442 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26443 if (val == 0) /* XXX */
26444 return 0;
26446 for (i = 0; i < 25; i++)
26447 if ((val & (mask << i)) == val)
26448 return 1;
26450 return 0;
26453 /* Returns nonzero if the current function contains,
26454 or might contain a far jump. */
26455 static int
26456 thumb_far_jump_used_p (void)
26458 rtx_insn *insn;
26459 bool far_jump = false;
26460 unsigned int func_size = 0;
26462 /* If we have already decided that far jumps may be used,
26463 do not bother checking again, and always return true even if
26464 it turns out that they are not being used. Once we have made
26465 the decision that far jumps are present (and hence that the link
26466 register will be pushed onto the stack) we cannot go back on it. */
26467 if (cfun->machine->far_jump_used)
26468 return 1;
26470 /* If this function is not being called from the prologue/epilogue
26471 generation code then it must be being called from the
26472 INITIAL_ELIMINATION_OFFSET macro. */
26473 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26475 /* In this case we know that we are being asked about the elimination
26476 of the arg pointer register. If that register is not being used,
26477 then there are no arguments on the stack, and we do not have to
26478 worry that a far jump might force the prologue to push the link
26479 register, changing the stack offsets. In this case we can just
26480 return false, since the presence of far jumps in the function will
26481 not affect stack offsets.
26483 If the arg pointer is live (or if it was live, but has now been
26484 eliminated and so set to dead) then we do have to test to see if
26485 the function might contain a far jump. This test can lead to some
26486 false negatives, since before reload is completed, the length of
26487 branch instructions is not known, so gcc defaults to returning their
26488 longest length, which in turn sets the far jump attribute to true.
26490 A false negative will not result in bad code being generated, but it
26491 will result in a needless push and pop of the link register. We
26492 hope that this does not occur too often.
26494 If we need doubleword stack alignment this could affect the other
26495 elimination offsets so we can't risk getting it wrong. */
26496 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26497 cfun->machine->arg_pointer_live = 1;
26498 else if (!cfun->machine->arg_pointer_live)
26499 return 0;
26502 /* We should not change far_jump_used during or after reload, as there is
26503 no chance to change stack frame layout. */
26504 if (reload_in_progress || reload_completed)
26505 return 0;
26507 /* Check to see if the function contains a branch
26508 insn with the far jump attribute set. */
26509 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26511 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26513 far_jump = true;
26515 func_size += get_attr_length (insn);
26518 /* The far_jump attribute will always be true for Thumb-1 before the
26519 shorten_branch pass. So checking the far_jump attribute before
26520 shorten_branch isn't very useful.
26522 The following heuristic tries to estimate more accurately whether a far
26523 jump may finally be used. The heuristic is very conservative, as there is
26524 no chance to roll back a decision not to use far jumps.
26526 The Thumb-1 long branch offset range is -2048 to 2046. The worst case is
26527 that each 2-byte insn is associated with a 4-byte constant pool entry.
26528 Using a function size of 2048/3 as the threshold is conservative enough; a worked example follows this function. */
26529 if (far_jump)
26531 if ((func_size * 3) >= 2048)
26533 /* Record the fact that we have decided that
26534 the function does use far jumps. */
26535 cfun->machine->far_jump_used = 1;
26536 return 1;
26540 return 0;
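/* Editorial illustration (hypothetical helper, not part of the original
   source): the 2048/3 threshold above as standalone arithmetic.  In the
   worst case every 2-byte insn drags a 4-byte literal-pool entry with it,
   so a function whose insns total FUNC_SIZE bytes can span up to
   3 * FUNC_SIZE bytes, while short branches only reach about 2 KB.  */
static int
example_far_jump_heuristic (unsigned int func_size)
{
  /* func_size == 680 -> 2040 < 2048, short branches assumed sufficient;
     func_size == 700 -> 2100 >= 2048, far jumps (and an LR save) assumed.  */
  return (func_size * 3) >= 2048;
}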
26543 /* Return nonzero if FUNC must be entered in ARM mode. */
26544 static bool
26545 is_called_in_ARM_mode (tree func)
26547 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26549 /* Ignore the problem about functions whose address is taken. */
26550 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26551 return true;
26553 #ifdef ARM_PE
26554 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26555 #else
26556 return false;
26557 #endif
26560 /* Given the stack offsets and register mask in OFFSETS, decide how
26561 many additional registers to push instead of subtracting a constant
26562 from SP. For epilogues the principle is the same except we use pop.
26563 FOR_PROLOGUE indicates which we're generating. */
26564 static int
26565 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26567 HOST_WIDE_INT amount;
26568 unsigned long live_regs_mask = offsets->saved_regs_mask;
26569 /* Extract a mask of the ones we can give to the Thumb's push/pop
26570 instruction. */
26571 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26572 /* Then count how many other high registers will need to be pushed. */
26573 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26574 int n_free, reg_base, size;
26576 if (!for_prologue && frame_pointer_needed)
26577 amount = offsets->locals_base - offsets->saved_regs;
26578 else
26579 amount = offsets->outgoing_args - offsets->saved_regs;
26581 /* If the stack frame size is 512 exactly, we can save one load
26582 instruction, which should make this a win even when optimizing
26583 for speed. */
26584 if (!optimize_size && amount != 512)
26585 return 0;
26587 /* Can't do this if there are high registers to push. */
26588 if (high_regs_pushed != 0)
26589 return 0;
26591 /* Shouldn't do it in the prologue if no registers would normally
26592 be pushed at all. In the epilogue, also allow it if we'll have
26593 a pop insn for the PC. */
26594 if (l_mask == 0
26595 && (for_prologue
26596 || TARGET_BACKTRACE
26597 || (live_regs_mask & 1 << LR_REGNUM) == 0
26598 || TARGET_INTERWORK
26599 || crtl->args.pretend_args_size != 0))
26600 return 0;
26602 /* Don't do this if thumb_expand_prologue wants to emit instructions
26603 between the push and the stack frame allocation. */
26604 if (for_prologue
26605 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26606 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26607 return 0;
26609 reg_base = 0;
26610 n_free = 0;
26611 if (!for_prologue)
26613 size = arm_size_return_regs ();
26614 reg_base = ARM_NUM_INTS (size);
26615 live_regs_mask >>= reg_base;
26618 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26619 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26621 live_regs_mask >>= 1;
26622 n_free++;
26625 if (n_free == 0)
26626 return 0;
26627 gcc_assert (amount / 4 * 4 == amount);
26629 if (amount >= 512 && (amount - n_free * 4) < 512)
26630 return (amount - 508) / 4;
26631 if (amount <= n_free * 4)
26632 return amount / 4;
26633 return 0;
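/* Editorial illustration (hypothetical helper, not part of the original
   source): the arithmetic at the end of thumb1_extra_regs_pushed.  A
   Thumb-1 "sub sp, #imm" can only subtract up to 508 (a 7-bit immediate
   scaled by 4), so with AMOUNT == 512 and one free register, pushing one
   extra register absorbs 4 bytes and the remaining 508 fits in a single
   instruction.  */
static int
example_extra_pushes (int amount, int n_free)
{
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;	/* e.g. amount 512, n_free 1 -> 1 push.  */
  if (amount <= n_free * 4)
    return amount / 4;		/* Whole adjustment absorbed by pushes.  */
  return 0;
}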
26636 /* The bits which aren't usefully expanded as rtl. */
26637 const char *
26638 thumb1_unexpanded_epilogue (void)
26640 arm_stack_offsets *offsets;
26641 int regno;
26642 unsigned long live_regs_mask = 0;
26643 int high_regs_pushed = 0;
26644 int extra_pop;
26645 int had_to_push_lr;
26646 int size;
26648 if (cfun->machine->return_used_this_function != 0)
26649 return "";
26651 if (IS_NAKED (arm_current_func_type ()))
26652 return "";
26654 offsets = arm_get_frame_offsets ();
26655 live_regs_mask = offsets->saved_regs_mask;
26656 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26658 /* We can deduce the registers used from the function's return value.
26659 This is more reliable than examining df_regs_ever_live_p () because that
26660 will be set if the register is ever used in the function, not just if
26661 the register is used to hold a return value. */
26662 size = arm_size_return_regs ();
26664 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26665 if (extra_pop > 0)
26667 unsigned long extra_mask = (1 << extra_pop) - 1;
26668 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26671 /* The prologue may have pushed some high registers to use as
26672 work registers. E.g. the testsuite file:
26673 gcc/testsuite/gcc.c-torture/execute/complex-2.c
26674 compiles to produce:
26675 push {r4, r5, r6, r7, lr}
26676 mov r7, r9
26677 mov r6, r8
26678 push {r6, r7}
26679 as part of the prologue. We have to undo that pushing here. */
26681 if (high_regs_pushed)
26683 unsigned long mask = live_regs_mask & 0xff;
26684 int next_hi_reg;
26686 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26688 if (mask == 0)
26689 /* Oh dear! We have no low registers into which we can pop
26690 high registers! */
26691 internal_error
26692 ("no low registers available for popping high registers");
26694 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26695 if (live_regs_mask & (1 << next_hi_reg))
26696 break;
26698 while (high_regs_pushed)
26700 /* Find lo register(s) into which the high register(s) can
26701 be popped. */
26702 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26704 if (mask & (1 << regno))
26705 high_regs_pushed--;
26706 if (high_regs_pushed == 0)
26707 break;
26710 if (high_regs_pushed == 0 && regno >= 0)
26711 mask &= ~((1 << regno) - 1);
26713 /* Pop the values into the low register(s). */
26714 thumb_pop (asm_out_file, mask);
26716 /* Move the value(s) into the high registers. */
26717 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26719 if (mask & (1 << regno))
26721 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26722 regno);
26724 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26725 next_hi_reg--)
26726 if (live_regs_mask & (1 << next_hi_reg))
26727 break;
26731 live_regs_mask &= ~0x0f00;
26734 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26735 live_regs_mask &= 0xff;
26737 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26739 /* Pop the return address into the PC. */
26740 if (had_to_push_lr)
26741 live_regs_mask |= 1 << PC_REGNUM;
26743 /* Either no argument registers were pushed or a backtrace
26744 structure was created which includes an adjusted stack
26745 pointer, so just pop everything. */
26746 if (live_regs_mask)
26747 thumb_pop (asm_out_file, live_regs_mask);
26749 /* We have either just popped the return address into the
26750 PC or it was kept in LR for the entire function.
26751 Note that thumb_pop has already called thumb_exit if the
26752 PC was in the list. */
26753 if (!had_to_push_lr)
26754 thumb_exit (asm_out_file, LR_REGNUM);
26756 else
26758 /* Pop everything but the return address. */
26759 if (live_regs_mask)
26760 thumb_pop (asm_out_file, live_regs_mask);
26762 if (had_to_push_lr)
26764 if (size > 12)
26766 /* We have no free low regs, so save one. */
26767 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26768 LAST_ARG_REGNUM);
26771 /* Get the return address into a temporary register. */
26772 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26774 if (size > 12)
26776 /* Move the return address to lr. */
26777 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26778 LAST_ARG_REGNUM);
26779 /* Restore the low register. */
26780 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26781 IP_REGNUM);
26782 regno = LR_REGNUM;
26784 else
26785 regno = LAST_ARG_REGNUM;
26787 else
26788 regno = LR_REGNUM;
26790 /* Remove the argument registers that were pushed onto the stack. */
26791 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26792 SP_REGNUM, SP_REGNUM,
26793 crtl->args.pretend_args_size);
26795 thumb_exit (asm_out_file, regno);
26798 return "";
26801 /* Functions to save and restore machine-specific function data. */
26802 static struct machine_function *
26803 arm_init_machine_status (void)
26805 struct machine_function *machine;
26806 machine = ggc_cleared_alloc<machine_function> ();
26808 #if ARM_FT_UNKNOWN != 0
26809 machine->func_type = ARM_FT_UNKNOWN;
26810 #endif
26811 machine->static_chain_stack_bytes = -1;
26812 return machine;
26815 /* Return an RTX indicating where the return address to the
26816 calling function can be found. */
26818 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26820 if (count != 0)
26821 return NULL_RTX;
26823 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26826 /* Do anything needed before RTL is emitted for each function. */
26827 void
26828 arm_init_expanders (void)
26830 /* Arrange to initialize and mark the machine per-function status. */
26831 init_machine_status = arm_init_machine_status;
26833 /* This is to stop the combine pass optimizing away the alignment
26834 adjustment of va_arg. */
26835 /* ??? It is claimed that this should not be necessary. */
26836 if (cfun)
26837 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26840 /* Check whether FUNC is compiled for a different (ARM/Thumb) mode than the current one. */
26842 bool
26843 arm_change_mode_p (tree func)
26845 if (TREE_CODE (func) != FUNCTION_DECL)
26846 return false;
26848 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26850 if (!callee_tree)
26851 callee_tree = target_option_default_node;
26853 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26854 int flags = callee_opts->x_target_flags;
26856 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26859 /* Like arm_compute_initial_elimination_offset. Simpler because there
26860 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
26861 to point at the base of the local variables after static stack
26862 space for a function has been allocated. */
26864 HOST_WIDE_INT
26865 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26867 arm_stack_offsets *offsets;
26869 offsets = arm_get_frame_offsets ();
26871 switch (from)
26873 case ARG_POINTER_REGNUM:
26874 switch (to)
26876 case STACK_POINTER_REGNUM:
26877 return offsets->outgoing_args - offsets->saved_args;
26879 case FRAME_POINTER_REGNUM:
26880 return offsets->soft_frame - offsets->saved_args;
26882 case ARM_HARD_FRAME_POINTER_REGNUM:
26883 return offsets->saved_regs - offsets->saved_args;
26885 case THUMB_HARD_FRAME_POINTER_REGNUM:
26886 return offsets->locals_base - offsets->saved_args;
26888 default:
26889 gcc_unreachable ();
26891 break;
26893 case FRAME_POINTER_REGNUM:
26894 switch (to)
26896 case STACK_POINTER_REGNUM:
26897 return offsets->outgoing_args - offsets->soft_frame;
26899 case ARM_HARD_FRAME_POINTER_REGNUM:
26900 return offsets->saved_regs - offsets->soft_frame;
26902 case THUMB_HARD_FRAME_POINTER_REGNUM:
26903 return offsets->locals_base - offsets->soft_frame;
26905 default:
26906 gcc_unreachable ();
26908 break;
26910 default:
26911 gcc_unreachable ();
26915 /* Generate the function's prologue. */
26917 void
26918 thumb1_expand_prologue (void)
26920 rtx_insn *insn;
26922 HOST_WIDE_INT amount;
26923 HOST_WIDE_INT size;
26924 arm_stack_offsets *offsets;
26925 unsigned long func_type;
26926 int regno;
26927 unsigned long live_regs_mask;
26928 unsigned long l_mask;
26929 unsigned high_regs_pushed = 0;
26930 bool lr_needs_saving;
26932 func_type = arm_current_func_type ();
26934 /* Naked functions don't have prologues. */
26935 if (IS_NAKED (func_type))
26937 if (flag_stack_usage_info)
26938 current_function_static_stack_size = 0;
26939 return;
26942 if (IS_INTERRUPT (func_type))
26944 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26945 return;
26948 if (is_called_in_ARM_mode (current_function_decl))
26949 emit_insn (gen_prologue_thumb1_interwork ());
26951 offsets = arm_get_frame_offsets ();
26952 live_regs_mask = offsets->saved_regs_mask;
26953 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26955 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26956 l_mask = live_regs_mask & 0x40ff;
26957 /* Then count how many other high registers will need to be pushed. */
26958 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26960 if (crtl->args.pretend_args_size)
26962 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26964 if (cfun->machine->uses_anonymous_args)
26966 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26967 unsigned long mask;
26969 mask = 1ul << (LAST_ARG_REGNUM + 1);
26970 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26972 insn = thumb1_emit_multi_reg_push (mask, 0);
26974 else
26976 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26977 stack_pointer_rtx, x));
26979 RTX_FRAME_RELATED_P (insn) = 1;
26982 if (TARGET_BACKTRACE)
26984 HOST_WIDE_INT offset = 0;
26985 unsigned work_register;
26986 rtx work_reg, x, arm_hfp_rtx;
26988 /* We have been asked to create a stack backtrace structure.
26989 The code looks like this:
26991 0 .align 2
26992 0 func:
26993 0 sub SP, #16 Reserve space for 4 registers.
26994 2 push {R7} Push low registers.
26995 4 add R7, SP, #20 Get the stack pointer before the push.
26996 6 str R7, [SP, #8] Store the stack pointer
26997 (before reserving the space).
26998 8 mov R7, PC Get hold of the start of this code + 12.
26999 10 str R7, [SP, #16] Store it.
27000 12 mov R7, FP Get hold of the current frame pointer.
27001 14 str R7, [SP, #4] Store it.
27002 16 mov R7, LR Get hold of the current return address.
27003 18 str R7, [SP, #12] Store it.
27004 20 add R7, SP, #16 Point at the start of the
27005 backtrace structure.
27006 22 mov FP, R7 Put this value into the frame pointer. */
27008 work_register = thumb_find_work_register (live_regs_mask);
27009 work_reg = gen_rtx_REG (SImode, work_register);
27010 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27012 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27013 stack_pointer_rtx, GEN_INT (-16)));
27014 RTX_FRAME_RELATED_P (insn) = 1;
27016 if (l_mask)
27018 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27019 RTX_FRAME_RELATED_P (insn) = 1;
27020 lr_needs_saving = false;
27022 offset = bit_count (l_mask) * UNITS_PER_WORD;
27025 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27026 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27028 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27029 x = gen_frame_mem (SImode, x);
27030 emit_move_insn (x, work_reg);
27032 /* Make sure that the instruction fetching the PC is in the right place
27033 to calculate "start of backtrace creation code + 12". */
27034 /* ??? The stores using the common WORK_REG ought to be enough to
27035 prevent the scheduler from doing anything weird. Failing that
27036 we could always move all of the following into an UNSPEC_VOLATILE. */
27037 if (l_mask)
27039 x = gen_rtx_REG (SImode, PC_REGNUM);
27040 emit_move_insn (work_reg, x);
27042 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27043 x = gen_frame_mem (SImode, x);
27044 emit_move_insn (x, work_reg);
27046 emit_move_insn (work_reg, arm_hfp_rtx);
27048 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27049 x = gen_frame_mem (SImode, x);
27050 emit_move_insn (x, work_reg);
27052 else
27054 emit_move_insn (work_reg, arm_hfp_rtx);
27056 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27057 x = gen_frame_mem (SImode, x);
27058 emit_move_insn (x, work_reg);
27060 x = gen_rtx_REG (SImode, PC_REGNUM);
27061 emit_move_insn (work_reg, x);
27063 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27064 x = gen_frame_mem (SImode, x);
27065 emit_move_insn (x, work_reg);
27068 x = gen_rtx_REG (SImode, LR_REGNUM);
27069 emit_move_insn (work_reg, x);
27071 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27072 x = gen_frame_mem (SImode, x);
27073 emit_move_insn (x, work_reg);
27075 x = GEN_INT (offset + 12);
27076 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27078 emit_move_insn (arm_hfp_rtx, work_reg);
27080 /* Optimization: If we are not pushing any low registers but we are going
27081 to push some high registers then delay our first push. This will just
27082 be a push of LR and we can combine it with the push of the first high
27083 register. */
27084 else if ((l_mask & 0xff) != 0
27085 || (high_regs_pushed == 0 && lr_needs_saving))
27087 unsigned long mask = l_mask;
27088 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27089 insn = thumb1_emit_multi_reg_push (mask, mask);
27090 RTX_FRAME_RELATED_P (insn) = 1;
27091 lr_needs_saving = false;
27094 if (high_regs_pushed)
27096 unsigned pushable_regs;
27097 unsigned next_hi_reg;
27098 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27099 : crtl->args.info.nregs;
27100 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27102 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27103 if (live_regs_mask & (1 << next_hi_reg))
27104 break;
27106 /* Here we need to mask out registers used for passing arguments
27107 even if they can be pushed. This is to avoid using them to
27108 stash the high registers. Such a stash could clobber the
27109 arguments. */
27110 pushable_regs = l_mask & (~arg_regs_mask);
27111 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27113 /* Normally, LR can be used as a scratch register once it has been
27114 saved; but if the function examines its own return address then
27115 the value is still live and we need to avoid using it. */
27116 bool return_addr_live
27117 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27118 LR_REGNUM);
27120 if (lr_needs_saving || return_addr_live)
27121 pushable_regs &= ~(1 << LR_REGNUM);
27123 if (pushable_regs == 0)
27124 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27126 while (high_regs_pushed > 0)
27128 unsigned long real_regs_mask = 0;
27129 unsigned long push_mask = 0;
27131 for (regno = LR_REGNUM; regno >= 0; regno --)
27133 if (pushable_regs & (1 << regno))
27135 emit_move_insn (gen_rtx_REG (SImode, regno),
27136 gen_rtx_REG (SImode, next_hi_reg));
27138 high_regs_pushed --;
27139 real_regs_mask |= (1 << next_hi_reg);
27140 push_mask |= (1 << regno);
27142 if (high_regs_pushed)
27144 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27145 next_hi_reg --)
27146 if (live_regs_mask & (1 << next_hi_reg))
27147 break;
27149 else
27150 break;
27154 /* If we had to find a work register and we have not yet
27155 saved the LR then add it to the list of regs to push. */
27156 if (lr_needs_saving)
27158 push_mask |= 1 << LR_REGNUM;
27159 real_regs_mask |= 1 << LR_REGNUM;
27160 lr_needs_saving = false;
27161 /* If the return address is not live at this point, we
27162 can add LR to the list of registers that we can use
27163 for pushes. */
27164 if (!return_addr_live)
27165 pushable_regs |= 1 << LR_REGNUM;
27168 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27169 RTX_FRAME_RELATED_P (insn) = 1;
27173 /* Load the pic register before setting the frame pointer,
27174 so we can use r7 as a temporary work register. */
27175 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27176 arm_load_pic_register (live_regs_mask, NULL_RTX);
27178 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27179 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27180 stack_pointer_rtx);
27182 size = offsets->outgoing_args - offsets->saved_args;
27183 if (flag_stack_usage_info)
27184 current_function_static_stack_size = size;
27186 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27187 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27188 || flag_stack_clash_protection)
27189 && size)
27190 sorry ("%<-fstack-check=specific%> for Thumb-1");
27192 amount = offsets->outgoing_args - offsets->saved_regs;
27193 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27194 if (amount)
27196 if (amount < 512)
27198 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27199 GEN_INT (- amount)));
27200 RTX_FRAME_RELATED_P (insn) = 1;
27202 else
27204 rtx reg, dwarf;
27206 /* The stack decrement is too big for an immediate value in a single
27207 insn. In theory we could issue multiple subtracts, but after
27208 three of them it becomes more space efficient to place the full
27209 value in the constant pool and load into a register. (Also the
27210 ARM debugger really likes to see only one stack decrement per
27211 function). So instead we look for a scratch register into which
27212 we can load the decrement, and then we subtract this from the
27213 stack pointer. Unfortunately on the thumb the only available
27214 scratch registers are the argument registers, and we cannot use
27215 these as they may hold arguments to the function. Instead we
27216 attempt to locate a call preserved register which is used by this
27217 function. If we can find one, then we know that it will have
27218 been pushed at the start of the prologue and so we can corrupt
27219 it now. */
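/* Editorial example (not from the original source): if r7 is the only
   call-saved low register in LIVE_REGS_MASK, the loop below picks
   regno == 7; the negated frame size is then loaded into r7 (typically
   from the literal pool) and a single "add sp, sp, r7" performs the whole
   decrement.  */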
27220 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27221 if (live_regs_mask & (1 << regno))
27222 break;
27224 gcc_assert(regno <= LAST_LO_REGNUM);
27226 reg = gen_rtx_REG (SImode, regno);
27228 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27230 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27231 stack_pointer_rtx, reg));
27233 dwarf = gen_rtx_SET (stack_pointer_rtx,
27234 plus_constant (Pmode, stack_pointer_rtx,
27235 -amount));
27236 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27237 RTX_FRAME_RELATED_P (insn) = 1;
27241 if (frame_pointer_needed)
27242 thumb_set_frame_pointer (offsets);
27244 /* If we are profiling, make sure no instructions are scheduled before
27245 the call to mcount. Similarly if the user has requested no
27246 scheduling in the prologue. Similarly if we want non-call exceptions
27247 using the EABI unwinder, to prevent faulting instructions from being
27248 swapped with a stack adjustment. */
27249 if (crtl->profile || !TARGET_SCHED_PROLOG
27250 || (arm_except_unwind_info (&global_options) == UI_TARGET
27251 && cfun->can_throw_non_call_exceptions))
27252 emit_insn (gen_blockage ());
27254 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27255 if (live_regs_mask & 0xff)
27256 cfun->machine->lr_save_eliminated = 0;
27259 /* Clear caller-saved registers not used to pass return values, and leaked
27260 condition flags, before exiting a cmse_nonsecure_entry function. */
27262 void
27263 cmse_nonsecure_entry_clear_before_return (void)
27265 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27266 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27267 uint32_t padding_bits_to_clear = 0;
27268 auto_sbitmap to_clear_bitmap (maxregno + 1);
27269 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27270 tree result_type;
27272 bitmap_clear (to_clear_bitmap);
27273 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27274 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27276 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27277 registers. */
27278 if (clear_vfpregs)
27280 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27282 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27284 if (!TARGET_HAVE_FPCXT_CMSE)
27286 /* Make sure we don't clear the two scratch registers used to clear
27287 the relevant FPSCR bits in output_return_instruction. */
27288 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27289 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27290 emit_use (gen_rtx_REG (SImode, 4));
27291 bitmap_clear_bit (to_clear_bitmap, 4);
27295 /* If the user has defined registers to be caller saved, these are no longer
27296 restored by the function before returning and must thus be cleared for
27297 security purposes. */
27298 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27300 /* We do not touch registers that can be used to pass arguments as per
27301 the AAPCS, since these should never be made callee-saved by user
27302 options. */
27303 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27304 continue;
27305 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27306 continue;
27307 if (!callee_saved_reg_p (regno)
27308 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27309 || TARGET_HARD_FLOAT))
27310 bitmap_set_bit (to_clear_bitmap, regno);
27313 /* Make sure we do not clear the registers used to return the result. */
27314 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27315 if (!VOID_TYPE_P (result_type))
27317 uint64_t to_clear_return_mask;
27318 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27320 /* No need to check that we return in registers, because we don't
27321 support returning on stack yet. */
27322 gcc_assert (REG_P (result_rtl));
27323 to_clear_return_mask
27324 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27325 &padding_bits_to_clear);
27326 if (to_clear_return_mask)
27328 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27329 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27331 if (to_clear_return_mask & (1ULL << regno))
27332 bitmap_clear_bit (to_clear_bitmap, regno);
27337 if (padding_bits_to_clear != 0)
27339 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27340 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27342 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27343 returning a composite type, which only uses r0. Let's make sure that
27344 r1-r3 are cleared too. */
27345 bitmap_clear (to_clear_arg_regs_bitmap);
27346 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27347 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27350 /* Clear full registers that leak before returning. */
27351 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27352 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27353 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27354 clearing_reg);
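/* Editorial illustration (hypothetical example, not part of the original
   source): a function that reaches the clearing code above when compiled
   with -mcmse.

     int __attribute__ ((cmse_nonsecure_entry))
     get_status (void)
     {
       return status;	// hypothetical secure-state variable
     }

   Its epilogue keeps r0 (the return value) but clears the remaining
   argument registers (r1-r3), ip and the condition flags (and, depending
   on the FP configuration, the caller-saved VFP registers) before
   returning with bxns.  */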
27357 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27358 POP instruction can be generated. LR should be replaced by PC. All
27359 the checks required are already done by USE_RETURN_INSN (). Hence,
27360 all we really need to check here is whether a single register or
27361 multiple registers are to be popped. */
27362 void
27363 thumb2_expand_return (bool simple_return)
27365 int i, num_regs;
27366 unsigned long saved_regs_mask;
27367 arm_stack_offsets *offsets;
27369 offsets = arm_get_frame_offsets ();
27370 saved_regs_mask = offsets->saved_regs_mask;
27372 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27373 if (saved_regs_mask & (1 << i))
27374 num_regs++;
27376 if (!simple_return && saved_regs_mask)
27378 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27379 functions, or adapt the code to handle it according to the ACLE. This path
27380 should not be reachable for cmse_nonsecure_entry functions, though we prefer
27381 to assert it for now to ensure that future code changes do not silently
27382 change this behavior. */
27383 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27384 if (num_regs == 1)
27386 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27387 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27388 rtx addr = gen_rtx_MEM (SImode,
27389 gen_rtx_POST_INC (SImode,
27390 stack_pointer_rtx));
27391 set_mem_alias_set (addr, get_frame_alias_set ());
27392 XVECEXP (par, 0, 0) = ret_rtx;
27393 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27394 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27395 emit_jump_insn (par);
27397 else
27399 saved_regs_mask &= ~ (1 << LR_REGNUM);
27400 saved_regs_mask |= (1 << PC_REGNUM);
27401 arm_emit_multi_reg_pop (saved_regs_mask);
27404 else
27406 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27407 cmse_nonsecure_entry_clear_before_return ();
27408 emit_jump_insn (simple_return_rtx);
27412 void
27413 thumb1_expand_epilogue (void)
27415 HOST_WIDE_INT amount;
27416 arm_stack_offsets *offsets;
27417 int regno;
27419 /* Naked functions don't have epilogues. */
27420 if (IS_NAKED (arm_current_func_type ()))
27421 return;
27423 offsets = arm_get_frame_offsets ();
27424 amount = offsets->outgoing_args - offsets->saved_regs;
27426 if (frame_pointer_needed)
27428 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27429 amount = offsets->locals_base - offsets->saved_regs;
27431 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27433 gcc_assert (amount >= 0);
27434 if (amount)
27436 emit_insn (gen_blockage ());
27438 if (amount < 512)
27439 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27440 GEN_INT (amount)));
27441 else
27443 /* r3 is always free in the epilogue. */
27444 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27446 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27447 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27451 /* Emit a USE (stack_pointer_rtx), so that
27452 the stack adjustment will not be deleted. */
27453 emit_insn (gen_force_register_use (stack_pointer_rtx));
27455 if (crtl->profile || !TARGET_SCHED_PROLOG)
27456 emit_insn (gen_blockage ());
27458 /* Emit a clobber for each insn that will be restored in the epilogue,
27459 so that flow2 will get register lifetimes correct. */
27460 for (regno = 0; regno < 13; regno++)
27461 if (reg_needs_saving_p (regno))
27462 emit_clobber (gen_rtx_REG (SImode, regno));
27464 if (! df_regs_ever_live_p (LR_REGNUM))
27465 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27467 /* Clear all caller-saved regs that are not used to return. */
27468 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27469 cmse_nonsecure_entry_clear_before_return ();
27472 /* Epilogue code for APCS frame. */
27473 static void
27474 arm_expand_epilogue_apcs_frame (bool really_return)
27476 unsigned long func_type;
27477 unsigned long saved_regs_mask;
27478 int num_regs = 0;
27479 int i;
27480 int floats_from_frame = 0;
27481 arm_stack_offsets *offsets;
27483 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27484 func_type = arm_current_func_type ();
27486 /* Get frame offsets for ARM. */
27487 offsets = arm_get_frame_offsets ();
27488 saved_regs_mask = offsets->saved_regs_mask;
27490 /* Find the offset of the floating-point save area in the frame. */
27491 floats_from_frame
27492 = (offsets->saved_args
27493 + arm_compute_static_chain_stack_bytes ()
27494 - offsets->frame);
27496 /* Compute how many core registers are saved and how far away the floats are. */
27497 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27498 if (saved_regs_mask & (1 << i))
27500 num_regs++;
27501 floats_from_frame += 4;
27504 if (TARGET_VFP_BASE)
27506 int start_reg;
27507 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27509 /* The offset is from IP_REGNUM. */
27510 int saved_size = arm_get_vfp_saved_size ();
27511 if (saved_size > 0)
27513 rtx_insn *insn;
27514 floats_from_frame += saved_size;
27515 insn = emit_insn (gen_addsi3 (ip_rtx,
27516 hard_frame_pointer_rtx,
27517 GEN_INT (-floats_from_frame)));
27518 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27519 ip_rtx, hard_frame_pointer_rtx);
27522 /* Generate VFP register multi-pop. */
27523 start_reg = FIRST_VFP_REGNUM;
27525 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27526 /* Look for a case where a reg does not need restoring. */
27527 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27529 if (start_reg != i)
27530 arm_emit_vfp_multi_reg_pop (start_reg,
27531 (i - start_reg) / 2,
27532 gen_rtx_REG (SImode,
27533 IP_REGNUM));
27534 start_reg = i + 2;
27537 /* Restore the remaining regs that we have discovered (or possibly
27538 even all of them, if the conditional in the for loop never
27539 fired). */
27540 if (start_reg != i)
27541 arm_emit_vfp_multi_reg_pop (start_reg,
27542 (i - start_reg) / 2,
27543 gen_rtx_REG (SImode, IP_REGNUM));
27546 if (TARGET_IWMMXT)
27548 /* The frame pointer is guaranteed to be non-double-word aligned, as
27549 it is set to double-word-aligned old_stack_pointer - 4. */
27550 rtx_insn *insn;
27551 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27553 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27554 if (reg_needs_saving_p (i))
27556 rtx addr = gen_frame_mem (V2SImode,
27557 plus_constant (Pmode, hard_frame_pointer_rtx,
27558 - lrm_count * 4));
27559 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27560 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27561 gen_rtx_REG (V2SImode, i),
27562 NULL_RTX);
27563 lrm_count += 2;
27567 /* saved_regs_mask should contain IP which contains old stack pointer
27568 at the time of activation creation. Since SP and IP are adjacent registers,
27569 we can restore the value directly into SP. */
27570 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27571 saved_regs_mask &= ~(1 << IP_REGNUM);
27572 saved_regs_mask |= (1 << SP_REGNUM);
27574 /* There are two registers left in saved_regs_mask - LR and PC. We
27575 only need to restore LR (the return address), but to
27576 save time we can load it directly into PC, unless we need a
27577 special function exit sequence, or we are not really returning. */
27578 if (really_return
27579 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27580 && !crtl->calls_eh_return)
27581 /* Delete LR from the register mask, so that the saved LR on
27582 the stack is popped directly into the PC. */
27583 saved_regs_mask &= ~(1 << LR_REGNUM);
27584 else
27585 saved_regs_mask &= ~(1 << PC_REGNUM);
27587 num_regs = bit_count (saved_regs_mask);
27588 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27590 rtx_insn *insn;
27591 emit_insn (gen_blockage ());
27592 /* Unwind the stack to just below the saved registers. */
27593 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27594 hard_frame_pointer_rtx,
27595 GEN_INT (- 4 * num_regs)));
27597 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27598 stack_pointer_rtx, hard_frame_pointer_rtx);
27601 arm_emit_multi_reg_pop (saved_regs_mask);
27603 if (IS_INTERRUPT (func_type))
27605 /* Interrupt handlers will have pushed the
27606 IP onto the stack, so restore it now. */
27607 rtx_insn *insn;
27608 rtx addr = gen_rtx_MEM (SImode,
27609 gen_rtx_POST_INC (SImode,
27610 stack_pointer_rtx));
27611 set_mem_alias_set (addr, get_frame_alias_set ());
27612 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27613 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27614 gen_rtx_REG (SImode, IP_REGNUM),
27615 NULL_RTX);
27618 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27619 return;
27621 if (crtl->calls_eh_return)
27622 emit_insn (gen_addsi3 (stack_pointer_rtx,
27623 stack_pointer_rtx,
27624 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27626 if (IS_STACKALIGN (func_type))
27627 /* Restore the original stack pointer. Before prologue, the stack was
27628 realigned and the original stack pointer saved in r0. For details,
27629 see comment in arm_expand_prologue. */
27630 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27632 emit_jump_insn (simple_return_rtx);
27635 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27636 function is not a sibcall. */
27637 void
27638 arm_expand_epilogue (bool really_return)
27640 unsigned long func_type;
27641 unsigned long saved_regs_mask;
27642 int num_regs = 0;
27643 int i;
27644 int amount;
27645 arm_stack_offsets *offsets;
27647 func_type = arm_current_func_type ();
27649 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27650 let output_return_instruction take care of any instruction emission. */
27651 if (IS_NAKED (func_type)
27652 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27654 if (really_return)
27655 emit_jump_insn (simple_return_rtx);
27656 return;
27659 /* If we are throwing an exception, then we really must be doing a
27660 return, so we can't tail-call. */
27661 gcc_assert (!crtl->calls_eh_return || really_return);
27663 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27665 arm_expand_epilogue_apcs_frame (really_return);
27666 return;
27669 /* Get frame offsets for ARM. */
27670 offsets = arm_get_frame_offsets ();
27671 saved_regs_mask = offsets->saved_regs_mask;
27672 num_regs = bit_count (saved_regs_mask);
27674 if (frame_pointer_needed)
27676 rtx_insn *insn;
27677 /* Restore stack pointer if necessary. */
27678 if (TARGET_ARM)
27680 /* In ARM mode, frame pointer points to first saved register.
27681 Restore stack pointer to last saved register. */
27682 amount = offsets->frame - offsets->saved_regs;
27684 /* Force out any pending memory operations that reference stacked data
27685 before stack de-allocation occurs. */
27686 emit_insn (gen_blockage ());
27687 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27688 hard_frame_pointer_rtx,
27689 GEN_INT (amount)));
27690 arm_add_cfa_adjust_cfa_note (insn, amount,
27691 stack_pointer_rtx,
27692 hard_frame_pointer_rtx);
27694 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27695 deleted. */
27696 emit_insn (gen_force_register_use (stack_pointer_rtx));
27698 else
27700 /* In Thumb-2 mode, the frame pointer points to the last saved
27701 register. */
27702 amount = offsets->locals_base - offsets->saved_regs;
27703 if (amount)
27705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27706 hard_frame_pointer_rtx,
27707 GEN_INT (amount)));
27708 arm_add_cfa_adjust_cfa_note (insn, amount,
27709 hard_frame_pointer_rtx,
27710 hard_frame_pointer_rtx);
27713 /* Force out any pending memory operations that reference stacked data
27714 before stack de-allocation occurs. */
27715 emit_insn (gen_blockage ());
27716 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27717 hard_frame_pointer_rtx));
27718 arm_add_cfa_adjust_cfa_note (insn, 0,
27719 stack_pointer_rtx,
27720 hard_frame_pointer_rtx);
27721 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27722 deleted. */
27723 emit_insn (gen_force_register_use (stack_pointer_rtx));
27726 else
27728 /* Pop off outgoing args and local frame to adjust stack pointer to
27729 last saved register. */
27730 amount = offsets->outgoing_args - offsets->saved_regs;
27731 if (amount)
27733 rtx_insn *tmp;
27734 /* Force out any pending memory operations that reference stacked data
27735 before stack de-allocation occurs. */
27736 emit_insn (gen_blockage ());
27737 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27738 stack_pointer_rtx,
27739 GEN_INT (amount)));
27740 arm_add_cfa_adjust_cfa_note (tmp, amount,
27741 stack_pointer_rtx, stack_pointer_rtx);
27742 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27743 not deleted. */
27744 emit_insn (gen_force_register_use (stack_pointer_rtx));
27748 if (TARGET_VFP_BASE)
27750 /* Generate VFP register multi-pop. */
27751 int end_reg = LAST_VFP_REGNUM + 1;
27753 /* Scan the registers in reverse order. We need to match
27754 any groupings made in the prologue and generate matching
27755 vldm operations. The need to match groups is because,
27756 unlike pop, vldm can only do consecutive regs. */
27757 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27758 /* Look for a case where a reg does not need restoring. */
27759 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27761 /* Restore the regs discovered so far (from reg+2 to
27762 end_reg). */
27763 if (end_reg > i + 2)
27764 arm_emit_vfp_multi_reg_pop (i + 2,
27765 (end_reg - (i + 2)) / 2,
27766 stack_pointer_rtx);
27767 end_reg = i;
27770 /* Restore the remaining regs that we have discovered (or possibly
27771 even all of them, if the conditional in the for loop never
27772 fired). */
27773 if (end_reg > i + 2)
27774 arm_emit_vfp_multi_reg_pop (i + 2,
27775 (end_reg - (i + 2)) / 2,
27776 stack_pointer_rtx);
27779 if (TARGET_IWMMXT)
27780 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27781 if (reg_needs_saving_p (i))
27783 rtx_insn *insn;
27784 rtx addr = gen_rtx_MEM (V2SImode,
27785 gen_rtx_POST_INC (SImode,
27786 stack_pointer_rtx));
27787 set_mem_alias_set (addr, get_frame_alias_set ());
27788 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27789 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27790 gen_rtx_REG (V2SImode, i),
27791 NULL_RTX);
27792 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27793 stack_pointer_rtx, stack_pointer_rtx);
27796 if (saved_regs_mask)
27798 rtx insn;
27799 bool return_in_pc = false;
27801 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27802 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27803 && !IS_CMSE_ENTRY (func_type)
27804 && !IS_STACKALIGN (func_type)
27805 && really_return
27806 && crtl->args.pretend_args_size == 0
27807 && saved_regs_mask & (1 << LR_REGNUM)
27808 && !crtl->calls_eh_return)
27810 saved_regs_mask &= ~(1 << LR_REGNUM);
27811 saved_regs_mask |= (1 << PC_REGNUM);
27812 return_in_pc = true;
27815 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27817 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27818 if (saved_regs_mask & (1 << i))
27820 rtx addr = gen_rtx_MEM (SImode,
27821 gen_rtx_POST_INC (SImode,
27822 stack_pointer_rtx));
27823 set_mem_alias_set (addr, get_frame_alias_set ());
27825 if (i == PC_REGNUM)
27827 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27828 XVECEXP (insn, 0, 0) = ret_rtx;
27829 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27830 addr);
27831 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27832 insn = emit_jump_insn (insn);
27834 else
27836 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27837 addr));
27838 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27839 gen_rtx_REG (SImode, i),
27840 NULL_RTX);
27841 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27842 stack_pointer_rtx,
27843 stack_pointer_rtx);
27847 else
27849 if (TARGET_LDRD
27850 && current_tune->prefer_ldrd_strd
27851 && !optimize_function_for_size_p (cfun))
27853 if (TARGET_THUMB2)
27854 thumb2_emit_ldrd_pop (saved_regs_mask);
27855 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27856 arm_emit_ldrd_pop (saved_regs_mask);
27857 else
27858 arm_emit_multi_reg_pop (saved_regs_mask);
27860 else
27861 arm_emit_multi_reg_pop (saved_regs_mask);
27864 if (return_in_pc)
27865 return;
27868 amount
27869 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27870 if (amount)
27872 int i, j;
27873 rtx dwarf = NULL_RTX;
27874 rtx_insn *tmp =
27875 emit_insn (gen_addsi3 (stack_pointer_rtx,
27876 stack_pointer_rtx,
27877 GEN_INT (amount)));
27879 RTX_FRAME_RELATED_P (tmp) = 1;
27881 if (cfun->machine->uses_anonymous_args)
27883 /* Restore pretend args. Refer to arm_expand_prologue for how
27884 pretend_args are saved on the stack. */
27885 int num_regs = crtl->args.pretend_args_size / 4;
27886 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27887 for (j = 0, i = 0; j < num_regs; i++)
27888 if (saved_regs_mask & (1 << i))
27890 rtx reg = gen_rtx_REG (SImode, i);
27891 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27892 j++;
27894 REG_NOTES (tmp) = dwarf;
27896 arm_add_cfa_adjust_cfa_note (tmp, amount,
27897 stack_pointer_rtx, stack_pointer_rtx);
27900 if (IS_CMSE_ENTRY (func_type))
27902 /* CMSE_ENTRY always returns. */
27903 gcc_assert (really_return);
27904 /* Clear all caller-saved regs that are not used to return. */
27905 cmse_nonsecure_entry_clear_before_return ();
27907 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27908 VLDR. */
27909 if (TARGET_HAVE_FPCXT_CMSE)
27911 rtx_insn *insn;
27913 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27914 GEN_INT (FPCXTNS_ENUM)));
27915 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27916 plus_constant (Pmode, stack_pointer_rtx, 4));
27917 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27918 RTX_FRAME_RELATED_P (insn) = 1;
27922 if (!really_return)
27923 return;
27925 if (crtl->calls_eh_return)
27926 emit_insn (gen_addsi3 (stack_pointer_rtx,
27927 stack_pointer_rtx,
27928 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27930 if (IS_STACKALIGN (func_type))
27931 /* Restore the original stack pointer. Before prologue, the stack was
27932 realigned and the original stack pointer saved in r0. For details,
27933 see comment in arm_expand_prologue. */
27934 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27936 emit_jump_insn (simple_return_rtx);
27939 /* Implementation of insn prologue_thumb1_interwork. This is the first
27940 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27942 const char *
27943 thumb1_output_interwork (void)
27945 const char * name;
27946 FILE *f = asm_out_file;
27948 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27949 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27950 == SYMBOL_REF);
27951 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27953 /* Generate code sequence to switch us into Thumb mode. */
27954 /* The .code 32 directive has already been emitted by
27955 ASM_DECLARE_FUNCTION_NAME. */
27956 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27957 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27959 /* Generate a label, so that the debugger will notice the
27960 change in instruction sets. This label is also used by
27961 the assembler to bypass the ARM code when this function
27962 is called from a Thumb encoded function elsewhere in the
27963 same file. Hence the definition of STUB_NAME here must
27964 agree with the definition in gas/config/tc-arm.c. */
27966 #define STUB_NAME ".real_start_of"
27968 fprintf (f, "\t.code\t16\n");
27969 #ifdef ARM_PE
27970 if (arm_dllexport_name_p (name))
27971 name = arm_strip_name_encoding (name);
27972 #endif
27973 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27974 fprintf (f, "\t.thumb_func\n");
27975 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27977 return "";
27980 /* Handle the case of a double-word load into a pair of low registers from
27981 a computed memory address. The computed address may involve a
27982 register which is overwritten by the load. */
27983 const char *
27984 thumb_load_double_from_address (rtx *operands)
27986 rtx addr;
27987 rtx base;
27988 rtx offset;
27989 rtx arg1;
27990 rtx arg2;
27992 gcc_assert (REG_P (operands[0]));
27993 gcc_assert (MEM_P (operands[1]));
27995 /* Get the memory address. */
27996 addr = XEXP (operands[1], 0);
27998 /* Work out how the memory address is computed. */
27999 switch (GET_CODE (addr))
28001 case REG:
28002 operands[2] = adjust_address (operands[1], SImode, 4);
28004 if (REGNO (operands[0]) == REGNO (addr))
28006 output_asm_insn ("ldr\t%H0, %2", operands);
28007 output_asm_insn ("ldr\t%0, %1", operands);
28009 else
28011 output_asm_insn ("ldr\t%0, %1", operands);
28012 output_asm_insn ("ldr\t%H0, %2", operands);
28014 break;
28016 case CONST:
28017 /* Compute <address> + 4 for the high order load. */
28018 operands[2] = adjust_address (operands[1], SImode, 4);
28020 output_asm_insn ("ldr\t%0, %1", operands);
28021 output_asm_insn ("ldr\t%H0, %2", operands);
28022 break;
28024 case PLUS:
28025 arg1 = XEXP (addr, 0);
28026 arg2 = XEXP (addr, 1);
28028 if (CONSTANT_P (arg1))
28029 base = arg2, offset = arg1;
28030 else
28031 base = arg1, offset = arg2;
28033 gcc_assert (REG_P (base));
28035 /* Catch the case of <address> = <reg> + <reg> */
28036 if (REG_P (offset))
28038 int reg_offset = REGNO (offset);
28039 int reg_base = REGNO (base);
28040 int reg_dest = REGNO (operands[0]);
28042 /* Add the base and offset registers together into the
28043 higher destination register. */
28044 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28045 reg_dest + 1, reg_base, reg_offset);
28047 /* Load the lower destination register from the address in
28048 the higher destination register. */
28049 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28050 reg_dest, reg_dest + 1);
28052 /* Load the higher destination register from its own address
28053 plus 4. */
28054 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28055 reg_dest + 1, reg_dest + 1);
28057 else
28059 /* Compute <address> + 4 for the high order load. */
28060 operands[2] = adjust_address (operands[1], SImode, 4);
28062 /* If the computed address is held in the low order register
28063 then load the high order register first, otherwise always
28064 load the low order register first. */
28065 if (REGNO (operands[0]) == REGNO (base))
28067 output_asm_insn ("ldr\t%H0, %2", operands);
28068 output_asm_insn ("ldr\t%0, %1", operands);
28070 else
28072 output_asm_insn ("ldr\t%0, %1", operands);
28073 output_asm_insn ("ldr\t%H0, %2", operands);
28076 break;
28078 case LABEL_REF:
28079 /* With no registers to worry about we can just load the value
28080 directly. */
28081 operands[2] = adjust_address (operands[1], SImode, 4);
28083 output_asm_insn ("ldr\t%H0, %2", operands);
28084 output_asm_insn ("ldr\t%0, %1", operands);
28085 break;
28087 default:
28088 gcc_unreachable ();
28091 return "";
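/* Editorial example (not from the original source): for the REG + REG case
   above, loading the pair r2/r3 from the address r0 + r1 is emitted as
	add	r3, r0, r1
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]
   i.e. the address is kept in the high half of the destination, which is
   only overwritten by the final load.  */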
28094 const char *
28095 thumb_output_move_mem_multiple (int n, rtx *operands)
28097 switch (n)
28099 case 2:
28100 if (REGNO (operands[4]) > REGNO (operands[5]))
28101 std::swap (operands[4], operands[5]);
28103 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28104 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28105 break;
28107 case 3:
28108 if (REGNO (operands[4]) > REGNO (operands[5]))
28109 std::swap (operands[4], operands[5]);
28110 if (REGNO (operands[5]) > REGNO (operands[6]))
28111 std::swap (operands[5], operands[6]);
28112 if (REGNO (operands[4]) > REGNO (operands[5]))
28113 std::swap (operands[4], operands[5]);
28115 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28116 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28117 break;
28119 default:
28120 gcc_unreachable ();
28123 return "";
28126 /* Output a call-via instruction for thumb state. */
28127 const char *
28128 thumb_call_via_reg (rtx reg)
28130 int regno = REGNO (reg);
28131 rtx *labelp;
28133 gcc_assert (regno < LR_REGNUM);
28135 /* If we are in the normal text section we can use a single instance
28136 per compilation unit. If we are doing function sections, then we need
28137 an entry per section, since we can't rely on reachability. */
28138 if (in_section == text_section)
28140 thumb_call_reg_needed = 1;
28142 if (thumb_call_via_label[regno] == NULL)
28143 thumb_call_via_label[regno] = gen_label_rtx ();
28144 labelp = thumb_call_via_label + regno;
28146 else
28148 if (cfun->machine->call_via[regno] == NULL)
28149 cfun->machine->call_via[regno] = gen_label_rtx ();
28150 labelp = cfun->machine->call_via + regno;
28153 output_asm_insn ("bl\t%a0", labelp);
28154 return "";
28157 /* Routines for generating rtl. */
28158 void
28159 thumb_expand_cpymemqi (rtx *operands)
28161 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28162 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28163 HOST_WIDE_INT len = INTVAL (operands[2]);
28164 HOST_WIDE_INT offset = 0;
28166 while (len >= 12)
28168 emit_insn (gen_cpymem12b (out, in, out, in));
28169 len -= 12;
28172 if (len >= 8)
28174 emit_insn (gen_cpymem8b (out, in, out, in));
28175 len -= 8;
28178 if (len >= 4)
28180 rtx reg = gen_reg_rtx (SImode);
28181 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28182 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28183 len -= 4;
28184 offset += 4;
28187 if (len >= 2)
28189 rtx reg = gen_reg_rtx (HImode);
28190 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28191 plus_constant (Pmode, in,
28192 offset))));
28193 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28194 offset)),
28195 reg));
28196 len -= 2;
28197 offset += 2;
28200 if (len)
28202 rtx reg = gen_reg_rtx (QImode);
28203 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28204 plus_constant (Pmode, in,
28205 offset))));
28206 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28207 offset)),
28208 reg));
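/* Editorial example (not from the original source): for a 23-byte copy the
   expansion above emits one 12-byte and one 8-byte ldmia/stmia block (these
   patterns advance the pointers), then a halfword move at offset 0 and a
   byte move at offset 2 to cover the remaining 3 bytes.  */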
28212 void
28213 thumb_reload_out_hi (rtx *operands)
28215 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28218 /* Return the length of a function name prefix
28219 that starts with the character C. */
28220 static int
28221 arm_get_strip_length (int c)
28223 switch (c)
28225 ARM_NAME_ENCODING_LENGTHS
28226 default: return 0;
28230 /* Return a pointer to a function's name with any
28231 and all prefix encodings stripped from it. */
28232 const char *
28233 arm_strip_name_encoding (const char *name)
28235 int skip;
28237 while ((skip = arm_get_strip_length (* name)))
28238 name += skip;
28240 return name;
28243 /* If there is a '*' anywhere in the name's prefix, then
28244 emit the stripped name verbatim, otherwise prepend an
28245 underscore if leading underscores are being used. */
28246 void
28247 arm_asm_output_labelref (FILE *stream, const char *name)
28249 int skip;
28250 int verbatim = 0;
28252 while ((skip = arm_get_strip_length (* name)))
28254 verbatim |= (*name == '*');
28255 name += skip;
28258 if (verbatim)
28259 fputs (name, stream);
28260 else
28261 asm_fprintf (stream, "%U%s", name);
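/* Editorial example (not from the original source): a NAME of "*sym" is
   printed verbatim as "sym", while a plain "sym" is printed with the user
   label prefix, e.g. "_sym" on targets that use leading underscores.  */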
28264 /* This function is used to emit an EABI tag and its associated value.
28265 We emit the numerical value of the tag in case the assembler does not
28266 support textual tags (e.g. gas prior to 2.20). If requested we include
28267 the tag name in a comment so that anyone reading the assembler output
28268 will know which tag is being set.
28270 This function is not static because arm-c.cc needs it too. */
28272 void
28273 arm_emit_eabi_attribute (const char *name, int num, int val)
28275 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28276 if (flag_verbose_asm || flag_debug_asm)
28277 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28278 asm_fprintf (asm_out_file, "\n");
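/* Editorial example (not from the original source):
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) emits roughly
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args
   with the trailing comment present only under -fverbose-asm or -dA.  */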
28281 /* This function is used to print CPU tuning information as a comment
28282 in the assembler file. Pointers are not printed for now. */
28284 void
28285 arm_print_tune_info (void)
28287 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28288 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28289 current_tune->constant_limit);
28290 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28291 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28292 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28293 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28294 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28295 "prefetch.l1_cache_size:\t%d\n",
28296 current_tune->prefetch.l1_cache_size);
28297 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28298 "prefetch.l1_cache_line_size:\t%d\n",
28299 current_tune->prefetch.l1_cache_line_size);
28300 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28301 "prefer_constant_pool:\t%d\n",
28302 (int) current_tune->prefer_constant_pool);
28303 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28304 "branch_cost:\t(s:speed, p:predictable)\n");
28305 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28306 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28307 current_tune->branch_cost (false, false));
28308 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28309 current_tune->branch_cost (false, true));
28310 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28311 current_tune->branch_cost (true, false));
28312 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28313 current_tune->branch_cost (true, true));
28314 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28315 "prefer_ldrd_strd:\t%d\n",
28316 (int) current_tune->prefer_ldrd_strd);
28317 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28318 "logical_op_non_short_circuit:\t[%d,%d]\n",
28319 (int) current_tune->logical_op_non_short_circuit_thumb,
28320 (int) current_tune->logical_op_non_short_circuit_arm);
28321 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28322 "disparage_flag_setting_t16_encodings:\t%d\n",
28323 (int) current_tune->disparage_flag_setting_t16_encodings);
28324 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28325 "string_ops_prefer_neon:\t%d\n",
28326 (int) current_tune->string_ops_prefer_neon);
28327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28328 "max_insns_inline_memset:\t%d\n",
28329 current_tune->max_insns_inline_memset);
28330 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28331 current_tune->fusible_ops);
28332 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28333 (int) current_tune->sched_autopref);
28336 /* The last set of target options used to emit .arch directives, etc. This
28337 could be a function-local static if it were not required to expose it as a
28338 root to the garbage collector. */
28339 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28341 /* Print .arch and .arch_extension directives corresponding to the
28342 current architecture configuration. */
28343 static void
28344 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28346 arm_build_target build_target;
28347 /* If the target options haven't changed since the last time we were called
28348 there is nothing to do. This should be sufficient to suppress the
28349 majority of redundant work. */
28350 if (last_asm_targ_options == targ_options)
28351 return;
28353 last_asm_targ_options = targ_options;
28355 build_target.isa = sbitmap_alloc (isa_num_bits);
28356 arm_configure_build_target (&build_target, targ_options, false);
28358 if (build_target.core_name
28359 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28361 const char* truncated_name
28362 = arm_rewrite_selected_cpu (build_target.core_name);
28363 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28366 const arch_option *arch
28367 = arm_parse_arch_option_name (all_architectures, "-march",
28368 build_target.arch_name);
28369 auto_sbitmap opt_bits (isa_num_bits);
28371 gcc_assert (arch);
28373 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28375 /* Keep backward compatibility for assemblers which don't support
28376 armv7ve. Fortunately, none of the following extensions are reset
28377 by a .fpu directive. */
28378 asm_fprintf (stream, "\t.arch armv7-a\n");
28379 asm_fprintf (stream, "\t.arch_extension virt\n");
28380 asm_fprintf (stream, "\t.arch_extension idiv\n");
28381 asm_fprintf (stream, "\t.arch_extension sec\n");
28382 asm_fprintf (stream, "\t.arch_extension mp\n");
28384 else
28385 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28387 /* The .fpu directive will reset any architecture extensions from the
28388 assembler that relate to the fp/vector extensions. So put this out before
28389 any .arch_extension directives. */
28390 const char *fpu_name = (TARGET_SOFT_FLOAT
28391 ? "softvfp"
28392 : arm_identify_fpu_from_isa (build_target.isa));
28393 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28395 if (!arch->common.extensions)
28396 return;
28398 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28399 opt->name != NULL;
28400 opt++)
28402 if (!opt->remove)
28404 arm_initialize_isa (opt_bits, opt->isa_bits);
28406 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28407 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28408 floating-point instructions are disabled. So the following check
28409 restricts the printing of ".arch_extension mve" and
28410 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28411 this special behaviour because the feature bits "mve" and
28412 "mve_float" are not part of the "fpu bits", so they are not cleared
28413 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28414 TARGET_HAVE_MVE_FLOAT are disabled. */
28415 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28416 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28417 && !TARGET_HAVE_MVE_FLOAT))
28418 continue;
28420 /* If every feature bit of this option is set in the target ISA
28421 specification, print out the option name. However, don't print
28422 anything if all the bits are part of the FPU specification. */
28423 if (bitmap_subset_p (opt_bits, build_target.isa)
28424 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28425 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
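/* As an illustration (not from the original source), a compilation with
   -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=hard would typically make this
   function emit a preamble along the lines of
     .cpu cortex-a8
     .arch armv7-a
     .fpu neon
   followed by .arch_extension lines for any extensions present in the
   target ISA that are not purely FPU bits.  */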
28430 static void
28431 arm_file_start (void)
28433 int val;
28435 arm_print_asm_arch_directives
28436 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28438 if (TARGET_BPABI)
28440 /* If we have a named cpu, but the assembler does not support that
28441 name via .cpu, put out a cpu name attribute; but don't do this if the
28442 name starts with the fictitious prefix, 'generic'. */
28443 if (arm_active_target.core_name
28444 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28445 && !startswith (arm_active_target.core_name, "generic"))
28447 const char* truncated_name
28448 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28449 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28450 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28451 truncated_name);
28454 if (print_tune_info)
28455 arm_print_tune_info ();
28457 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28458 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28460 if (TARGET_HARD_FLOAT_ABI)
28461 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28463 /* Some of these attributes only apply when the corresponding features
28464 are used. However we don't have any easy way of figuring this out.
28465 Conservatively record the setting that would have been used. */
28467 if (flag_rounding_math)
28468 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28470 if (!flag_unsafe_math_optimizations)
28472 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28473 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28475 if (flag_signaling_nans)
28476 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28478 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28479 flag_finite_math_only ? 1 : 3);
28481 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28482 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28483 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28484 flag_short_enums ? 1 : 2);
28486 /* Tag_ABI_optimization_goals. */
28487 if (optimize_size)
28488 val = 4;
28489 else if (optimize >= 2)
28490 val = 2;
28491 else if (optimize)
28492 val = 1;
28493 else
28494 val = 6;
28495 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28497 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28498 unaligned_access);
28500 if (arm_fp16_format)
28501 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28502 (int) arm_fp16_format);
28504 if (arm_lang_output_object_attributes_hook)
28505 arm_lang_output_object_attributes_hook();
28508 default_file_start ();
28511 static void
28512 arm_file_end (void)
28514 int regno;
28516 /* Just in case the last function output in the assembler had non-default
28517 architecture directives, we force the assembler state back to the default
28518 set, so that any 'calculated' build attributes are based on the default
28519 options rather than the special options for that function. */
28520 arm_print_asm_arch_directives
28521 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28523 if (NEED_INDICATE_EXEC_STACK)
28524 /* Add .note.GNU-stack. */
28525 file_end_indicate_exec_stack ();
28527 if (! thumb_call_reg_needed)
28528 return;
28530 switch_to_section (text_section);
28531 asm_fprintf (asm_out_file, "\t.code 16\n");
28532 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28534 for (regno = 0; regno < LR_REGNUM; regno++)
28536 rtx label = thumb_call_via_label[regno];
28538 if (label != 0)
28540 targetm.asm_out.internal_label (asm_out_file, "L",
28541 CODE_LABEL_NUMBER (label));
28542 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28547 #ifndef ARM_PE
28548 /* Symbols in the text segment can be accessed without indirecting via the
28549 constant pool; it may take an extra binary operation, but this is still
28550 faster than indirecting via memory. Don't do this when not optimizing,
28551 since we won't be calculating all of the offsets necessary to do this
28552 simplification. */
28554 static void
28555 arm_encode_section_info (tree decl, rtx rtl, int first)
28557 if (optimize > 0 && TREE_CONSTANT (decl))
28558 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28560 default_encode_section_info (decl, rtl, first);
28562 #endif /* !ARM_PE */
28564 static void
28565 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28567 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28568 && !strcmp (prefix, "L"))
28570 arm_ccfsm_state = 0;
28571 arm_target_insn = NULL;
28573 default_internal_label (stream, prefix, labelno);
28576 /* Define classes to generate code as RTL or output asm to a file.
28577 Using templates then allows the same code to be used to output code
28578 sequences in the two formats. */
28579 class thumb1_const_rtl
28581 public:
28582 thumb1_const_rtl (rtx dst) : dst (dst) {}
28584 void mov (HOST_WIDE_INT val)
28586 emit_set_insn (dst, GEN_INT (val));
28589 void add (HOST_WIDE_INT val)
28591 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28594 void ashift (HOST_WIDE_INT shift)
28596 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28599 void neg ()
28601 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28604 private:
28605 rtx dst;
28608 class thumb1_const_print
28610 public:
28611 thumb1_const_print (FILE *f, int regno)
28613 t_file = f;
28614 dst_regname = reg_names[regno];
28617 void mov (HOST_WIDE_INT val)
28619 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28620 dst_regname, val);
28623 void add (HOST_WIDE_INT val)
28625 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28626 dst_regname, val);
28629 void ashift (HOST_WIDE_INT shift)
28631 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28632 dst_regname, shift);
28635 void neg ()
28637 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28640 private:
28641 FILE *t_file;
28642 const char *dst_regname;
28645 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28646 Avoid generating useless code when one of the bytes is zero. */
28647 template <class T>
28648 void
28649 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28651 bool mov_done_p = false;
28652 unsigned HOST_WIDE_INT val = op1;
28653 int shift = 0;
28654 int i;
28656 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28658 if (val <= 255)
28660 dst.mov (val);
28661 return;
28664 /* For negative numbers with the top nine bits set, build the
28665 opposite of OP1, then negate it; this is generally shorter and never
28666 longer. */
28667 if ((val & 0xFF800000) == 0xFF800000)
28669 thumb1_gen_const_int_1 (dst, -op1);
28670 dst.neg ();
28671 return;
28674 /* In the general case, we need 7 instructions to build
28675 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28676 do better if VAL is small enough, or
28677 right-shiftable by a suitable amount. If the
28678 right-shift lets us encode at least one byte fewer,
28679 it's worth it: we save an adds and an lsls at the
28680 expense of a final lsls. */
28681 int final_shift = number_of_first_bit_set (val);
28683 int leading_zeroes = clz_hwi (val);
28684 int number_of_bytes_needed
28685 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28686 / BITS_PER_UNIT) + 1;
28687 int number_of_bytes_needed2
28688 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28689 / BITS_PER_UNIT) + 1;
28691 if (number_of_bytes_needed2 < number_of_bytes_needed)
28692 val >>= final_shift;
28693 else
28694 final_shift = 0;
28696 /* If we are in a very small range, we can use either a single movs
28697 or movs+adds. */
28698 if (val <= 510)
28700 if (val > 255)
28702 unsigned HOST_WIDE_INT high = val - 255;
28704 dst.mov (high);
28705 dst.add (255);
28707 else
28708 dst.mov (val);
28710 if (final_shift > 0)
28711 dst.ashift (final_shift);
28713 else
28715 /* General case, emit upper 3 bytes as needed. */
28716 for (i = 0; i < 3; i++)
28718 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28720 if (byte)
28722 /* We are about to emit new bits, stop accumulating a
28723 shift amount, and left-shift only if we have already
28724 emitted some upper bits. */
28725 if (mov_done_p)
28727 dst.ashift (shift);
28728 dst.add (byte);
28730 else
28731 dst.mov (byte);
28733 /* Stop accumulating shift amount since we've just
28734 emitted some bits. */
28735 shift = 0;
28737 mov_done_p = true;
28740 if (mov_done_p)
28741 shift += 8;
28744 /* Emit lower byte. */
28745 if (!mov_done_p)
28746 dst.mov (val & 0xff);
28747 else
28749 dst.ashift (shift);
28750 if (val & 0xff)
28751 dst.add (val & 0xff);
28754 if (final_shift > 0)
28755 dst.ashift (final_shift);
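/* Worked examples (illustrative, not part of the original source):
   OP1 = 0x1234 takes the general path, emitting the upper non-zero
   byte first and then shifting in the low byte:
     movs  rN, #18	@ 0x12
     lsls  rN, #8
     adds  rN, #52	@ 0x34
   OP1 = 300 benefits from the final right shift (300 = 75 << 2):
     movs  rN, #75
     lsls  rN, #2
   and OP1 = 257 uses the small-range movs+adds path:
     movs  rN, #2
     adds  rN, #255  */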
28759 /* Proxies for thumb1.md, since the thumb1_const_print and
28760 thumb1_const_rtl classes are not exported. */
28761 void
28762 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28764 thumb1_const_rtl t (dst);
28765 thumb1_gen_const_int_1 (t, op1);
28768 void
28769 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28771 thumb1_const_print t (asm_out_file, REGNO (dst));
28772 thumb1_gen_const_int_1 (t, op1);
28775 /* Output code to add DELTA to the first argument, and then jump
28776 to FUNCTION. Used for C++ multiple inheritance. */
28778 static void
28779 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28780 HOST_WIDE_INT, tree function)
28782 static int thunk_label = 0;
28783 char label[256];
28784 char labelpc[256];
28785 int mi_delta = delta;
28786 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28787 int shift = 0;
28788 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28789 ? 1 : 0);
28790 if (mi_delta < 0)
28791 mi_delta = - mi_delta;
28793 final_start_function (emit_barrier (), file, 1);
28795 if (TARGET_THUMB1)
28797 int labelno = thunk_label++;
28798 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28799 /* Thunks are entered in arm mode when available. */
28800 if (TARGET_THUMB1_ONLY)
28802 /* push r3 so we can use it as a temporary. */
28803 /* TODO: Omit this save if r3 is not used. */
28804 fputs ("\tpush {r3}\n", file);
28806 /* With -mpure-code, we cannot load the address from the
28807 constant pool: we build it explicitly. */
28808 if (target_pure_code)
28810 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28811 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28812 fputc ('\n', file);
28813 fputs ("\tlsls r3, #8\n", file);
28814 fputs ("\tadds\tr3, #:upper0_7:#", file);
28815 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28816 fputc ('\n', file);
28817 fputs ("\tlsls r3, #8\n", file);
28818 fputs ("\tadds\tr3, #:lower8_15:#", file);
28819 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28820 fputc ('\n', file);
28821 fputs ("\tlsls r3, #8\n", file);
28822 fputs ("\tadds\tr3, #:lower0_7:#", file);
28823 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28824 fputc ('\n', file);
28826 else
28827 fputs ("\tldr\tr3, ", file);
28829 else
28831 fputs ("\tldr\tr12, ", file);
28834 if (!target_pure_code)
28836 assemble_name (file, label);
28837 fputc ('\n', file);
28840 if (flag_pic)
28842 /* If we are generating PIC, the ldr instruction below loads
28843 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28844 the address of the add + 8, so we have:
28846 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28847 = target + 1.
28849 Note that we have "+ 1" because some versions of GNU ld
28850 don't set the low bit of the result for R_ARM_REL32
28851 relocations against thumb function symbols.
28852 On ARMv6M this is +4, not +8. */
28853 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28854 assemble_name (file, labelpc);
28855 fputs (":\n", file);
28856 if (TARGET_THUMB1_ONLY)
28858 /* This is 2 insns after the start of the thunk, so we know it
28859 is 4-byte aligned. */
28860 fputs ("\tadd\tr3, pc, r3\n", file);
28861 fputs ("\tmov r12, r3\n", file);
28863 else
28864 fputs ("\tadd\tr12, pc, r12\n", file);
28866 else if (TARGET_THUMB1_ONLY)
28867 fputs ("\tmov r12, r3\n", file);
28869 if (TARGET_THUMB1_ONLY)
28871 if (mi_delta > 255)
28873 /* With -mpure-code, we cannot load MI_DELTA from the
28874 constant pool: we build it explicitly. */
28875 if (target_pure_code)
28877 thumb1_const_print r3 (file, 3);
28878 thumb1_gen_const_int_1 (r3, mi_delta);
28880 else
28882 fputs ("\tldr\tr3, ", file);
28883 assemble_name (file, label);
28884 fputs ("+4\n", file);
28886 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28887 mi_op, this_regno, this_regno);
28889 else if (mi_delta != 0)
28891 /* Thumb1 unified syntax requires s suffix in instruction name when
28892 one of the operands is immediate. */
28893 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28894 mi_op, this_regno, this_regno,
28895 mi_delta);
28898 else
28900 /* TODO: Use movw/movt for large constants when available. */
28901 while (mi_delta != 0)
28903 if ((mi_delta & (3 << shift)) == 0)
28904 shift += 2;
28905 else
28907 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28908 mi_op, this_regno, this_regno,
28909 mi_delta & (0xff << shift));
28910 mi_delta &= ~(0xff << shift);
28911 shift += 8;
28915 if (TARGET_THUMB1)
28917 if (TARGET_THUMB1_ONLY)
28918 fputs ("\tpop\t{r3}\n", file);
28920 fprintf (file, "\tbx\tr12\n");
28922 /* With -mpure-code, we don't need to emit literals for the
28923 function address and delta since we emitted code to build
28924 them. */
28925 if (!target_pure_code)
28927 ASM_OUTPUT_ALIGN (file, 2);
28928 assemble_name (file, label);
28929 fputs (":\n", file);
28930 if (flag_pic)
28932 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28933 rtx tem = XEXP (DECL_RTL (function), 0);
28934 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28935 pipeline offset is four rather than eight. Adjust the offset
28936 accordingly. */
28937 tem = plus_constant (GET_MODE (tem), tem,
28938 TARGET_THUMB1_ONLY ? -3 : -7);
28939 tem = gen_rtx_MINUS (GET_MODE (tem),
28940 tem,
28941 gen_rtx_SYMBOL_REF (Pmode,
28942 ggc_strdup (labelpc)));
28943 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28945 else
28946 /* Output ".word .LTHUNKn". */
28947 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28949 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28950 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
28953 else
28955 fputs ("\tb\t", file);
28956 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28957 if (NEED_PLT_RELOC)
28958 fputs ("(PLT)", file);
28959 fputc ('\n', file);
28962 final_end_function ();
28965 /* MI thunk handling for TARGET_32BIT. */
28967 static void
28968 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28969 HOST_WIDE_INT vcall_offset, tree function)
28971 const bool long_call_p = arm_is_long_call_p (function);
28973 /* On ARM, this_regno is R0 or R1 depending on
28974 whether the function returns an aggregate or not.
28976 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28977 function)
28978 ? R1_REGNUM : R0_REGNUM);
28980 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28981 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28982 reload_completed = 1;
28983 emit_note (NOTE_INSN_PROLOGUE_END);
28985 /* Add DELTA to THIS_RTX. */
28986 if (delta != 0)
28987 arm_split_constant (PLUS, Pmode, NULL_RTX,
28988 delta, this_rtx, this_rtx, false);
28990 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28991 if (vcall_offset != 0)
28993 /* Load *THIS_RTX. */
28994 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28995 /* Compute *THIS_RTX + VCALL_OFFSET. */
28996 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28997 false);
28998 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
28999 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29000 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29003 /* Generate a tail call to the target function. */
29004 if (!TREE_USED (function))
29006 assemble_external (function);
29007 TREE_USED (function) = 1;
29009 rtx funexp = XEXP (DECL_RTL (function), 0);
29010 if (long_call_p)
29012 emit_move_insn (temp, funexp);
29013 funexp = temp;
29015 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29016 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29017 SIBLING_CALL_P (insn) = 1;
29018 emit_barrier ();
29020 /* Indirect calls require a bit of fixup in PIC mode. */
29021 if (long_call_p)
29023 split_all_insns_noflow ();
29024 arm_reorg ();
29027 insn = get_insns ();
29028 shorten_branches (insn);
29029 final_start_function (insn, file, 1);
29030 final (insn, file, 1);
29031 final_end_function ();
29033 /* Stop pretending this is a post-reload pass. */
29034 reload_completed = 0;
29037 /* Output code to add DELTA to the first argument, and then jump
29038 to FUNCTION. Used for C++ multiple inheritance. */
29040 static void
29041 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29042 HOST_WIDE_INT vcall_offset, tree function)
29044 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29046 assemble_start_function (thunk, fnname);
29047 if (TARGET_32BIT)
29048 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29049 else
29050 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29051 assemble_end_function (thunk, fnname);
29055 arm_emit_vector_const (FILE *file, rtx x)
29057 int i;
29058 const char * pattern;
29060 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29062 switch (GET_MODE (x))
29064 case E_V2SImode: pattern = "%08x"; break;
29065 case E_V4HImode: pattern = "%04x"; break;
29066 case E_V8QImode: pattern = "%02x"; break;
29067 default: gcc_unreachable ();
29070 fprintf (file, "0x");
29071 for (i = CONST_VECTOR_NUNITS (x); i--;)
29073 rtx element;
29075 element = CONST_VECTOR_ELT (x, i);
29076 fprintf (file, pattern, INTVAL (element));
29079 return 1;
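/* Illustrative example: a V4HImode CONST_VECTOR with elements
   {1, 2, 3, 4} is printed as 0x0004000300020001, i.e. the
   highest-numbered element first, each padded to its element width.  */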
29082 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29083 HFmode constant pool entries are actually loaded with ldr. */
29084 void
29085 arm_emit_fp16_const (rtx c)
29087 long bits;
29089 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29090 if (WORDS_BIG_ENDIAN)
29091 assemble_zeros (2);
29092 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29093 if (!WORDS_BIG_ENDIAN)
29094 assemble_zeros (2);
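/* For example (illustrative), the HFmode constant 1.0 has the IEEE
   half-precision bit pattern 0x3c00; on a little-endian target the
   16-bit value is emitted first and then padded with two zero bytes so
   the pool entry still fills the 32-bit word that ldr will load.  */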
29097 const char *
29098 arm_output_load_gr (rtx *operands)
29100 rtx reg;
29101 rtx offset;
29102 rtx wcgr;
29103 rtx sum;
29105 if (!MEM_P (operands [1])
29106 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29107 || !REG_P (reg = XEXP (sum, 0))
29108 || !CONST_INT_P (offset = XEXP (sum, 1))
29109 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29110 return "wldrw%?\t%0, %1";
29112 /* Fix up an out-of-range load of a GR register. */
29113 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29114 wcgr = operands[0];
29115 operands[0] = reg;
29116 output_asm_insn ("ldr%?\t%0, %1", operands);
29118 operands[0] = wcgr;
29119 operands[1] = reg;
29120 output_asm_insn ("tmcr%?\t%0, %1", operands);
29121 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29123 return "";
29126 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29128 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29129 named arg and all anonymous args onto the stack.
29130 XXX I know the prologue shouldn't be pushing registers, but it is faster
29131 that way. */
29133 static void
29134 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29135 const function_arg_info &arg,
29136 int *pretend_size,
29137 int second_time ATTRIBUTE_UNUSED)
29139 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29140 int nregs;
29142 cfun->machine->uses_anonymous_args = 1;
29143 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29145 nregs = pcum->aapcs_ncrn;
29146 if (nregs & 1)
29148 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29149 if (res < 0 && warn_psabi)
29150 inform (input_location, "parameter passing for argument of "
29151 "type %qT changed in GCC 7.1", arg.type);
29152 else if (res > 0)
29154 nregs++;
29155 if (res > 1 && warn_psabi)
29156 inform (input_location,
29157 "parameter passing for argument of type "
29158 "%qT changed in GCC 9.1", arg.type);
29162 else
29163 nregs = pcum->nregs;
29165 if (nregs < NUM_ARG_REGS)
29166 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
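/* Illustrative example (not in the original source): for a variadic
   function such as "int f (int fmt, ...)" under AAPCS, the single named
   argument occupies r0, so nregs is 1 and *pretend_size becomes
   (4 - 1) * 4 = 12 bytes, making the prologue push r1-r3 so that the
   anonymous register arguments sit contiguously with any arguments
   already on the stack.  */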
29169 /* We can't rely on the caller doing the proper promotion when
29170 using APCS or ATPCS. */
29172 static bool
29173 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29175 return !TARGET_AAPCS_BASED;
29178 static machine_mode
29179 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29180 machine_mode mode,
29181 int *punsignedp ATTRIBUTE_UNUSED,
29182 const_tree fntype ATTRIBUTE_UNUSED,
29183 int for_return ATTRIBUTE_UNUSED)
29185 if (GET_MODE_CLASS (mode) == MODE_INT
29186 && GET_MODE_SIZE (mode) < 4)
29187 return SImode;
29189 return mode;
29193 static bool
29194 arm_default_short_enums (void)
29196 return ARM_DEFAULT_SHORT_ENUMS;
29200 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29202 static bool
29203 arm_align_anon_bitfield (void)
29205 return TARGET_AAPCS_BASED;
29209 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29211 static tree
29212 arm_cxx_guard_type (void)
29214 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29218 /* The EABI says test the least significant bit of a guard variable. */
29220 static bool
29221 arm_cxx_guard_mask_bit (void)
29223 return TARGET_AAPCS_BASED;
29227 /* The EABI specifies that all array cookies are 8 bytes long. */
29229 static tree
29230 arm_get_cookie_size (tree type)
29232 tree size;
29234 if (!TARGET_AAPCS_BASED)
29235 return default_cxx_get_cookie_size (type);
29237 size = build_int_cst (sizetype, 8);
29238 return size;
29242 /* The EABI says that array cookies should also contain the element size. */
29244 static bool
29245 arm_cookie_has_size (void)
29247 return TARGET_AAPCS_BASED;
29251 /* The EABI says constructors and destructors should return a pointer to
29252 the object constructed/destroyed. */
29254 static bool
29255 arm_cxx_cdtor_returns_this (void)
29257 return TARGET_AAPCS_BASED;
29260 /* The EABI says that an inline function may never be the key
29261 method. */
29263 static bool
29264 arm_cxx_key_method_may_be_inline (void)
29266 return !TARGET_AAPCS_BASED;
29269 static void
29270 arm_cxx_determine_class_data_visibility (tree decl)
29272 if (!TARGET_AAPCS_BASED
29273 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29274 return;
29276 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29277 is exported. However, on systems without dynamic vague linkage,
29278 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29279 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29280 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29281 else
29282 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29283 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29286 static bool
29287 arm_cxx_class_data_always_comdat (void)
29289 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29290 vague linkage if the class has no key function. */
29291 return !TARGET_AAPCS_BASED;
29295 /* The EABI says __aeabi_atexit should be used to register static
29296 destructors. */
29298 static bool
29299 arm_cxx_use_aeabi_atexit (void)
29301 return TARGET_AAPCS_BASED;
29305 void
29306 arm_set_return_address (rtx source, rtx scratch)
29308 arm_stack_offsets *offsets;
29309 HOST_WIDE_INT delta;
29310 rtx addr, mem;
29311 unsigned long saved_regs;
29313 offsets = arm_get_frame_offsets ();
29314 saved_regs = offsets->saved_regs_mask;
29316 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29317 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29318 else
29320 if (frame_pointer_needed)
29321 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29322 else
29324 /* LR will be the first saved register. */
29325 delta = offsets->outgoing_args - (offsets->frame + 4);
29328 if (delta >= 4096)
29330 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29331 GEN_INT (delta & ~4095)));
29332 addr = scratch;
29333 delta &= 4095;
29335 else
29336 addr = stack_pointer_rtx;
29338 addr = plus_constant (Pmode, addr, delta);
29341 /* The store needs to be marked to prevent DSE from deleting
29342 it as dead if it is based on fp. */
29343 mem = gen_frame_mem (Pmode, addr);
29344 MEM_VOLATILE_P (mem) = true;
29345 emit_move_insn (mem, source);
29350 void
29351 thumb_set_return_address (rtx source, rtx scratch)
29353 arm_stack_offsets *offsets;
29354 HOST_WIDE_INT delta;
29355 HOST_WIDE_INT limit;
29356 int reg;
29357 rtx addr, mem;
29358 unsigned long mask;
29360 emit_use (source);
29362 offsets = arm_get_frame_offsets ();
29363 mask = offsets->saved_regs_mask;
29364 if (mask & (1 << LR_REGNUM))
29366 limit = 1024;
29367 /* Find the saved regs. */
29368 if (frame_pointer_needed)
29370 delta = offsets->soft_frame - offsets->saved_args;
29371 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29372 if (TARGET_THUMB1)
29373 limit = 128;
29375 else
29377 delta = offsets->outgoing_args - offsets->saved_args;
29378 reg = SP_REGNUM;
29380 /* Allow for the stack frame. */
29381 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29382 delta -= 16;
29383 /* The link register is always the first saved register. */
29384 delta -= 4;
29386 /* Construct the address. */
29387 addr = gen_rtx_REG (SImode, reg);
29388 if (delta > limit)
29390 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29391 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29392 addr = scratch;
29394 else
29395 addr = plus_constant (Pmode, addr, delta);
29397 /* The store needs to be marked to prevent DSE from deleting
29398 it as dead if it is based on fp. */
29399 mem = gen_frame_mem (Pmode, addr);
29400 MEM_VOLATILE_P (mem) = true;
29401 emit_move_insn (mem, source);
29403 else
29404 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29407 /* Implements target hook vector_mode_supported_p. */
29408 bool
29409 arm_vector_mode_supported_p (machine_mode mode)
29411 /* Neon also supports V2SImode, etc. listed in the clause below. */
29412 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29413 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29414 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29415 || mode == V8BFmode))
29416 return true;
29418 if ((TARGET_NEON || TARGET_IWMMXT)
29419 && ((mode == V2SImode)
29420 || (mode == V4HImode)
29421 || (mode == V8QImode)))
29422 return true;
29424 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29425 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29426 || mode == V2HAmode))
29427 return true;
29429 if (TARGET_HAVE_MVE
29430 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29431 || mode == V16QImode
29432 || mode == V16BImode || mode == V8BImode || mode == V4BImode))
29433 return true;
29435 if (TARGET_HAVE_MVE_FLOAT
29436 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29437 return true;
29439 return false;
29442 /* Implements target hook array_mode_supported_p. */
29444 static bool
29445 arm_array_mode_supported_p (machine_mode mode,
29446 unsigned HOST_WIDE_INT nelems)
29448 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29449 for now, as the lane-swapping logic needs to be extended in the expanders.
29450 See PR target/82518. */
29451 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29452 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29453 && (nelems >= 2 && nelems <= 4))
29454 return true;
29456 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29457 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29458 return true;
29460 return false;
29463 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29464 registers when autovectorizing for Neon, at least until multiple vector
29465 widths are supported properly by the middle-end. */
29467 static machine_mode
29468 arm_preferred_simd_mode (scalar_mode mode)
29470 if (TARGET_NEON)
29471 switch (mode)
29473 case E_HFmode:
29474 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29475 case E_SFmode:
29476 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29477 case E_SImode:
29478 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29479 case E_HImode:
29480 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29481 case E_QImode:
29482 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29483 case E_DImode:
29484 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29485 return V2DImode;
29486 break;
29488 default:;
29491 if (TARGET_REALLY_IWMMXT)
29492 switch (mode)
29494 case E_SImode:
29495 return V2SImode;
29496 case E_HImode:
29497 return V4HImode;
29498 case E_QImode:
29499 return V8QImode;
29501 default:;
29504 if (TARGET_HAVE_MVE)
29505 switch (mode)
29507 case E_QImode:
29508 return V16QImode;
29509 case E_HImode:
29510 return V8HImode;
29511 case E_SImode:
29512 return V4SImode;
29514 default:;
29517 if (TARGET_HAVE_MVE_FLOAT)
29518 switch (mode)
29520 case E_HFmode:
29521 return V8HFmode;
29522 case E_SFmode:
29523 return V4SFmode;
29525 default:;
29528 return word_mode;
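/* For example (illustrative), with Neon enabled an SFmode scalar is
   vectorized as V4SFmode by default, or as V2SFmode when
   -mvectorize-with-neon-double is given; with MVE, SFmode maps to
   V4SFmode only when the floating-point variant of MVE is available,
   otherwise word_mode is returned.  */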
29531 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29533 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29534 using r0-r4 for function arguments, r7 for the stack frame and don't have
29535 enough left over to do doubleword arithmetic. For Thumb-2 all the
29536 potentially problematic instructions accept high registers so this is not
29537 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29538 that require many low registers. */
29539 static bool
29540 arm_class_likely_spilled_p (reg_class_t rclass)
29542 if ((TARGET_THUMB1 && rclass == LO_REGS)
29543 || rclass == CC_REG)
29544 return true;
29546 return default_class_likely_spilled_p (rclass);
29549 /* Implements target hook small_register_classes_for_mode_p. */
29550 bool
29551 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29553 return TARGET_THUMB1;
29556 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29557 ARM insns and therefore guarantee that the shift count is modulo 256.
29558 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29559 guarantee no particular behavior for out-of-range counts. */
29561 static unsigned HOST_WIDE_INT
29562 arm_shift_truncation_mask (machine_mode mode)
29564 return mode == SImode ? 255 : 0;
29568 /* Map internal gcc register numbers to DWARF2 register numbers. */
29570 unsigned int
29571 arm_debugger_regno (unsigned int regno)
29573 if (regno < 16)
29574 return regno;
29576 if (IS_VFP_REGNUM (regno))
29578 /* See comment in arm_dwarf_register_span. */
29579 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29580 return 64 + regno - FIRST_VFP_REGNUM;
29581 else
29582 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29585 if (IS_IWMMXT_GR_REGNUM (regno))
29586 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29588 if (IS_IWMMXT_REGNUM (regno))
29589 return 112 + regno - FIRST_IWMMXT_REGNUM;
29591 return DWARF_FRAME_REGISTERS;
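/* Examples of the mapping above (illustrative): core registers r0-r15
   keep their own numbers, s0 maps to DWARF register 64, s1 to 65, and
   so on, while VFP registers with no single-precision view (d16 and
   up) land in the 256-287 range that DWARF reserves for d0-d31.  */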
29594 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29595 GCC models them as 64 32-bit registers, so we need to describe this to
29596 the DWARF generation code. Other registers can use the default. */
29597 static rtx
29598 arm_dwarf_register_span (rtx rtl)
29600 machine_mode mode;
29601 unsigned regno;
29602 rtx parts[16];
29603 int nregs;
29604 int i;
29606 regno = REGNO (rtl);
29607 if (!IS_VFP_REGNUM (regno))
29608 return NULL_RTX;
29610 /* XXX FIXME: The EABI defines two VFP register ranges:
29611 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29612 256-287: D0-D31
29613 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29614 corresponding D register. Until GDB supports this, we shall use the
29615 legacy encodings. We also use these encodings for D0-D15 for
29616 compatibility with older debuggers. */
29617 mode = GET_MODE (rtl);
29618 if (GET_MODE_SIZE (mode) < 8)
29619 return NULL_RTX;
29621 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29623 nregs = GET_MODE_SIZE (mode) / 4;
29624 for (i = 0; i < nregs; i += 2)
29625 if (TARGET_BIG_END)
29627 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29628 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29630 else
29632 parts[i] = gen_rtx_REG (SImode, regno + i);
29633 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29636 else
29638 nregs = GET_MODE_SIZE (mode) / 8;
29639 for (i = 0; i < nregs; i++)
29640 parts[i] = gen_rtx_REG (DImode, regno + i);
29643 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29646 #if ARM_UNWIND_INFO
29647 /* Emit unwind directives for a store-multiple instruction or stack pointer
29648 push during alignment.
29649 These should only ever be generated by the function prologue code, so
29650 expect them to have a particular form.
29651 The store-multiple instruction sometimes pushes pc as the last register,
29652 although it should not be tracked into unwind information, or for -Os
29653 sometimes pushes some dummy registers before the first register that needs
29654 to be tracked in unwind information; such dummy registers are there just
29655 to avoid separate stack adjustment, and will not be restored in the
29656 epilogue. */
29658 static void
29659 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29661 int i;
29662 HOST_WIDE_INT offset;
29663 HOST_WIDE_INT nregs;
29664 int reg_size;
29665 unsigned reg;
29666 unsigned lastreg;
29667 unsigned padfirst = 0, padlast = 0;
29668 rtx e;
29670 e = XVECEXP (p, 0, 0);
29671 gcc_assert (GET_CODE (e) == SET);
29673 /* First insn will adjust the stack pointer. */
29674 gcc_assert (GET_CODE (e) == SET
29675 && REG_P (SET_DEST (e))
29676 && REGNO (SET_DEST (e)) == SP_REGNUM
29677 && GET_CODE (SET_SRC (e)) == PLUS);
29679 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29680 nregs = XVECLEN (p, 0) - 1;
29681 gcc_assert (nregs);
29683 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29684 if (reg < 16)
29686 /* For -Os dummy registers can be pushed at the beginning to
29687 avoid separate stack pointer adjustment. */
29688 e = XVECEXP (p, 0, 1);
29689 e = XEXP (SET_DEST (e), 0);
29690 if (GET_CODE (e) == PLUS)
29691 padfirst = INTVAL (XEXP (e, 1));
29692 gcc_assert (padfirst == 0 || optimize_size);
29693 /* The function prologue may also push pc, but not annotate it as it is
29694 never restored. We turn this into a stack pointer adjustment. */
29695 e = XVECEXP (p, 0, nregs);
29696 e = XEXP (SET_DEST (e), 0);
29697 if (GET_CODE (e) == PLUS)
29698 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29699 else
29700 padlast = offset - 4;
29701 gcc_assert (padlast == 0 || padlast == 4);
29702 if (padlast == 4)
29703 fprintf (out_file, "\t.pad #4\n");
29704 reg_size = 4;
29705 fprintf (out_file, "\t.save {");
29707 else if (IS_VFP_REGNUM (reg))
29709 reg_size = 8;
29710 fprintf (out_file, "\t.vsave {");
29712 else
29713 /* Unknown register type. */
29714 gcc_unreachable ();
29716 /* If the stack increment doesn't match the size of the saved registers,
29717 something has gone horribly wrong. */
29718 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29720 offset = padfirst;
29721 lastreg = 0;
29722 /* The remaining insns will describe the stores. */
29723 for (i = 1; i <= nregs; i++)
29725 /* Expect (set (mem <addr>) (reg)).
29726 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29727 e = XVECEXP (p, 0, i);
29728 gcc_assert (GET_CODE (e) == SET
29729 && MEM_P (SET_DEST (e))
29730 && REG_P (SET_SRC (e)));
29732 reg = REGNO (SET_SRC (e));
29733 gcc_assert (reg >= lastreg);
29735 if (i != 1)
29736 fprintf (out_file, ", ");
29737 /* We can't use %r for vfp because we need to use the
29738 double precision register names. */
29739 if (IS_VFP_REGNUM (reg))
29740 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29741 else
29742 asm_fprintf (out_file, "%r", reg);
29744 if (flag_checking)
29746 /* Check that the addresses are consecutive. */
29747 e = XEXP (SET_DEST (e), 0);
29748 if (GET_CODE (e) == PLUS)
29749 gcc_assert (REG_P (XEXP (e, 0))
29750 && REGNO (XEXP (e, 0)) == SP_REGNUM
29751 && CONST_INT_P (XEXP (e, 1))
29752 && offset == INTVAL (XEXP (e, 1)));
29753 else
29754 gcc_assert (i == 1
29755 && REG_P (e)
29756 && REGNO (e) == SP_REGNUM);
29757 offset += reg_size;
29760 fprintf (out_file, "}\n");
29761 if (padfirst)
29762 fprintf (out_file, "\t.pad #%d\n", padfirst);
29765 /* Emit unwind directives for a SET. */
29767 static void
29768 arm_unwind_emit_set (FILE * out_file, rtx p)
29770 rtx e0;
29771 rtx e1;
29772 unsigned reg;
29774 e0 = XEXP (p, 0);
29775 e1 = XEXP (p, 1);
29776 switch (GET_CODE (e0))
29778 case MEM:
29779 /* Pushing a single register. */
29780 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29781 || !REG_P (XEXP (XEXP (e0, 0), 0))
29782 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29783 abort ();
29785 asm_fprintf (out_file, "\t.save ");
29786 if (IS_VFP_REGNUM (REGNO (e1)))
29787 asm_fprintf(out_file, "{d%d}\n",
29788 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29789 else
29790 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29791 break;
29793 case REG:
29794 if (REGNO (e0) == SP_REGNUM)
29796 /* A stack increment. */
29797 if (GET_CODE (e1) != PLUS
29798 || !REG_P (XEXP (e1, 0))
29799 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29800 || !CONST_INT_P (XEXP (e1, 1)))
29801 abort ();
29803 asm_fprintf (out_file, "\t.pad #%wd\n",
29804 -INTVAL (XEXP (e1, 1)));
29806 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29808 HOST_WIDE_INT offset;
29810 if (GET_CODE (e1) == PLUS)
29812 if (!REG_P (XEXP (e1, 0))
29813 || !CONST_INT_P (XEXP (e1, 1)))
29814 abort ();
29815 reg = REGNO (XEXP (e1, 0));
29816 offset = INTVAL (XEXP (e1, 1));
29817 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29818 HARD_FRAME_POINTER_REGNUM, reg,
29819 offset);
29821 else if (REG_P (e1))
29823 reg = REGNO (e1);
29824 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29825 HARD_FRAME_POINTER_REGNUM, reg);
29827 else
29828 abort ();
29830 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29832 /* Move from sp to reg. */
29833 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
29835 else if (GET_CODE (e1) == PLUS
29836 && REG_P (XEXP (e1, 0))
29837 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29838 && CONST_INT_P (XEXP (e1, 1)))
29840 /* Set reg to offset from sp. */
29841 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
29842 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29844 else
29845 abort ();
29846 break;
29848 default:
29849 abort ();
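/* Illustrative examples of the unwind directives produced above: a
   single-register push such as "push {r4}" yields ".save {r4}", a
   16-byte stack decrement yields ".pad #16", and establishing the
   frame pointer from the stack pointer yields a ".setfp" directive,
   with an extra "#offset" when the copy includes an addend.  */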
29854 /* Emit unwind directives for the given insn. */
29856 static void
29857 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
29859 rtx note, pat;
29860 bool handled_one = false;
29862 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29863 return;
29865 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29866 && (TREE_NOTHROW (current_function_decl)
29867 || crtl->all_throwers_are_sibcalls))
29868 return;
29870 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29871 return;
29873 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29875 switch (REG_NOTE_KIND (note))
29877 case REG_FRAME_RELATED_EXPR:
29878 pat = XEXP (note, 0);
29879 goto found;
29881 case REG_CFA_REGISTER:
29882 pat = XEXP (note, 0);
29883 if (pat == NULL)
29885 pat = PATTERN (insn);
29886 if (GET_CODE (pat) == PARALLEL)
29887 pat = XVECEXP (pat, 0, 0);
29890 /* Only emitted for IS_STACKALIGN re-alignment. */
29892 rtx dest, src;
29893 unsigned reg;
29895 src = SET_SRC (pat);
29896 dest = SET_DEST (pat);
29898 gcc_assert (src == stack_pointer_rtx);
29899 reg = REGNO (dest);
29900 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29901 reg + 0x90, reg);
29903 handled_one = true;
29904 break;
29906 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
29907 to get correct dwarf information for shrink-wrapping. We should not
29908 emit unwind information for it because such notes are used either for
29909 pretend arguments or to adjust sp and restore registers from the
29910 stack. */
29911 case REG_CFA_DEF_CFA:
29912 case REG_CFA_ADJUST_CFA:
29913 case REG_CFA_RESTORE:
29914 return;
29916 case REG_CFA_EXPRESSION:
29917 case REG_CFA_OFFSET:
29918 /* ??? Only handling here what we actually emit. */
29919 gcc_unreachable ();
29921 default:
29922 break;
29925 if (handled_one)
29926 return;
29927 pat = PATTERN (insn);
29928 found:
29930 switch (GET_CODE (pat))
29932 case SET:
29933 arm_unwind_emit_set (out_file, pat);
29934 break;
29936 case SEQUENCE:
29937 /* Store multiple. */
29938 arm_unwind_emit_sequence (out_file, pat);
29939 break;
29941 default:
29942 abort();
29947 /* Output a reference from a function exception table to the type_info
29948 object X. The EABI specifies that the symbol should be relocated by
29949 an R_ARM_TARGET2 relocation. */
29951 static bool
29952 arm_output_ttype (rtx x)
29954 fputs ("\t.word\t", asm_out_file);
29955 output_addr_const (asm_out_file, x);
29956 /* Use special relocations for symbol references. */
29957 if (!CONST_INT_P (x))
29958 fputs ("(TARGET2)", asm_out_file);
29959 fputc ('\n', asm_out_file);
29961 return TRUE;
29964 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29966 static void
29967 arm_asm_emit_except_personality (rtx personality)
29969 fputs ("\t.personality\t", asm_out_file);
29970 output_addr_const (asm_out_file, personality);
29971 fputc ('\n', asm_out_file);
29973 #endif /* ARM_UNWIND_INFO */
29975 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29977 static void
29978 arm_asm_init_sections (void)
29980 #if ARM_UNWIND_INFO
29981 exception_section = get_unnamed_section (0, output_section_asm_op,
29982 "\t.handlerdata");
29983 #endif /* ARM_UNWIND_INFO */
29985 #ifdef OBJECT_FORMAT_ELF
29986 if (target_pure_code)
29987 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29988 #endif
29991 /* Output unwind directives for the start/end of a function. */
29993 void
29994 arm_output_fn_unwind (FILE * f, bool prologue)
29996 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29997 return;
29999 if (prologue)
30000 fputs ("\t.fnstart\n", f);
30001 else
30003 /* If this function will never be unwound, then mark it as such.
30004 The same condition is used in arm_unwind_emit to suppress
30005 the frame annotations. */
30006 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30007 && (TREE_NOTHROW (current_function_decl)
30008 || crtl->all_throwers_are_sibcalls))
30009 fputs("\t.cantunwind\n", f);
30011 fputs ("\t.fnend\n", f);
30015 static bool
30016 arm_emit_tls_decoration (FILE *fp, rtx x)
30018 enum tls_reloc reloc;
30019 rtx val;
30021 val = XVECEXP (x, 0, 0);
30022 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30024 output_addr_const (fp, val);
30026 switch (reloc)
30028 case TLS_GD32:
30029 fputs ("(tlsgd)", fp);
30030 break;
30031 case TLS_GD32_FDPIC:
30032 fputs ("(tlsgd_fdpic)", fp);
30033 break;
30034 case TLS_LDM32:
30035 fputs ("(tlsldm)", fp);
30036 break;
30037 case TLS_LDM32_FDPIC:
30038 fputs ("(tlsldm_fdpic)", fp);
30039 break;
30040 case TLS_LDO32:
30041 fputs ("(tlsldo)", fp);
30042 break;
30043 case TLS_IE32:
30044 fputs ("(gottpoff)", fp);
30045 break;
30046 case TLS_IE32_FDPIC:
30047 fputs ("(gottpoff_fdpic)", fp);
30048 break;
30049 case TLS_LE32:
30050 fputs ("(tpoff)", fp);
30051 break;
30052 case TLS_DESCSEQ:
30053 fputs ("(tlsdesc)", fp);
30054 break;
30055 default:
30056 gcc_unreachable ();
30059 switch (reloc)
30061 case TLS_GD32:
30062 case TLS_LDM32:
30063 case TLS_IE32:
30064 case TLS_DESCSEQ:
30065 fputs (" + (. - ", fp);
30066 output_addr_const (fp, XVECEXP (x, 0, 2));
30067 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30068 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30069 output_addr_const (fp, XVECEXP (x, 0, 3));
30070 fputc (')', fp);
30071 break;
30072 default:
30073 break;
30076 return TRUE;
30079 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30081 static void
30082 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30084 gcc_assert (size == 4);
30085 fputs ("\t.word\t", file);
30086 output_addr_const (file, x);
30087 fputs ("(tlsldo)", file);
30090 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30092 static bool
30093 arm_output_addr_const_extra (FILE *fp, rtx x)
30095 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30096 return arm_emit_tls_decoration (fp, x);
30097 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30099 char label[256];
30100 int labelno = INTVAL (XVECEXP (x, 0, 0));
30102 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30103 assemble_name_raw (fp, label);
30105 return TRUE;
30107 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30109 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30110 if (GOT_PCREL)
30111 fputs ("+.", fp);
30112 fputs ("-(", fp);
30113 output_addr_const (fp, XVECEXP (x, 0, 0));
30114 fputc (')', fp);
30115 return TRUE;
30117 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30119 output_addr_const (fp, XVECEXP (x, 0, 0));
30120 if (GOT_PCREL)
30121 fputs ("+.", fp);
30122 fputs ("-(", fp);
30123 output_addr_const (fp, XVECEXP (x, 0, 1));
30124 fputc (')', fp);
30125 return TRUE;
30127 else if (GET_CODE (x) == CONST_VECTOR)
30128 return arm_emit_vector_const (fp, x);
30130 return FALSE;
30133 /* Output assembly for a shift instruction.
30134 SET_FLAGS determines how the instruction modifies the condition codes.
30135 0 - Do not set condition codes.
30136 1 - Set condition codes.
30137 2 - Use smallest instruction. */
30138 const char *
30139 arm_output_shift(rtx * operands, int set_flags)
30141 char pattern[100];
30142 static const char flag_chars[3] = {'?', '.', '!'};
30143 const char *shift;
30144 HOST_WIDE_INT val;
30145 char c;
30147 c = flag_chars[set_flags];
30148 shift = shift_op(operands[3], &val);
30149 if (shift)
30151 if (val != -1)
30152 operands[2] = GEN_INT(val);
30153 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30155 else
30156 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30158 output_asm_insn (pattern, operands);
30159 return "";
30162 /* Output assembly for a WMMX immediate shift instruction. */
30163 const char *
30164 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30166 int shift = INTVAL (operands[2]);
30167 char templ[50];
30168 machine_mode opmode = GET_MODE (operands[0]);
30170 gcc_assert (shift >= 0);
30172 /* Handle the case where the shift value is larger than 63 (for the D
30173 qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
30174 if (((opmode == V4HImode) && (shift > 15))
30175 || ((opmode == V2SImode) && (shift > 31))
30176 || ((opmode == DImode) && (shift > 63)))
30178 if (wror_or_wsra)
30180 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30181 output_asm_insn (templ, operands);
30182 if (opmode == DImode)
30184 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30185 output_asm_insn (templ, operands);
30188 else
30190 /* The destination register will contain all zeros. */
30191 sprintf (templ, "wzero\t%%0");
30192 output_asm_insn (templ, operands);
30194 return "";
30197 if ((opmode == DImode) && (shift > 32))
30199 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30200 output_asm_insn (templ, operands);
30201 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30202 output_asm_insn (templ, operands);
30204 else
30206 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30207 output_asm_insn (templ, operands);
30209 return "";
30212 /* Output assembly for a WMMX tinsr instruction. */
30213 const char *
30214 arm_output_iwmmxt_tinsr (rtx *operands)
30216 int mask = INTVAL (operands[3]);
30217 int i;
30218 char templ[50];
30219 int units = mode_nunits[GET_MODE (operands[0])];
30220 gcc_assert ((mask & (mask - 1)) == 0);
30221 for (i = 0; i < units; ++i)
30223 if ((mask & 0x01) == 1)
30225 break;
30227 mask >>= 1;
30229 gcc_assert (i < units);
30231 switch (GET_MODE (operands[0]))
30233 case E_V8QImode:
30234 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30235 break;
30236 case E_V4HImode:
30237 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30238 break;
30239 case E_V2SImode:
30240 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30241 break;
30242 default:
30243 gcc_unreachable ();
30244 break;
30246 output_asm_insn (templ, operands);
30248 return "";
30251 /* Output a Thumb-1 casesi dispatch sequence. */
30252 const char *
30253 thumb1_output_casesi (rtx *operands)
30255 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30257 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30259 switch (GET_MODE(diff_vec))
30261 case E_QImode:
30262 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30263 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30264 case E_HImode:
30265 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30266 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30267 case E_SImode:
30268 return "bl\t%___gnu_thumb1_case_si";
30269 default:
30270 gcc_unreachable ();
30274 /* Output a Thumb-2 casesi instruction. */
30275 const char *
30276 thumb2_output_casesi (rtx *operands)
30278 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30280 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30282 output_asm_insn ("cmp\t%0, %1", operands);
30283 output_asm_insn ("bhi\t%l3", operands);
30284 switch (GET_MODE(diff_vec))
30286 case E_QImode:
30287 return "tbb\t[%|pc, %0]";
30288 case E_HImode:
30289 return "tbh\t[%|pc, %0, lsl #1]";
30290 case E_SImode:
30291 if (flag_pic)
30293 output_asm_insn ("adr\t%4, %l2", operands);
30294 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30295 output_asm_insn ("add\t%4, %4, %5", operands);
30296 return "bx\t%4";
30298 else
30300 output_asm_insn ("adr\t%4, %l2", operands);
30301 return "ldr\t%|pc, [%4, %0, lsl #2]";
30303 default:
30304 gcc_unreachable ();
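/* Illustrative output (operand registers are examples only): for a
   QImode dispatch table this expands to
     cmp   r0, r1
     bhi   .Ldefault
     tbb   [pc, r0]
   while an SImode table without -fpic uses
     cmp   r0, r1
     bhi   .Ldefault
     adr   r4, .Ltable
     ldr   pc, [r4, r0, lsl #2]  */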
30308 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30309 per-core tuning structs. */
30310 static int
30311 arm_issue_rate (void)
30313 return current_tune->issue_rate;
30316 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30317 static int
30318 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30320 if (DEBUG_INSN_P (insn))
30321 return more;
30323 rtx_code code = GET_CODE (PATTERN (insn));
30324 if (code == USE || code == CLOBBER)
30325 return more;
30327 if (get_attr_type (insn) == TYPE_NO_INSN)
30328 return more;
30330 return more - 1;
30333 /* Return how many instructions the scheduler should look ahead to choose the
30334 best one. */
30335 static int
30336 arm_first_cycle_multipass_dfa_lookahead (void)
30338 int issue_rate = arm_issue_rate ();
30340 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30343 /* Enable modeling of L2 auto-prefetcher. */
30344 static int
30345 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30347 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30350 const char *
30351 arm_mangle_type (const_tree type)
30353 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30354 has to be mangled as if it is in the "std" namespace. */
30355 if (TARGET_AAPCS_BASED
30356 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30357 return "St9__va_list";
30359 /* Half-precision floating point types. */
30360 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30362 if (TYPE_MODE (type) == BFmode)
30363 return "u6__bf16";
30364 else
30365 return "Dh";
30368 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30369 builtin type. */
30370 if (TYPE_NAME (type) != NULL)
30371 return arm_mangle_builtin_type (type);
30373 /* Use the default mangling. */
30374 return NULL;
30377 /* Order of allocation of core registers for Thumb: this allocation is
30378 written over the corresponding initial entries of the array
30379 initialized with REG_ALLOC_ORDER. We allocate all low registers
30380 first. Saving and restoring a low register is usually cheaper than
30381 using a call-clobbered high register. */
30383 static const int thumb_core_reg_alloc_order[] =
30385 3, 2, 1, 0, 4, 5, 6, 7,
30386 12, 14, 8, 9, 10, 11
30389 /* Adjust register allocation order when compiling for Thumb. */
30391 void
30392 arm_order_regs_for_local_alloc (void)
30394 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30395 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30396 if (TARGET_THUMB)
30397 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30398 sizeof (thumb_core_reg_alloc_order));
30401 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30403 bool
30404 arm_frame_pointer_required (void)
30406 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30407 return true;
30409 /* If the function receives nonlocal gotos, it needs to save the frame
30410 pointer in the nonlocal_goto_save_area object. */
30411 if (cfun->has_nonlocal_label)
30412 return true;
30414 /* The frame pointer is required for non-leaf APCS frames. */
30415 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30416 return true;
30418 /* If we are probing the stack in the prologue, we will have a faulting
30419 instruction prior to the stack adjustment and this requires a frame
30420 pointer if we want to catch the exception using the EABI unwinder. */
30421 if (!IS_INTERRUPT (arm_current_func_type ())
30422 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30423 || flag_stack_clash_protection)
30424 && arm_except_unwind_info (&global_options) == UI_TARGET
30425 && cfun->can_throw_non_call_exceptions)
30427 HOST_WIDE_INT size = get_frame_size ();
30429 /* That's irrelevant if there is no stack adjustment. */
30430 if (size <= 0)
30431 return false;
30433 /* That's relevant only if there is a stack probe. */
30434 if (crtl->is_leaf && !cfun->calls_alloca)
30436 /* We don't have the final size of the frame so adjust. */
30437 size += 32 * UNITS_PER_WORD;
30438 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30439 return true;
30441 else
30442 return true;
30445 return false;
30448 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30449 All modes except THUMB1 have conditional execution.
30450 If we have conditional arithmetic, return false before reload to
30451 enable some ifcvt transformations. */
30452 static bool
30453 arm_have_conditional_execution (void)
30455 bool has_cond_exec, enable_ifcvt_trans;
30457 /* Only THUMB1 cannot support conditional execution. */
30458 has_cond_exec = !TARGET_THUMB1;
30460 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30461 before reload. */
30462 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30464 return has_cond_exec && !enable_ifcvt_trans;
30467 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30468 static HOST_WIDE_INT
30469 arm_vector_alignment (const_tree type)
30471 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30473 if (TARGET_AAPCS_BASED)
30474 align = MIN (align, 64);
30476 return align;
30479 static unsigned int
30480 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30482 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30484 modes->safe_push (V16QImode);
30485 modes->safe_push (V8QImode);
30487 return 0;
30490 static bool
30491 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30493 /* Vectors which aren't in packed structures will not be less aligned than
30494 the natural alignment of their element type, so this is safe. */
30495 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30496 return !is_packed;
30498 return default_builtin_vector_alignment_reachable (type, is_packed);
30501 static bool
30502 arm_builtin_support_vector_misalignment (machine_mode mode,
30503 const_tree type, int misalignment,
30504 bool is_packed)
30506 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30508 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30510 if (is_packed)
30511 return align == 1;
30513 /* If the misalignment is unknown, we should be able to handle the access
30514 so long as it is not to a member of a packed data structure. */
30515 if (misalignment == -1)
30516 return true;
30518 /* Return true if the misalignment is a multiple of the natural alignment
30519 of the vector's element type. This is probably always going to be
30520 true in practice, since we've already established that this isn't a
30521 packed access. */
30522 return ((misalignment % align) == 0);
30525 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30526 is_packed);
30529 static void
30530 arm_conditional_register_usage (void)
30532 int regno;
30534 if (TARGET_THUMB1 && optimize_size)
30536 /* When optimizing for size on Thumb-1, it's better not
30537 to use the HI regs, because of the overhead of
30538 stacking them. */
30539 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30540 fixed_regs[regno] = call_used_regs[regno] = 1;
30543 /* The link register can be clobbered by any branch insn,
30544 but we have no way to track that at present, so mark
30545 it as unavailable. */
30546 if (TARGET_THUMB1)
30547 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30549 if (TARGET_32BIT && TARGET_VFP_BASE)
30551 /* VFPv3 registers are disabled when earlier VFP
30552 versions are selected due to the definition of
30553 LAST_VFP_REGNUM. */
30554 for (regno = FIRST_VFP_REGNUM;
30555 regno <= LAST_VFP_REGNUM; ++ regno)
30557 fixed_regs[regno] = 0;
30558 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30559 || regno >= FIRST_VFP_REGNUM + 32;
30561 if (TARGET_HAVE_MVE)
30562 fixed_regs[VPR_REGNUM] = 0;
30565 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30567 regno = FIRST_IWMMXT_GR_REGNUM;
30568 /* The 2002/10/09 revision of the XScale ABI has wCG0
30569 and wCG1 as call-preserved registers. The 2002/11/21
30570 revision changed this so that all wCG registers are
30571 scratch registers. */
30572 for (regno = FIRST_IWMMXT_GR_REGNUM;
30573 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30574 fixed_regs[regno] = 0;
30575 /* The XScale ABI has wR0 - wR9 as scratch registers,
30576 the rest as call-preserved registers. */
30577 for (regno = FIRST_IWMMXT_REGNUM;
30578 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30580 fixed_regs[regno] = 0;
30581 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30585 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30587 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30588 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30590 else if (TARGET_APCS_STACK)
30592 fixed_regs[10] = 1;
30593 call_used_regs[10] = 1;
30595 /* -mcaller-super-interworking reserves r11 for calls to
30596 _interwork_r11_call_via_rN(). Making the register global
30597 is an easy way of ensuring that it remains valid for all
30598 calls. */
30599 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30600 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30602 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30603 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30604 if (TARGET_CALLER_INTERWORKING)
30605 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30608 /* The Q and GE bits are only accessed via special ACLE patterns. */
30609 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30610 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30612 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30615 static reg_class_t
30616 arm_preferred_rename_class (reg_class_t rclass)
30618 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30619 using GENERAL_REGS. During the register renaming pass we prefer LO_REGS,
30620 which can reduce code size. */
30621 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30622 return LO_REGS;
30623 else
30624 return NO_REGS;
30627 /* Compute the attribute "length" of insn "*push_multi".
30628 So this function MUST be kept in sync with that insn pattern. */
30630 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30632 int i, regno, hi_reg;
30633 int num_saves = XVECLEN (parallel_op, 0);
30635 /* ARM mode. */
30636 if (TARGET_ARM)
30637 return 4;
30638 /* Thumb1 mode. */
30639 if (TARGET_THUMB1)
30640 return 2;
30642 /* Thumb2 mode. */
30643 regno = REGNO (first_op);
30644 /* For PUSH/STM under Thumb-2, we can use a 16-bit encoding if the register
30645 list fits in 8 bits. Normally this means all registers in the list must be
30646 LO_REGS, that is (R0-R7). If any HI_REGS are used, we must use a 32-bit
30647 encoding. The one exception is PUSH, where LR in HI_REGS can still use the
30648 16-bit encoding. */
30649 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30650 for (i = 1; i < num_saves && !hi_reg; i++)
30652 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30653 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30656 if (!hi_reg)
30657 return 2;
30658 return 4;
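/* For example, under Thumb-2 "push {r0-r7, lr}" uses only low registers
   plus LR and therefore gets the 16-bit encoding (length 2), whereas
   "push {r4, r8}" contains a high register other than LR and needs the
   32-bit encoding (length 4).  */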
30661 /* Compute the attribute "length" of insn. Currently, this function is used
30662 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30663 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30664 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30665 true if OPERANDS contains an insn which explicitly updates the base register. */
30668 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30670 /* ARM mode. */
30671 if (TARGET_ARM)
30672 return 4;
30673 /* Thumb1 mode. */
30674 if (TARGET_THUMB1)
30675 return 2;
30677 rtx parallel_op = operands[0];
30679 /* Initialize to the number of elements in the PARALLEL. */
30679 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30681 /* Initialize to the base register's number. */
30681 unsigned regno = REGNO (operands[1]);
30682 /* Skip the return and write-back patterns;
30683 we only need the register pop patterns for later analysis. */
30684 unsigned first_indx = 0;
30685 first_indx += return_pc ? 1 : 0;
30686 first_indx += write_back_p ? 1 : 0;
30688 /* A pop operation can be done through LDM or POP. If the base register is SP
30689 and write back is used, then an LDM is an alias of POP. */
30690 bool pop_p = (regno == SP_REGNUM && write_back_p);
30691 bool ldm_p = !pop_p;
30693 /* Check base register for LDM. */
30694 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30695 return 4;
30697 /* Check each register in the list. */
30698 for (; indx >= first_indx; indx--)
30700 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30701 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30702 comment in arm_attr_length_push_multi. */
30703 if (REGNO_REG_CLASS (regno) == HI_REGS
30704 && (regno != PC_REGNUM || ldm_p))
30705 return 4;
30708 return 2;
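/* For instance, "pop {r4-r7, pc}" (SP base with write back) can still use
   the 16-bit POP encoding even though PC is a high register, while an LDM
   whose base register is a high register such as r8 always needs the
   32-bit encoding.  */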
30711 /* Compute the number of instructions emitted by output_move_double. */
30713 arm_count_output_move_double_insns (rtx *operands)
30715 int count;
30716 rtx ops[2];
30717 /* output_move_double may modify the operands array, so call it
30718 here on a copy of the array. */
30719 ops[0] = operands[0];
30720 ops[1] = operands[1];
30721 output_move_double (ops, false, &count);
30722 return count;
30725 /* Same as above, but operands are a register/memory pair in SImode.
30726 Assumes operands has the base register in position 0 and memory in position
30727 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30729 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30731 int count;
30732 rtx ops[2];
30733 int regnum, memnum;
30734 if (load)
30735 regnum = 0, memnum = 1;
30736 else
30737 regnum = 1, memnum = 0;
30738 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30739 ops[memnum] = adjust_address (operands[2], DImode, 0);
30740 output_move_double (ops, false, &count);
30741 return count;
30746 vfp3_const_double_for_fract_bits (rtx operand)
30748 REAL_VALUE_TYPE r0;
30750 if (!CONST_DOUBLE_P (operand))
30751 return 0;
30753 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30754 if (exact_real_inverse (DFmode, &r0)
30755 && !REAL_VALUE_NEGATIVE (r0))
30757 if (exact_real_truncate (DFmode, &r0))
30759 HOST_WIDE_INT value = real_to_integer (&r0);
30760 value = value & 0xffffffff;
30761 if ((value != 0) && ( (value & (value - 1)) == 0))
30763 int ret = exact_log2 (value);
30764 gcc_assert (IN_RANGE (ret, 0, 31));
30765 return ret;
30769 return 0;
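/* For example, a CONST_DOUBLE of 0.125 has the exact inverse 8.0, which is
   2^3, so this returns 3 (i.e. three fractional bits), while a value such
   as 0.75 returns 0 because its inverse is not an exact power of two.  */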
30772 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30773 log2 is in [1, 32], return that log2. Otherwise return -1.
30774 This is used in the patterns for vcvt.s32.f32 floating-point to
30775 fixed-point conversions. */
30778 vfp3_const_double_for_bits (rtx x)
30780 const REAL_VALUE_TYPE *r;
30782 if (!CONST_DOUBLE_P (x))
30783 return -1;
30785 r = CONST_DOUBLE_REAL_VALUE (x);
30787 if (REAL_VALUE_NEGATIVE (*r)
30788 || REAL_VALUE_ISNAN (*r)
30789 || REAL_VALUE_ISINF (*r)
30790 || !real_isinteger (r, SFmode))
30791 return -1;
30793 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30795 /* The exact_log2 above will have returned -1 if this is
30796 not an exact log2. */
30797 if (!IN_RANGE (hwint, 1, 32))
30798 return -1;
30800 return hwint;
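/* For example, 65536.0 is 2^16, so this returns 16, while 3.0 and 0.5
   both return -1 (their log2 is not an integer in the range [1, 32]).  */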
30804 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30806 static void
30807 arm_pre_atomic_barrier (enum memmodel model)
30809 if (need_atomic_barrier_p (model, true))
30810 emit_insn (gen_memory_barrier ());
30813 static void
30814 arm_post_atomic_barrier (enum memmodel model)
30816 if (need_atomic_barrier_p (model, false))
30817 emit_insn (gen_memory_barrier ());
30820 /* Emit the load-exclusive and store-exclusive instructions.
30821 Use acquire and release versions if necessary. */
30823 static void
30824 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30826 rtx (*gen) (rtx, rtx);
30828 if (acq)
30830 switch (mode)
30832 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30833 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30834 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30835 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30836 default:
30837 gcc_unreachable ();
30840 else
30842 switch (mode)
30844 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30845 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30846 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30847 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30848 default:
30849 gcc_unreachable ();
30853 emit_insn (gen (rval, mem));
30856 static void
30857 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30858 rtx mem, bool rel)
30860 rtx (*gen) (rtx, rtx, rtx);
30862 if (rel)
30864 switch (mode)
30866 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30867 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30868 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30869 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30870 default:
30871 gcc_unreachable ();
30874 else
30876 switch (mode)
30878 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30879 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30880 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30881 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30882 default:
30883 gcc_unreachable ();
30887 emit_insn (gen (bval, rval, mem));
30890 /* Mark the previous jump instruction as unlikely. */
30892 static void
30893 emit_unlikely_jump (rtx insn)
30895 rtx_insn *jump = emit_jump_insn (insn);
30896 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30899 /* Expand a compare and swap pattern. */
30901 void
30902 arm_expand_compare_and_swap (rtx operands[])
30904 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30905 machine_mode mode, cmp_mode;
30907 bval = operands[0];
30908 rval = operands[1];
30909 mem = operands[2];
30910 oldval = operands[3];
30911 newval = operands[4];
30912 is_weak = operands[5];
30913 mod_s = operands[6];
30914 mod_f = operands[7];
30915 mode = GET_MODE (mem);
30917 /* Normally the succ memory model must be stronger than fail, but in the
30918 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30919 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30921 if (TARGET_HAVE_LDACQ
30922 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30923 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30924 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30926 switch (mode)
30928 case E_QImode:
30929 case E_HImode:
30930 /* For narrow modes, we're going to perform the comparison in SImode,
30931 so do the zero-extension now. */
30932 rval = gen_reg_rtx (SImode);
30933 oldval = convert_modes (SImode, mode, oldval, true);
30934 /* FALLTHRU */
30936 case E_SImode:
30937 /* Force the value into a register if needed. We waited until after
30938 the zero-extension above to do this properly. */
30939 if (!arm_add_operand (oldval, SImode))
30940 oldval = force_reg (SImode, oldval);
30941 break;
30943 case E_DImode:
30944 if (!cmpdi_operand (oldval, mode))
30945 oldval = force_reg (mode, oldval);
30946 break;
30948 default:
30949 gcc_unreachable ();
30952 if (TARGET_THUMB1)
30953 cmp_mode = E_SImode;
30954 else
30955 cmp_mode = CC_Zmode;
30957 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30958 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30959 oldval, newval, is_weak, mod_s, mod_f));
30961 if (mode == QImode || mode == HImode)
30962 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30964 /* In all cases, we arrange for success to be signaled by Z set.
30965 This arrangement allows for the boolean result to be used directly
30966 in a subsequent branch, post optimization. For Thumb-1 targets, the
30967 boolean negation of the result is also stored in bval, because the Thumb-1
30968 backend lacks dependency tracking for the CC flag since flag-setting is not
30969 represented at the RTL level. */
30970 if (TARGET_THUMB1)
30971 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30972 else
30974 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30975 emit_insn (gen_rtx_SET (bval, x));
30979 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30980 another memory store between the load-exclusive and store-exclusive can
30981 reset the monitor from Exclusive to Open state. This means we must wait
30982 until after reload to split the pattern, lest we get a register spill in
30983 the middle of the atomic sequence. Success of the compare and swap is
30984 indicated by the Z flag being set for 32-bit targets and by neg_bval being
30985 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
30986 the atomic_compare_and_swap<mode> standard pattern in operand 0). */
30988 void
30989 arm_split_compare_and_swap (rtx operands[])
30991 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30992 machine_mode mode;
30993 enum memmodel mod_s, mod_f;
30994 bool is_weak;
30995 rtx_code_label *label1, *label2;
30996 rtx x, cond;
30998 rval = operands[1];
30999 mem = operands[2];
31000 oldval = operands[3];
31001 newval = operands[4];
31002 is_weak = (operands[5] != const0_rtx);
31003 mod_s_rtx = operands[6];
31004 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31005 mod_f = memmodel_from_int (INTVAL (operands[7]));
31006 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31007 mode = GET_MODE (mem);
31009 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31011 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31012 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31014 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31015 a full barrier is emitted after the store-release. */
31016 if (is_armv8_sync)
31017 use_acquire = false;
31019 /* Checks whether a barrier is needed and emits one accordingly. */
31020 if (!(use_acquire || use_release))
31021 arm_pre_atomic_barrier (mod_s);
31023 label1 = NULL;
31024 if (!is_weak)
31026 label1 = gen_label_rtx ();
31027 emit_label (label1);
31029 label2 = gen_label_rtx ();
31031 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31033 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
31034 as required to communicate with arm_expand_compare_and_swap. */
31035 if (TARGET_32BIT)
31037 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31038 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31039 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31040 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31041 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31043 else
31045 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31046 if (thumb1_cmpneg_operand (oldval, SImode))
31048 rtx src = rval;
31049 if (!satisfies_constraint_L (oldval))
31051 gcc_assert (satisfies_constraint_J (oldval));
31053 /* For such immediates, ADDS needs the source and destination regs
31054 to be the same.
31056 Normally this would be handled by RA, but this is all happening
31057 after RA. */
31058 emit_move_insn (neg_bval, rval);
31059 src = neg_bval;
31062 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31063 label2, cond));
31065 else
31067 emit_move_insn (neg_bval, const1_rtx);
31068 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31072 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31074 /* Weak or strong, we want EQ to be true for success, so that we
31075 match the flags that we got from the compare above. */
31076 if (TARGET_32BIT)
31078 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31079 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31080 emit_insn (gen_rtx_SET (cond, x));
31083 if (!is_weak)
31085 /* Z is set to boolean value of !neg_bval, as required to communicate
31086 with arm_expand_compare_and_swap. */
31087 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31088 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31091 if (!is_mm_relaxed (mod_f))
31092 emit_label (label2);
31094 /* Checks whether a barrier is needed and emits one accordingly. */
31095 if (is_armv8_sync
31096 || !(use_acquire || use_release))
31097 arm_post_atomic_barrier (mod_s);
31099 if (is_mm_relaxed (mod_f))
31100 emit_label (label2);
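/* For a strong SImode compare-and-swap with no acquire/release semantics
   the splitter above produces, roughly, the classic LL/SC loop
   (register names and labels are illustrative):
	.L1:	ldrex	Rval, [Rmem]
		cmp	Rval, Roldval
		bne	.L2
		strex	Rtmp, Rnewval, [Rmem]
		cmp	Rtmp, #0
		bne	.L1
	.L2:
   bracketed by memory barriers when the memory model requires them.  */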
31103 /* Split an atomic operation pattern. Operation is given by CODE and is one
31104 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31105 operation). Operation is performed on the content at MEM and on VALUE
31106 following the memory model MODEL_RTX. The content at MEM before and after
31107 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31108 success of the operation is returned in COND. Using a scratch register or
31109 an operand register for these determines what result is returned for that
31110 pattern. */
31112 void
31113 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31114 rtx value, rtx model_rtx, rtx cond)
31116 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31117 machine_mode mode = GET_MODE (mem);
31118 machine_mode wmode = (mode == DImode ? DImode : SImode);
31119 rtx_code_label *label;
31120 bool all_low_regs, bind_old_new;
31121 rtx x;
31123 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31125 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31126 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31128 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31129 a full barrier is emitted after the store-release. */
31130 if (is_armv8_sync)
31131 use_acquire = false;
31133 /* Checks whether a barrier is needed and emits one accordingly. */
31134 if (!(use_acquire || use_release))
31135 arm_pre_atomic_barrier (model);
31137 label = gen_label_rtx ();
31138 emit_label (label);
31140 if (new_out)
31141 new_out = gen_lowpart (wmode, new_out);
31142 if (old_out)
31143 old_out = gen_lowpart (wmode, old_out);
31144 else
31145 old_out = new_out;
31146 value = simplify_gen_subreg (wmode, value, mode, 0);
31148 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31150 /* Does the operation require the destination and the first operand to use the
31151 same register? This is decided by the register constraints of the relevant
31152 insn patterns in thumb1.md. */
31153 gcc_assert (!new_out || REG_P (new_out));
31154 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31155 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31156 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31157 bind_old_new =
31158 (TARGET_THUMB1
31159 && code != SET
31160 && code != MINUS
31161 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31163 /* We want to return the old value while putting the result of the operation
31164 in the same register as the old value so copy the old value over to the
31165 destination register and use that register for the operation. */
31166 if (old_out && bind_old_new)
31168 emit_move_insn (new_out, old_out);
31169 old_out = new_out;
31172 switch (code)
31174 case SET:
31175 new_out = value;
31176 break;
31178 case NOT:
31179 x = gen_rtx_AND (wmode, old_out, value);
31180 emit_insn (gen_rtx_SET (new_out, x));
31181 x = gen_rtx_NOT (wmode, new_out);
31182 emit_insn (gen_rtx_SET (new_out, x));
31183 break;
31185 case MINUS:
31186 if (CONST_INT_P (value))
31188 value = gen_int_mode (-INTVAL (value), wmode);
31189 code = PLUS;
31191 /* FALLTHRU */
31193 case PLUS:
31194 if (mode == DImode)
31196 /* DImode plus/minus need to clobber flags. */
31197 /* The adddi3 and subdi3 patterns are incorrectly written so that
31198 they require matching operands, even when we could easily support
31199 three operands. Thankfully, this can be fixed up post-splitting,
31200 as the individual add+adc patterns do accept three operands and
31201 post-reload cprop can make these moves go away. */
31202 emit_move_insn (new_out, old_out);
31203 if (code == PLUS)
31204 x = gen_adddi3 (new_out, new_out, value);
31205 else
31206 x = gen_subdi3 (new_out, new_out, value);
31207 emit_insn (x);
31208 break;
31210 /* FALLTHRU */
31212 default:
31213 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31214 emit_insn (gen_rtx_SET (new_out, x));
31215 break;
31218 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31219 use_release);
31221 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31222 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31224 /* Checks whether a barrier is needed and emits one accordingly. */
31225 if (is_armv8_sync
31226 || !(use_acquire || use_release))
31227 arm_post_atomic_barrier (model);
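/* The resulting sequence for, say, an SImode atomic add is essentially the
   classic load-exclusive/store-exclusive loop (register names are
   illustrative):
	.L1:	ldrex	Rold, [Rmem]
		add	Rnew, Rold, Rvalue
		strex	Rcond, Rnew, [Rmem]
		cmp	Rcond, #0
		bne	.L1
   again bracketed by barriers when the memory model requires them.  */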
31230 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31231 opt_machine_mode
31232 arm_mode_to_pred_mode (machine_mode mode)
31234 switch (GET_MODE_NUNITS (mode))
31236 case 16: return V16BImode;
31237 case 8: return V8BImode;
31238 case 4: return V4BImode;
31240 return opt_machine_mode ();
31243 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31244 If CAN_INVERT, store either the result or its inverse in TARGET
31245 and return true if TARGET contains the inverse. If !CAN_INVERT,
31246 always store the result in TARGET, never its inverse.
31248 Note that the handling of floating-point comparisons is not
31249 IEEE compliant. */
31251 bool
31252 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31253 bool can_invert)
31255 machine_mode cmp_result_mode = GET_MODE (target);
31256 machine_mode cmp_mode = GET_MODE (op0);
31258 bool inverted;
31260 /* MVE supports more comparisons than Neon. */
31261 if (TARGET_HAVE_MVE)
31262 inverted = false;
31263 else
31264 switch (code)
31266 /* For these we need to compute the inverse of the requested
31267 comparison. */
31268 case UNORDERED:
31269 case UNLT:
31270 case UNLE:
31271 case UNGT:
31272 case UNGE:
31273 case UNEQ:
31274 case NE:
31275 code = reverse_condition_maybe_unordered (code);
31276 if (!can_invert)
31278 /* Recursively emit the inverted comparison into a temporary
31279 and then store its inverse in TARGET. This avoids reusing
31280 TARGET (which for integer NE could be one of the inputs). */
31281 rtx tmp = gen_reg_rtx (cmp_result_mode);
31282 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31283 gcc_unreachable ();
31284 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31285 return false;
31287 inverted = true;
31288 break;
31290 default:
31291 inverted = false;
31292 break;
31295 switch (code)
31297 /* These are natively supported by Neon for zero comparisons, but otherwise
31298 require the operands to be swapped. For MVE, we can only compare
31299 registers. */
31300 case LE:
31301 case LT:
31302 if (!TARGET_HAVE_MVE)
31303 if (op1 != CONST0_RTX (cmp_mode))
31305 code = swap_condition (code);
31306 std::swap (op0, op1);
31308 /* Fall through. */
31310 /* These are natively supported by Neon for both register and zero
31311 operands. MVE supports registers only. */
31312 case EQ:
31313 case GE:
31314 case GT:
31315 case NE:
31316 if (TARGET_HAVE_MVE)
31318 switch (GET_MODE_CLASS (cmp_mode))
31320 case MODE_VECTOR_INT:
31321 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31322 op0, force_reg (cmp_mode, op1)));
31323 break;
31324 case MODE_VECTOR_FLOAT:
31325 if (TARGET_HAVE_MVE_FLOAT)
31326 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31327 op0, force_reg (cmp_mode, op1)));
31328 else
31329 gcc_unreachable ();
31330 break;
31331 default:
31332 gcc_unreachable ();
31335 else
31336 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31337 return inverted;
31339 /* These are natively supported for register operands only.
31340 Comparisons with zero aren't useful and should be folded
31341 or canonicalized by target-independent code. */
31342 case GEU:
31343 case GTU:
31344 if (TARGET_HAVE_MVE)
31345 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31346 op0, force_reg (cmp_mode, op1)));
31347 else
31348 emit_insn (gen_neon_vc (code, cmp_mode, target,
31349 op0, force_reg (cmp_mode, op1)));
31350 return inverted;
31352 /* These require the operands to be swapped and likewise do not
31353 support comparisons with zero. */
31354 case LEU:
31355 case LTU:
31356 if (TARGET_HAVE_MVE)
31357 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31358 force_reg (cmp_mode, op1), op0));
31359 else
31360 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31361 target, force_reg (cmp_mode, op1), op0));
31362 return inverted;
31364 /* These need a combination of two comparisons. */
31365 case LTGT:
31366 case ORDERED:
31368 /* Operands are LTGT iff (a > b || a < b).
31369 Operands are ORDERED iff (a > b || a <= b). */
31370 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31371 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31372 rtx_code alt_code = (code == LTGT ? LT : LE);
31373 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31374 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31375 gcc_unreachable ();
31376 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31377 gt_res, alt_res)));
31378 return inverted;
31381 default:
31382 gcc_unreachable ();
31386 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31387 CMP_RESULT_MODE is the mode of the comparison result. */
31389 void
31390 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31392 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31393 arm_expand_vector_compare, and another one here. */
31394 rtx mask;
31396 if (TARGET_HAVE_MVE)
31397 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31398 else
31399 mask = gen_reg_rtx (cmp_result_mode);
31401 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31402 operands[4], operands[5], true);
31403 if (inverted)
31404 std::swap (operands[1], operands[2]);
31405 if (TARGET_NEON)
31406 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31407 mask, operands[1], operands[2]));
31408 else
31410 machine_mode cmp_mode = GET_MODE (operands[0]);
31412 switch (GET_MODE_CLASS (cmp_mode))
31414 case MODE_VECTOR_INT:
31415 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31416 operands[1], operands[2], mask));
31417 break;
31418 case MODE_VECTOR_FLOAT:
31419 if (TARGET_HAVE_MVE_FLOAT)
31420 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31421 operands[1], operands[2], mask));
31422 else
31423 gcc_unreachable ();
31424 break;
31425 default:
31426 gcc_unreachable ();
31431 #define MAX_VECT_LEN 16
31433 struct expand_vec_perm_d
31435 rtx target, op0, op1;
31436 vec_perm_indices perm;
31437 machine_mode vmode;
31438 bool one_vector_p;
31439 bool testing_p;
31442 /* Generate a variable permutation. */
31444 static void
31445 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31447 machine_mode vmode = GET_MODE (target);
31448 bool one_vector_p = rtx_equal_p (op0, op1);
31450 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31451 gcc_checking_assert (GET_MODE (op0) == vmode);
31452 gcc_checking_assert (GET_MODE (op1) == vmode);
31453 gcc_checking_assert (GET_MODE (sel) == vmode);
31454 gcc_checking_assert (TARGET_NEON);
31456 if (one_vector_p)
31458 if (vmode == V8QImode)
31459 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31460 else
31461 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31463 else
31465 rtx pair;
31467 if (vmode == V8QImode)
31469 pair = gen_reg_rtx (V16QImode);
31470 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31471 pair = gen_lowpart (TImode, pair);
31472 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31474 else
31476 pair = gen_reg_rtx (OImode);
31477 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31478 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31483 void
31484 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31486 machine_mode vmode = GET_MODE (target);
31487 unsigned int nelt = GET_MODE_NUNITS (vmode);
31488 bool one_vector_p = rtx_equal_p (op0, op1);
31489 rtx mask;
31491 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31492 numbering of elements for big-endian, we must reverse the order. */
31493 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31495 /* The VTBL instruction does not use a modulo index, so we must take care
31496 of that ourselves. */
31497 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31498 mask = gen_const_vec_duplicate (vmode, mask);
31499 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31501 arm_expand_vec_perm_1 (target, op0, op1, sel);
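/* E.g. for a single-vector V8QImode permute the mask is 7, so a selector
   lane holding 9 is reduced to 1 before the VTBL, giving the modulo
   behavior that VEC_PERM_EXPR expects.  */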
31504 /* Map lane ordering between architectural lane order, and GCC lane order,
31505 taking into account ABI. See comment above output_move_neon for details. */
31507 static int
31508 neon_endian_lane_map (machine_mode mode, int lane)
31510 if (BYTES_BIG_ENDIAN)
31512 int nelems = GET_MODE_NUNITS (mode);
31513 /* Reverse lane order. */
31514 lane = (nelems - 1 - lane);
31515 /* Reverse D register order, to match ABI. */
31516 if (GET_MODE_SIZE (mode) == 16)
31517 lane = lane ^ (nelems / 2);
31519 return lane;
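/* For example, for V4SImode on a big-endian target lane 0 maps to 1,
   1 to 0, 2 to 3 and 3 to 2: the lane order is reversed and the two
   D-register halves of the Q register are then swapped back.  */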
31522 /* Some permutations index into pairs of vectors; this is a helper function
31523 to map indexes into those pairs of vectors. */
31525 static int
31526 neon_pair_endian_lane_map (machine_mode mode, int lane)
31528 int nelem = GET_MODE_NUNITS (mode);
31529 if (BYTES_BIG_ENDIAN)
31530 lane =
31531 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31532 return lane;
31535 /* Generate or test for an insn that supports a constant permutation. */
31537 /* Recognize patterns for the VUZP insns. */
31539 static bool
31540 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31542 unsigned int i, odd, mask, nelt = d->perm.length ();
31543 rtx out0, out1, in0, in1;
31544 int first_elem;
31545 int swap_nelt;
31547 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31548 return false;
31550 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31551 big-endian pattern on 64-bit vectors, so we correct for that. */
31552 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31553 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31555 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31557 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31558 odd = 0;
31559 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31560 odd = 1;
31561 else
31562 return false;
31563 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31565 for (i = 0; i < nelt; i++)
31567 unsigned elt =
31568 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31569 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31570 return false;
31573 /* Success! */
31574 if (d->testing_p)
31575 return true;
31577 in0 = d->op0;
31578 in1 = d->op1;
31579 if (swap_nelt != 0)
31580 std::swap (in0, in1);
31582 out0 = d->target;
31583 out1 = gen_reg_rtx (d->vmode);
31584 if (odd)
31585 std::swap (out0, out1);
31587 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31588 return true;
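/* For instance, on little-endian the two-operand V8HImode selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (all the even lanes) is matched here with
   odd == 0 and becomes a single VUZP, the even lanes ending up in
   d->target.  */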
31591 /* Recognize patterns for the VZIP insns. */
31593 static bool
31594 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31596 unsigned int i, high, mask, nelt = d->perm.length ();
31597 rtx out0, out1, in0, in1;
31598 int first_elem;
31599 bool is_swapped;
31601 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31602 return false;
31604 is_swapped = BYTES_BIG_ENDIAN;
31606 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31608 high = nelt / 2;
31609 if (first_elem == neon_endian_lane_map (d->vmode, high))
31611 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31612 high = 0;
31613 else
31614 return false;
31615 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31617 for (i = 0; i < nelt / 2; i++)
31619 unsigned elt =
31620 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31621 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31622 != elt)
31623 return false;
31624 elt =
31625 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31626 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31627 != elt)
31628 return false;
31631 /* Success! */
31632 if (d->testing_p)
31633 return true;
31635 in0 = d->op0;
31636 in1 = d->op1;
31637 if (is_swapped)
31638 std::swap (in0, in1);
31640 out0 = d->target;
31641 out1 = gen_reg_rtx (d->vmode);
31642 if (high)
31643 std::swap (out0, out1);
31645 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31646 return true;
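/* As an example, on little-endian the two-operand V8HImode selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the two
   inputs; it is matched here with high == 0 and becomes a single VZIP.  */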
31649 /* Recognize patterns for the VREV insns. */
31650 static bool
31651 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31653 unsigned int i, j, diff, nelt = d->perm.length ();
31654 rtx (*gen) (machine_mode, rtx, rtx);
31656 if (!d->one_vector_p)
31657 return false;
31659 diff = d->perm[0];
31660 switch (diff)
31662 case 7:
31663 switch (d->vmode)
31665 case E_V16QImode:
31666 case E_V8QImode:
31667 gen = gen_neon_vrev64;
31668 break;
31669 default:
31670 return false;
31672 break;
31673 case 3:
31674 switch (d->vmode)
31676 case E_V16QImode:
31677 case E_V8QImode:
31678 gen = gen_neon_vrev32;
31679 break;
31680 case E_V8HImode:
31681 case E_V4HImode:
31682 case E_V8HFmode:
31683 case E_V4HFmode:
31684 gen = gen_neon_vrev64;
31685 break;
31686 default:
31687 return false;
31689 break;
31690 case 1:
31691 switch (d->vmode)
31693 case E_V16QImode:
31694 case E_V8QImode:
31695 gen = gen_neon_vrev16;
31696 break;
31697 case E_V8HImode:
31698 case E_V4HImode:
31699 gen = gen_neon_vrev32;
31700 break;
31701 case E_V4SImode:
31702 case E_V2SImode:
31703 case E_V4SFmode:
31704 case E_V2SFmode:
31705 gen = gen_neon_vrev64;
31706 break;
31707 default:
31708 return false;
31710 break;
31711 default:
31712 return false;
31715 for (i = 0; i < nelt ; i += diff + 1)
31716 for (j = 0; j <= diff; j += 1)
31718 /* This is guaranteed to be true as the value of diff
31719 is 7, 3 or 1 and we should have enough elements in the
31720 queue to generate this. Getting a vector mask with a
31721 value of diff other than these implies that
31722 something has gone wrong by the time we get here. */
31723 gcc_assert (i + j < nelt);
31724 if (d->perm[i + j] != i + diff - j)
31725 return false;
31728 /* Success! */
31729 if (d->testing_p)
31730 return true;
31732 emit_insn (gen (d->vmode, d->target, d->op0));
31733 return true;
31736 /* Recognize patterns for the VTRN insns. */
31738 static bool
31739 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31741 unsigned int i, odd, mask, nelt = d->perm.length ();
31742 rtx out0, out1, in0, in1;
31744 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31745 return false;
31747 /* Note that these are little-endian tests. Adjust for big-endian later. */
31748 if (d->perm[0] == 0)
31749 odd = 0;
31750 else if (d->perm[0] == 1)
31751 odd = 1;
31752 else
31753 return false;
31754 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31756 for (i = 0; i < nelt; i += 2)
31758 if (d->perm[i] != i + odd)
31759 return false;
31760 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31761 return false;
31764 /* Success! */
31765 if (d->testing_p)
31766 return true;
31768 in0 = d->op0;
31769 in1 = d->op1;
31770 if (BYTES_BIG_ENDIAN)
31772 std::swap (in0, in1);
31773 odd = !odd;
31776 out0 = d->target;
31777 out1 = gen_reg_rtx (d->vmode);
31778 if (odd)
31779 std::swap (out0, out1);
31781 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31782 return true;
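/* For example, the little-endian V4SImode selector { 0, 4, 2, 6 } is
   matched here with odd == 0 and becomes a single VTRN; the result is
   { a0, b0, a2, b2 } for inputs a and b.  */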
31785 /* Recognize patterns for the VEXT insns. */
31787 static bool
31788 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31790 unsigned int i, nelt = d->perm.length ();
31791 rtx offset;
31793 unsigned int location;
31795 unsigned int next = d->perm[0] + 1;
31797 /* TODO: Handle GCC's numbering of elements for big-endian. */
31798 if (BYTES_BIG_ENDIAN)
31799 return false;
31801 /* Check if the extracted indexes are increasing by one. */
31802 for (i = 1; i < nelt; next++, i++)
31804 /* If we hit the most significant element of the 2nd vector in
31805 the previous iteration, no need to test further. */
31806 if (next == 2 * nelt)
31807 return false;
31809 /* If we are operating on only one vector: it could be a
31810 rotation. If there are only two elements of size < 64, let
31811 arm_evpc_neon_vrev catch it. */
31812 if (d->one_vector_p && (next == nelt))
31814 if ((nelt == 2) && (d->vmode != V2DImode))
31815 return false;
31816 else
31817 next = 0;
31820 if (d->perm[i] != next)
31821 return false;
31824 location = d->perm[0];
31826 /* Success! */
31827 if (d->testing_p)
31828 return true;
31830 offset = GEN_INT (location);
31832 if(d->vmode == E_DImode)
31833 return false;
31835 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31836 return true;
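/* For example, the two-operand V4SImode selector { 1, 2, 3, 4 } extracts
   { a1, a2, a3, b0 } from inputs a and b and is emitted as a single VEXT
   with offset 1.  */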
31839 /* The NEON VTBL instruction is a fully variable permutation that's even
31840 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31841 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31842 can do slightly better by expanding this as a constant where we don't
31843 have to apply a mask. */
31845 static bool
31846 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31848 rtx rperm[MAX_VECT_LEN], sel;
31849 machine_mode vmode = d->vmode;
31850 unsigned int i, nelt = d->perm.length ();
31852 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31853 numbering of elements for big-endian, we must reverse the order. */
31854 if (BYTES_BIG_ENDIAN)
31855 return false;
31857 if (d->testing_p)
31858 return true;
31860 /* Generic code will try constant permutation twice: once with the
31861 original mode and again with the elements lowered to QImode.
31862 So wait, and don't do the selector expansion ourselves. */
31863 if (vmode != V8QImode && vmode != V16QImode)
31864 return false;
31866 for (i = 0; i < nelt; ++i)
31867 rperm[i] = GEN_INT (d->perm[i]);
31868 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31869 sel = force_reg (vmode, sel);
31871 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31872 return true;
31875 static bool
31876 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31878 /* Check if the input mask matches vext before reordering the
31879 operands. */
31880 if (TARGET_NEON)
31881 if (arm_evpc_neon_vext (d))
31882 return true;
31884 /* The pattern matching functions above are written to look for a small
31885 number to begin the sequence (0, 1, N/2). If we begin with an index
31886 from the second operand, we can swap the operands. */
31887 unsigned int nelt = d->perm.length ();
31888 if (d->perm[0] >= nelt)
31890 d->perm.rotate_inputs (1);
31891 std::swap (d->op0, d->op1);
31894 if (TARGET_NEON)
31896 if (arm_evpc_neon_vuzp (d))
31897 return true;
31898 if (arm_evpc_neon_vzip (d))
31899 return true;
31900 if (arm_evpc_neon_vrev (d))
31901 return true;
31902 if (arm_evpc_neon_vtrn (d))
31903 return true;
31904 return arm_evpc_neon_vtbl (d);
31906 return false;
31909 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31911 static bool
31912 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
31913 rtx target, rtx op0, rtx op1,
31914 const vec_perm_indices &sel)
31916 if (vmode != op_mode)
31917 return false;
31919 struct expand_vec_perm_d d;
31920 int i, nelt, which;
31922 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31923 return false;
31925 d.target = target;
31926 if (op0)
31928 rtx nop0 = force_reg (vmode, op0);
31929 if (op0 == op1)
31930 op1 = nop0;
31931 op0 = nop0;
31933 if (op1)
31934 op1 = force_reg (vmode, op1);
31935 d.op0 = op0;
31936 d.op1 = op1;
31938 d.vmode = vmode;
31939 gcc_assert (VECTOR_MODE_P (d.vmode));
31940 d.testing_p = !target;
31942 nelt = GET_MODE_NUNITS (d.vmode);
31943 for (i = which = 0; i < nelt; ++i)
31945 int ei = sel[i] & (2 * nelt - 1);
31946 which |= (ei < nelt ? 1 : 2);
31949 switch (which)
31951 default:
31952 gcc_unreachable();
31954 case 3:
31955 d.one_vector_p = false;
31956 if (d.testing_p || !rtx_equal_p (op0, op1))
31957 break;
31959 /* The elements of PERM do not suggest that only the first operand
31960 is used, but both operands are identical. Allow easier matching
31961 of the permutation by folding the permutation into the single
31962 input vector. */
31963 /* FALLTHRU */
31964 case 2:
31965 d.op0 = op1;
31966 d.one_vector_p = true;
31967 break;
31969 case 1:
31970 d.op1 = op0;
31971 d.one_vector_p = true;
31972 break;
31975 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31977 if (!d.testing_p)
31978 return arm_expand_vec_perm_const_1 (&d);
31980 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31981 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31982 if (!d.one_vector_p)
31983 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31985 start_sequence ();
31986 bool ret = arm_expand_vec_perm_const_1 (&d);
31987 end_sequence ();
31989 return ret;
31992 bool
31993 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31995 /* If we are soft float and either we have LDRD or the mode is no wider
31996 than a word, then all auto-increment forms are ok. */
31997 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31998 return true;
32000 switch (code)
32002 /* Post-increment and pre-decrement are supported for all
32003 instruction forms except for vector forms. */
32004 case ARM_POST_INC:
32005 case ARM_PRE_DEC:
32006 if (VECTOR_MODE_P (mode))
32008 if (code != ARM_PRE_DEC)
32009 return true;
32010 else
32011 return false;
32014 return true;
32016 case ARM_POST_DEC:
32017 case ARM_PRE_INC:
32018 /* Without LDRD, and with a mode size greater than the
32019 word size, there is no point in auto-incrementing
32020 because ldm and stm do not have these forms. */
32021 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32022 return false;
32024 /* Vector and floating point modes do not support
32025 these auto increment forms. */
32026 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32027 return false;
32029 return true;
32031 default:
32032 return false;
32036 return false;
32039 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
32040 on ARM, since we know that shifts by negative amounts are no-ops.
32041 Additionally, the default expansion code is not available or suitable
32042 for post-reload insn splits (this can occur when the register allocator
32043 chooses not to do a shift in NEON).
32045 This function is used in both initial expand and post-reload splits, and
32046 handles all kinds of 64-bit shifts.
32048 Input requirements:
32049 - It is safe for the input and output to be the same register, but
32050 early-clobber rules apply for the shift amount and scratch registers.
32051 - Shift by register requires both scratch registers. In all other cases
32052 the scratch registers may be NULL.
32053 - Ashiftrt by a register also clobbers the CC register. */
32054 void
32055 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32056 rtx amount, rtx scratch1, rtx scratch2)
32058 rtx out_high = gen_highpart (SImode, out);
32059 rtx out_low = gen_lowpart (SImode, out);
32060 rtx in_high = gen_highpart (SImode, in);
32061 rtx in_low = gen_lowpart (SImode, in);
32063 /* Terminology:
32064 in = the register pair containing the input value.
32065 out = the destination register pair.
32066 up = the high- or low-part of each pair.
32067 down = the opposite part to "up".
32068 In a shift, we can consider bits to shift from "up"-stream to
32069 "down"-stream, so in a left-shift "up" is the low-part and "down"
32070 is the high-part of each register pair. */
32072 rtx out_up = code == ASHIFT ? out_low : out_high;
32073 rtx out_down = code == ASHIFT ? out_high : out_low;
32074 rtx in_up = code == ASHIFT ? in_low : in_high;
32075 rtx in_down = code == ASHIFT ? in_high : in_low;
32077 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32078 gcc_assert (out
32079 && (REG_P (out) || SUBREG_P (out))
32080 && GET_MODE (out) == DImode);
32081 gcc_assert (in
32082 && (REG_P (in) || SUBREG_P (in))
32083 && GET_MODE (in) == DImode);
32084 gcc_assert (amount
32085 && (((REG_P (amount) || SUBREG_P (amount))
32086 && GET_MODE (amount) == SImode)
32087 || CONST_INT_P (amount)));
32088 gcc_assert (scratch1 == NULL
32089 || (GET_CODE (scratch1) == SCRATCH)
32090 || (GET_MODE (scratch1) == SImode
32091 && REG_P (scratch1)));
32092 gcc_assert (scratch2 == NULL
32093 || (GET_CODE (scratch2) == SCRATCH)
32094 || (GET_MODE (scratch2) == SImode
32095 && REG_P (scratch2)));
32096 gcc_assert (!REG_P (out) || !REG_P (amount)
32097 || !HARD_REGISTER_P (out)
32098 || (REGNO (out) != REGNO (amount)
32099 && REGNO (out) + 1 != REGNO (amount)));
32101 /* Macros to make following code more readable. */
32102 #define SUB_32(DEST,SRC) \
32103 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32104 #define RSB_32(DEST,SRC) \
32105 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32106 #define SUB_S_32(DEST,SRC) \
32107 gen_addsi3_compare0 ((DEST), (SRC), \
32108 GEN_INT (-32))
32109 #define SET(DEST,SRC) \
32110 gen_rtx_SET ((DEST), (SRC))
32111 #define SHIFT(CODE,SRC,AMOUNT) \
32112 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32113 #define LSHIFT(CODE,SRC,AMOUNT) \
32114 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32115 SImode, (SRC), (AMOUNT))
32116 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32117 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32118 SImode, (SRC), (AMOUNT))
32119 #define ORR(A,B) \
32120 gen_rtx_IOR (SImode, (A), (B))
32121 #define BRANCH(COND,LABEL) \
32122 gen_arm_cond_branch ((LABEL), \
32123 gen_rtx_ ## COND (CCmode, cc_reg, \
32124 const0_rtx), \
32125 cc_reg)
32127 /* Shifts by register and shifts by constant are handled separately. */
32128 if (CONST_INT_P (amount))
32130 /* We have a shift-by-constant. */
32132 /* First, handle out-of-range shift amounts.
32133 In both cases we try to match the result that an ARM instruction in a
32134 shift-by-register would give. This helps reduce execution
32135 differences between optimization levels, but it won't stop other
32136 parts of the compiler doing different things. This is "undefined"
32137 behavior, in any case. */
32138 if (INTVAL (amount) <= 0)
32139 emit_insn (gen_movdi (out, in));
32140 else if (INTVAL (amount) >= 64)
32142 if (code == ASHIFTRT)
32144 rtx const31_rtx = GEN_INT (31);
32145 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32146 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32148 else
32149 emit_insn (gen_movdi (out, const0_rtx));
32152 /* Now handle valid shifts. */
32153 else if (INTVAL (amount) < 32)
32155 /* Shifts by a constant less than 32. */
32156 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32158 /* Clearing the out register in DImode first avoids lots
32159 of spilling and results in less stack usage.
32160 Later this redundant insn is completely removed.
32161 Do that only if "in" and "out" are different registers. */
32162 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32163 emit_insn (SET (out, const0_rtx));
32164 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32165 emit_insn (SET (out_down,
32166 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32167 out_down)));
32168 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32170 else
32172 /* Shifts by a constant greater than 31. */
32173 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32175 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32176 emit_insn (SET (out, const0_rtx));
32177 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32178 if (code == ASHIFTRT)
32179 emit_insn (gen_ashrsi3 (out_up, in_up,
32180 GEN_INT (31)));
32181 else
32182 emit_insn (SET (out_up, const0_rtx));
32185 else
32187 /* We have a shift-by-register. */
32188 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32190 /* This alternative requires the scratch registers. */
32191 gcc_assert (scratch1 && REG_P (scratch1));
32192 gcc_assert (scratch2 && REG_P (scratch2));
32194 /* We will need the values "amount-32" and "32-amount" later.
32195 Swapping them around now allows the later code to be more general. */
32196 switch (code)
32198 case ASHIFT:
32199 emit_insn (SUB_32 (scratch1, amount));
32200 emit_insn (RSB_32 (scratch2, amount));
32201 break;
32202 case ASHIFTRT:
32203 emit_insn (RSB_32 (scratch1, amount));
32204 /* Also set CC = amount > 32. */
32205 emit_insn (SUB_S_32 (scratch2, amount));
32206 break;
32207 case LSHIFTRT:
32208 emit_insn (RSB_32 (scratch1, amount));
32209 emit_insn (SUB_32 (scratch2, amount));
32210 break;
32211 default:
32212 gcc_unreachable ();
32215 /* Emit code like this:
32217 arithmetic-left:
32218 out_down = in_down << amount;
32219 out_down = (in_up << (amount - 32)) | out_down;
32220 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32221 out_up = in_up << amount;
32223 arithmetic-right:
32224 out_down = in_down >> amount;
32225 out_down = (in_up << (32 - amount)) | out_down;
32226 if (amount < 32)
32227 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32228 out_up = in_up >> amount;
32230 logical-right:
32231 out_down = in_down >> amount;
32232 out_down = (in_up << (32 - amount)) | out_down;
32233 if (amount < 32)
32234 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32235 out_up = in_up >> amount;
32237 The ARM and Thumb2 variants are the same but implemented slightly
32238 differently. If this were only called during expand we could just
32239 use the Thumb2 case and let combine do the right thing, but this
32240 can also be called from post-reload splitters. */
32242 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32244 if (!TARGET_THUMB2)
32246 /* Emit code for ARM mode. */
32247 emit_insn (SET (out_down,
32248 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32249 if (code == ASHIFTRT)
32251 rtx_code_label *done_label = gen_label_rtx ();
32252 emit_jump_insn (BRANCH (LT, done_label));
32253 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32254 out_down)));
32255 emit_label (done_label);
32257 else
32258 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32259 out_down)));
32261 else
32263 /* Emit code for Thumb2 mode.
32264 Thumb2 can't do shift and or in one insn. */
32265 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32266 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32268 if (code == ASHIFTRT)
32270 rtx_code_label *done_label = gen_label_rtx ();
32271 emit_jump_insn (BRANCH (LT, done_label));
32272 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32273 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32274 emit_label (done_label);
32276 else
32278 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32279 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32283 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32286 #undef SUB_32
32287 #undef RSB_32
32288 #undef SUB_S_32
32289 #undef SET
32290 #undef SHIFT
32291 #undef LSHIFT
32292 #undef REV_LSHIFT
32293 #undef ORR
32294 #undef BRANCH
32297 /* Returns true if the pattern is a valid symbolic address, which is either a
32298 symbol_ref or (symbol_ref + addend).
32300 According to the ARM ELF ABI, the initial addend of REL-type relocations
32301 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32302 literal field of the instruction as a 16-bit signed value in the range
32303 -32768 <= A < 32768.
32305 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32306 unsigned range of 0 <= A < 256 as described in the AAELF32
32307 relocation handling documentation: REL-type relocations are encoded
32308 as unsigned in this case. */
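/* For example, (const (plus (symbol_ref "x") (const_int 4))) is accepted,
   while an addend of 0x8000 is rejected; in Thumb-1 without MOVT only
   addends in the range 0-255 are allowed. */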
32310 bool
32311 arm_valid_symbolic_address_p (rtx addr)
32313 rtx xop0, xop1 = NULL_RTX;
32314 rtx tmp = addr;
32316 if (target_word_relocations)
32317 return false;
32319 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32320 return true;
32322 /* (const (plus: symbol_ref const_int)) */
32323 if (GET_CODE (addr) == CONST)
32324 tmp = XEXP (addr, 0);
32326 if (GET_CODE (tmp) == PLUS)
32328 xop0 = XEXP (tmp, 0);
32329 xop1 = XEXP (tmp, 1);
32331 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32333 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32334 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32335 else
32336 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32340 return false;
32343 /* Return true if *COMPARISON is a valid comparison operation, and
32344 put the operands into a form that is valid. */
32345 bool
32346 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32348 enum rtx_code code = GET_CODE (*comparison);
32349 int code_int;
32350 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32351 ? GET_MODE (*op2) : GET_MODE (*op1);
32353 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32355 if (code == UNEQ || code == LTGT)
32356 return false;
32358 code_int = (int)code;
32359 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32360 PUT_CODE (*comparison, (enum rtx_code)code_int);
32362 switch (mode)
32364 case E_SImode:
32365 if (!arm_add_operand (*op1, mode))
32366 *op1 = force_reg (mode, *op1);
32367 if (!arm_add_operand (*op2, mode))
32368 *op2 = force_reg (mode, *op2);
32369 return true;
32371 case E_DImode:
32372 /* gen_compare_reg() will sort out any invalid operands. */
32373 return true;
32375 case E_HFmode:
32376 if (!TARGET_VFP_FP16INST)
32377 break;
32378 /* FP16 comparisons are done in SF mode. */
32379 mode = SFmode;
32380 *op1 = convert_to_mode (mode, *op1, 1);
32381 *op2 = convert_to_mode (mode, *op2, 1);
32382 /* Fall through. */
32383 case E_SFmode:
32384 case E_DFmode:
32385 if (!vfp_compare_operand (*op1, mode))
32386 *op1 = force_reg (mode, *op1);
32387 if (!vfp_compare_operand (*op2, mode))
32388 *op2 = force_reg (mode, *op2);
32389 return true;
32390 default:
32391 break;
32394 return false;
32398 /* Maximum number of instructions to set a block of memory. */
32399 static int
32400 arm_block_set_max_insns (void)
32402 if (optimize_function_for_size_p (cfun))
32403 return 4;
32404 else
32405 return current_tune->max_insns_inline_memset;
32408 /* Return TRUE if it's profitable to set a block of memory in the
32409 non-vectorized case. VAL is the value to set the memory
32410 with. LENGTH is the number of bytes to set. ALIGN is the
32411 alignment of the destination memory in bytes. UNALIGNED_P
32412 is TRUE if we can only set the memory with instructions
32413 meeting alignment requirements. USE_STRD_P is TRUE if we
32414 can use strd to set the memory. */
32415 static bool
32416 arm_block_set_non_vect_profit_p (rtx val,
32417 unsigned HOST_WIDE_INT length,
32418 unsigned HOST_WIDE_INT align,
32419 bool unaligned_p, bool use_strd_p)
32421 int num = 0;
32422 /* For leftovers of 0-7 bytes, we can set the memory block using
32423 strb/strh/str with the minimum number of instructions. */
32424 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
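/* E.g. 3 leftover bytes take an strh plus an strb (2 insns), while
   7 leftover bytes take str + strh + strb (3 insns). */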
32426 if (unaligned_p)
32428 num = arm_const_inline_cost (SET, val);
32429 num += length / align + length % align;
32431 else if (use_strd_p)
32433 num = arm_const_double_inline_cost (val);
32434 num += (length >> 3) + leftover[length & 7];
32436 else
32438 num = arm_const_inline_cost (SET, val);
32439 num += (length >> 2) + leftover[length & 3];
32442 /* We may be able to combine the last STRH/STRB pair into a single STR
32443 by shifting one byte back. */
32444 if (unaligned_access && length > 3 && (length & 3) == 3)
32445 num--;
32447 return (num <= arm_block_set_max_insns ());
32450 /* Return TRUE if it's profitable to set a block of memory in the
32451 vectorized case. LENGTH is the number of bytes to set.
32452 ALIGN is the alignment of destination memory in bytes.
32453 MODE is the vector mode used to set the memory. */
32454 static bool
32455 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32456 unsigned HOST_WIDE_INT align,
32457 machine_mode mode)
32459 int num;
32460 bool unaligned_p = ((align & 3) != 0);
32461 unsigned int nelt = GET_MODE_NUNITS (mode);
32463 /* Instruction loading constant value. */
32464 num = 1;
32465 /* Instructions storing the memory. */
32466 num += (length + nelt - 1) / nelt;
32467 /* Instructions adjusting the address expression. We only need to adjust
32468 the address expression if the destination is 4-byte aligned and the
32469 leftover bytes can only be stored by a misaligned store instruction. */
32470 if (!unaligned_p && (length & 3) != 0)
32471 num++;
32473 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32474 if (!unaligned_p && mode == V16QImode)
32475 num--;
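/* As a rough worked example, a 20-byte aligned set in V16QImode is
   estimated here at 1 (load constant) + 2 (stores) - 1 = 2 instructions. */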
32477 return (num <= arm_block_set_max_insns ());
32480 /* Set a block of memory using vectorization instructions for the
32481 unaligned case. We fill the first LENGTH bytes of the memory
32482 area starting from DSTBASE with byte constant VALUE. ALIGN is
32483 the alignment requirement of memory. Return TRUE if succeeded. */
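/* Leftover bytes at the end are handled by moving the final store back so
   that it overlaps the previous one: e.g. for 20 bytes, a 16-byte store
   covers bytes 0-15 and an 8-byte store then covers bytes 12-19. */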
32484 static bool
32485 arm_block_set_unaligned_vect (rtx dstbase,
32486 unsigned HOST_WIDE_INT length,
32487 unsigned HOST_WIDE_INT value,
32488 unsigned HOST_WIDE_INT align)
32490 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32491 rtx dst, mem;
32492 rtx val_vec, reg;
32493 rtx (*gen_func) (rtx, rtx);
32494 machine_mode mode;
32495 unsigned HOST_WIDE_INT v = value;
32496 unsigned int offset = 0;
32497 gcc_assert ((align & 0x3) != 0);
32498 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32499 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32500 if (length >= nelt_v16)
32502 mode = V16QImode;
32503 gen_func = gen_movmisalignv16qi;
32505 else
32507 mode = V8QImode;
32508 gen_func = gen_movmisalignv8qi;
32510 nelt_mode = GET_MODE_NUNITS (mode);
32511 gcc_assert (length >= nelt_mode);
32512 /* Skip if it isn't profitable. */
32513 if (!arm_block_set_vect_profit_p (length, align, mode))
32514 return false;
32516 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32517 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32519 v = sext_hwi (v, BITS_PER_WORD);
32521 reg = gen_reg_rtx (mode);
32522 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32523 /* Emit instruction loading the constant value. */
32524 emit_move_insn (reg, val_vec);
32526 /* Handle nelt_mode bytes in a vector. */
32527 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32529 emit_insn ((*gen_func) (mem, reg));
32530 if (i + 2 * nelt_mode <= length)
32532 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32533 offset += nelt_mode;
32534 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32538 /* If at least nelt_v8 bytes are left over, we must be in
32539 V16QI mode. */
32540 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32542 /* Handle (8, 16) bytes leftover. */
32543 if (i + nelt_v8 < length)
32545 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32546 offset += length - i;
32547 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32549 /* We are shifting bytes back, set the alignment accordingly. */
32550 if ((length & 1) != 0 && align >= 2)
32551 set_mem_align (mem, BITS_PER_UNIT);
32553 emit_insn (gen_movmisalignv16qi (mem, reg));
32555 /* Handle (0, 8] bytes leftover. */
32556 else if (i < length && i + nelt_v8 >= length)
32558 if (mode == V16QImode)
32559 reg = gen_lowpart (V8QImode, reg);
32561 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32562 + (nelt_mode - nelt_v8))));
32563 offset += (length - i) + (nelt_mode - nelt_v8);
32564 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32566 /* We are shifting bytes back, set the alignment accordingly. */
32567 if ((length & 1) != 0 && align >= 2)
32568 set_mem_align (mem, BITS_PER_UNIT);
32570 emit_insn (gen_movmisalignv8qi (mem, reg));
32573 return true;
32576 /* Set a block of memory using vectorization instructions for the
32577 aligned case. We fill the first LENGTH bytes of the memory area
32578 starting from DSTBASE with byte constant VALUE. ALIGN is the
32579 alignment requirement of memory. Return TRUE if succeeded. */
32580 static bool
32581 arm_block_set_aligned_vect (rtx dstbase,
32582 unsigned HOST_WIDE_INT length,
32583 unsigned HOST_WIDE_INT value,
32584 unsigned HOST_WIDE_INT align)
32586 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32587 rtx dst, addr, mem;
32588 rtx val_vec, reg;
32589 machine_mode mode;
32590 unsigned int offset = 0;
32592 gcc_assert ((align & 0x3) == 0);
32593 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32594 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32595 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32596 mode = V16QImode;
32597 else
32598 mode = V8QImode;
32600 nelt_mode = GET_MODE_NUNITS (mode);
32601 gcc_assert (length >= nelt_mode);
32602 /* Skip if it isn't profitable. */
32603 if (!arm_block_set_vect_profit_p (length, align, mode))
32604 return false;
32606 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32608 reg = gen_reg_rtx (mode);
32609 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32610 /* Emit instruction loading the constant value. */
32611 emit_move_insn (reg, val_vec);
32613 i = 0;
32614 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32615 if (mode == V16QImode)
32617 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32618 emit_insn (gen_movmisalignv16qi (mem, reg));
32619 i += nelt_mode;
32620 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32621 if (i + nelt_v8 < length && i + nelt_v16 > length)
32623 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32624 offset += length - nelt_mode;
32625 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32626 /* We are shifting bytes back, set the alignment accordingly. */
32627 if ((length & 0x3) == 0)
32628 set_mem_align (mem, BITS_PER_UNIT * 4);
32629 else if ((length & 0x1) == 0)
32630 set_mem_align (mem, BITS_PER_UNIT * 2);
32631 else
32632 set_mem_align (mem, BITS_PER_UNIT);
32634 emit_insn (gen_movmisalignv16qi (mem, reg));
32635 return true;
32637 /* Fall through for bytes leftover. */
32638 mode = V8QImode;
32639 nelt_mode = GET_MODE_NUNITS (mode);
32640 reg = gen_lowpart (V8QImode, reg);
32643 /* Handle 8 bytes in a vector. */
32644 for (; (i + nelt_mode <= length); i += nelt_mode)
32646 addr = plus_constant (Pmode, dst, i);
32647 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32648 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32649 emit_move_insn (mem, reg);
32650 else
32651 emit_insn (gen_unaligned_storev8qi (mem, reg));
32654 /* Handle single word leftover by shifting 4 bytes back. We can
32655 use aligned access for this case. */
32656 if (i + UNITS_PER_WORD == length)
32658 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32659 offset += i - UNITS_PER_WORD;
32660 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32661 /* We are shifting 4 bytes back, set the alignment accordingly. */
32662 if (align > UNITS_PER_WORD)
32663 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32665 emit_insn (gen_unaligned_storev8qi (mem, reg));
32667 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32668 We have to use unaligned access for this case. */
32669 else if (i < length)
32671 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32672 offset += length - nelt_mode;
32673 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32674 /* We are shifting bytes back, set the alignment accordingly. */
32675 if ((length & 1) == 0)
32676 set_mem_align (mem, BITS_PER_UNIT * 2);
32677 else
32678 set_mem_align (mem, BITS_PER_UNIT);
32680 emit_insn (gen_movmisalignv8qi (mem, reg));
32683 return true;
32686 /* Set a block of memory using plain strh/strb instructions, only
32687 using instructions allowed by ALIGN on the processor. We fill the
32688 first LENGTH bytes of the memory area starting from DSTBASE
32689 with byte constant VALUE. ALIGN is the alignment requirement
32690 of memory. */
32691 static bool
32692 arm_block_set_unaligned_non_vect (rtx dstbase,
32693 unsigned HOST_WIDE_INT length,
32694 unsigned HOST_WIDE_INT value,
32695 unsigned HOST_WIDE_INT align)
32697 unsigned int i;
32698 rtx dst, addr, mem;
32699 rtx val_exp, val_reg, reg;
32700 machine_mode mode;
32701 HOST_WIDE_INT v = value;
32703 gcc_assert (align == 1 || align == 2);
32705 if (align == 2)
32706 v |= (value << BITS_PER_UNIT);
32708 v = sext_hwi (v, BITS_PER_WORD);
32709 val_exp = GEN_INT (v);
32710 /* Skip if it isn't profitable. */
32711 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32712 align, true, false))
32713 return false;
32715 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32716 mode = (align == 2 ? HImode : QImode);
32717 val_reg = force_reg (SImode, val_exp);
32718 reg = gen_lowpart (mode, val_reg);
32720 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32722 addr = plus_constant (Pmode, dst, i);
32723 mem = adjust_automodify_address (dstbase, mode, addr, i);
32724 emit_move_insn (mem, reg);
32727 /* Handle single byte leftover. */
32728 if (i + 1 == length)
32730 reg = gen_lowpart (QImode, val_reg);
32731 addr = plus_constant (Pmode, dst, i);
32732 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32733 emit_move_insn (mem, reg);
32734 i++;
32737 gcc_assert (i == length);
32738 return true;
32741 /* Set a block of memory using plain strd/str/strh/strb instructions,
32742 to permit unaligned stores on processors which support unaligned
32743 semantics for those instructions. We fill the first LENGTH bytes
32744 of the memory area starting from DSTBASE with byte constant VALUE.
32745 ALIGN is the alignment requirement of memory. */
32746 static bool
32747 arm_block_set_aligned_non_vect (rtx dstbase,
32748 unsigned HOST_WIDE_INT length,
32749 unsigned HOST_WIDE_INT value,
32750 unsigned HOST_WIDE_INT align)
32752 unsigned int i;
32753 rtx dst, addr, mem;
32754 rtx val_exp, val_reg, reg;
32755 unsigned HOST_WIDE_INT v;
32756 bool use_strd_p;
32758 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32759 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32761 v = (value | (value << 8) | (value << 16) | (value << 24));
32762 if (length < UNITS_PER_WORD)
32763 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32765 if (use_strd_p)
32766 v |= (v << BITS_PER_WORD);
32767 else
32768 v = sext_hwi (v, BITS_PER_WORD);
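/* E.g. VALUE == 0xab replicates to the word pattern 0xabababab, doubled
   to 0xabababababababab when strd will be used. */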
32770 val_exp = GEN_INT (v);
32771 /* Skip if it isn't profitable. */
32772 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32773 align, false, use_strd_p))
32775 if (!use_strd_p)
32776 return false;
32778 /* Try without strd. */
32779 v = (v >> BITS_PER_WORD);
32780 v = sext_hwi (v, BITS_PER_WORD);
32781 val_exp = GEN_INT (v);
32782 use_strd_p = false;
32783 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32784 align, false, use_strd_p))
32785 return false;
32788 i = 0;
32789 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32790 /* Handle double words using strd if possible. */
32791 if (use_strd_p)
32793 val_reg = force_reg (DImode, val_exp);
32794 reg = val_reg;
32795 for (; (i + 8 <= length); i += 8)
32797 addr = plus_constant (Pmode, dst, i);
32798 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32799 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32800 emit_move_insn (mem, reg);
32801 else
32802 emit_insn (gen_unaligned_storedi (mem, reg));
32805 else
32806 val_reg = force_reg (SImode, val_exp);
32808 /* Handle words. */
32809 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32810 for (; (i + 4 <= length); i += 4)
32812 addr = plus_constant (Pmode, dst, i);
32813 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32814 if ((align & 3) == 0)
32815 emit_move_insn (mem, reg);
32816 else
32817 emit_insn (gen_unaligned_storesi (mem, reg));
32820 /* Merge last pair of STRH and STRB into a STR if possible. */
32821 if (unaligned_access && i > 0 && (i + 3) == length)
32823 addr = plus_constant (Pmode, dst, i - 1);
32824 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32825 /* We are shifting one byte back, set the alignment accordingly. */
32826 if ((align & 1) == 0)
32827 set_mem_align (mem, BITS_PER_UNIT);
32829 /* Most likely this is an unaligned access, and we can't tell at
32830 compilation time. */
32831 emit_insn (gen_unaligned_storesi (mem, reg));
32832 return true;
32835 /* Handle half word leftover. */
32836 if (i + 2 <= length)
32838 reg = gen_lowpart (HImode, val_reg);
32839 addr = plus_constant (Pmode, dst, i);
32840 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32841 if ((align & 1) == 0)
32842 emit_move_insn (mem, reg);
32843 else
32844 emit_insn (gen_unaligned_storehi (mem, reg));
32846 i += 2;
32849 /* Handle single byte leftover. */
32850 if (i + 1 == length)
32852 reg = gen_lowpart (QImode, val_reg);
32853 addr = plus_constant (Pmode, dst, i);
32854 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32855 emit_move_insn (mem, reg);
32858 return true;
32861 /* Set a block of memory using vectorization instructions for both
32862 aligned and unaligned cases. We fill the first LENGTH bytes of
32863 the memory area starting from DSTBASE with byte constant VALUE.
32864 ALIGN is the alignment requirement of memory. */
32865 static bool
32866 arm_block_set_vect (rtx dstbase,
32867 unsigned HOST_WIDE_INT length,
32868 unsigned HOST_WIDE_INT value,
32869 unsigned HOST_WIDE_INT align)
32871 /* Check whether we need to use unaligned store instruction. */
32872 if (((align & 3) != 0 || (length & 3) != 0)
32873 /* Check whether unaligned store instruction is available. */
32874 && (!unaligned_access || BYTES_BIG_ENDIAN))
32875 return false;
32877 if ((align & 3) == 0)
32878 return arm_block_set_aligned_vect (dstbase, length, value, align);
32879 else
32880 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32883 /* Expand a block memory set (setmem) operation. First we try to do that
32884 using vectorization instructions, then try ARM unaligned access and
32885 double-word stores if profitable. OPERANDS[0] is the destination,
32886 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32887 initialize the memory with, OPERANDS[3] is the known alignment of the
32888 destination. */
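/* For example, a 15-byte set of 0xab to a word-aligned destination is
   typically expanded as three word stores of 0xabababab followed by a
   halfword and a byte store, or by a final overlapping unaligned word
   store when unaligned access is available. */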
32889 bool
32890 arm_gen_setmem (rtx *operands)
32892 rtx dstbase = operands[0];
32893 unsigned HOST_WIDE_INT length;
32894 unsigned HOST_WIDE_INT value;
32895 unsigned HOST_WIDE_INT align;
32897 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32898 return false;
32900 length = UINTVAL (operands[1]);
32901 if (length > 64)
32902 return false;
32904 value = (UINTVAL (operands[2]) & 0xFF);
32905 align = UINTVAL (operands[3]);
32906 if (TARGET_NEON && length >= 8
32907 && current_tune->string_ops_prefer_neon
32908 && arm_block_set_vect (dstbase, length, value, align))
32909 return true;
32911 if (!unaligned_access && (align & 3) != 0)
32912 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32914 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32918 static bool
32919 arm_macro_fusion_p (void)
32921 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32924 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32925 for MOVW / MOVT macro fusion. */
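/* E.g. "movw r0, #0x1234" followed by "movt r0, #0x5678", which together
   set r0 to 0x56781234. */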
32927 static bool
32928 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32930 /* We are trying to fuse
32931 movw imm / movt imm
32932 instructions as a group that gets scheduled together. */
32934 rtx set_dest = SET_DEST (curr_set);
32936 if (GET_MODE (set_dest) != SImode)
32937 return false;
32939 /* We are trying to match:
32940 prev (movw) == (set (reg r0) (const_int imm16))
32941 curr (movt) == (set (zero_extract (reg r0)
32942 (const_int 16)
32943 (const_int 16))
32944 (const_int imm16_1))
32946 prev (movw) == (set (reg r1)
32947 (high (symbol_ref ("SYM"))))
32948 curr (movt) == (set (reg r0)
32949 (lo_sum (reg r1)
32950 (symbol_ref ("SYM")))) */
32952 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32954 if (CONST_INT_P (SET_SRC (curr_set))
32955 && CONST_INT_P (SET_SRC (prev_set))
32956 && REG_P (XEXP (set_dest, 0))
32957 && REG_P (SET_DEST (prev_set))
32958 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32959 return true;
32962 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32963 && REG_P (SET_DEST (curr_set))
32964 && REG_P (SET_DEST (prev_set))
32965 && GET_CODE (SET_SRC (prev_set)) == HIGH
32966 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32967 return true;
32969 return false;
32972 static bool
32973 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32975 rtx prev_set = single_set (prev);
32976 rtx curr_set = single_set (curr);
32978 if (!prev_set
32979 || !curr_set)
32980 return false;
32982 if (any_condjump_p (curr))
32983 return false;
32985 if (!arm_macro_fusion_p ())
32986 return false;
32988 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32989 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32990 return true;
32992 return false;
32995 /* Return true iff the instruction fusion described by OP is enabled. */
32996 bool
32997 arm_fusion_enabled_p (tune_params::fuse_ops op)
32999 return current_tune->fusible_ops & op;
33002 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33003 scheduled for speculative execution. Reject the long-running division
33004 and square-root instructions. */
33006 static bool
33007 arm_sched_can_speculate_insn (rtx_insn *insn)
33009 switch (get_attr_type (insn))
33011 case TYPE_SDIV:
33012 case TYPE_UDIV:
33013 case TYPE_FDIVS:
33014 case TYPE_FDIVD:
33015 case TYPE_FSQRTS:
33016 case TYPE_FSQRTD:
33017 case TYPE_NEON_FP_SQRT_S:
33018 case TYPE_NEON_FP_SQRT_D:
33019 case TYPE_NEON_FP_SQRT_S_Q:
33020 case TYPE_NEON_FP_SQRT_D_Q:
33021 case TYPE_NEON_FP_DIV_S:
33022 case TYPE_NEON_FP_DIV_D:
33023 case TYPE_NEON_FP_DIV_S_Q:
33024 case TYPE_NEON_FP_DIV_D_Q:
33025 return false;
33026 default:
33027 return true;
33031 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
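/* The offset is 1 << 29 (0x20000000); with the usual one-shadow-byte-per-
   eight-bytes mapping, the shadow for address A lives at (A >> 3) + 0x20000000. */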
33033 static unsigned HOST_WIDE_INT
33034 arm_asan_shadow_offset (void)
33036 return HOST_WIDE_INT_1U << 29;
33040 /* This is a temporary fix for PR60655. Ideally we need
33041 to handle most of these cases in the generic part but
33042 currently we reject minus (..) (sym_ref). We try to
33043 ameliorate the case with minus (sym_ref1) (sym_ref2)
33044 where they are in the same section. */
33046 static bool
33047 arm_const_not_ok_for_debug_p (rtx p)
33049 tree decl_op0 = NULL;
33050 tree decl_op1 = NULL;
33052 if (GET_CODE (p) == UNSPEC)
33053 return true;
33054 if (GET_CODE (p) == MINUS)
33056 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33058 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33059 if (decl_op1
33060 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33061 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33063 if ((VAR_P (decl_op1)
33064 || TREE_CODE (decl_op1) == CONST_DECL)
33065 && (VAR_P (decl_op0)
33066 || TREE_CODE (decl_op0) == CONST_DECL))
33067 return (get_variable_section (decl_op1, false)
33068 != get_variable_section (decl_op0, false));
33070 if (TREE_CODE (decl_op1) == LABEL_DECL
33071 && TREE_CODE (decl_op0) == LABEL_DECL)
33072 return (DECL_CONTEXT (decl_op1)
33073 != DECL_CONTEXT (decl_op0));
33076 return true;
33080 return false;
33083 /* Return TRUE if X is a reference to a value in a constant pool. */
33084 extern bool
33085 arm_is_constant_pool_ref (rtx x)
33087 return (MEM_P (x)
33088 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33089 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33092 /* Remember the last target of arm_set_current_function. */
33093 static GTY(()) tree arm_previous_fndecl;
33095 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33097 void
33098 save_restore_target_globals (tree new_tree)
33100 /* If we have a previous state, use it. */
33101 if (TREE_TARGET_GLOBALS (new_tree))
33102 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33103 else if (new_tree == target_option_default_node)
33104 restore_target_globals (&default_target_globals);
33105 else
33107 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33108 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33111 arm_option_params_internal ();
33114 /* Invalidate arm_previous_fndecl. */
33116 void
33117 arm_reset_previous_fndecl (void)
33119 arm_previous_fndecl = NULL_TREE;
33122 /* Establish appropriate back-end context for processing the function
33123 FNDECL. The argument might be NULL to indicate processing at top
33124 level, outside of any function scope. */
33126 static void
33127 arm_set_current_function (tree fndecl)
33129 if (!fndecl || fndecl == arm_previous_fndecl)
33130 return;
33132 tree old_tree = (arm_previous_fndecl
33133 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33134 : NULL_TREE);
33136 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33138 /* If current function has no attributes but previous one did,
33139 use the default node. */
33140 if (! new_tree && old_tree)
33141 new_tree = target_option_default_node;
33143 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC
33144 pop to the default have been handled by save_restore_target_globals
33145 from arm_pragma_target_parse. */
33146 if (old_tree == new_tree)
33147 return;
33149 arm_previous_fndecl = fndecl;
33151 /* First set the target options. */
33152 cl_target_option_restore (&global_options, &global_options_set,
33153 TREE_TARGET_OPTION (new_tree));
33155 save_restore_target_globals (new_tree);
33157 arm_override_options_after_change_1 (&global_options, &global_options_set);
33160 /* Implement TARGET_OPTION_PRINT. */
33162 static void
33163 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33165 int flags = ptr->x_target_flags;
33166 const char *fpu_name;
33168 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33169 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33171 fprintf (file, "%*sselected isa %s\n", indent, "",
33172 TARGET_THUMB2_P (flags) ? "thumb2" :
33173 TARGET_THUMB_P (flags) ? "thumb1" :
33174 "arm");
33176 if (ptr->x_arm_arch_string)
33177 fprintf (file, "%*sselected architecture %s\n", indent, "",
33178 ptr->x_arm_arch_string);
33180 if (ptr->x_arm_cpu_string)
33181 fprintf (file, "%*sselected CPU %s\n", indent, "",
33182 ptr->x_arm_cpu_string);
33184 if (ptr->x_arm_tune_string)
33185 fprintf (file, "%*sselected tune %s\n", indent, "",
33186 ptr->x_arm_tune_string);
33188 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33191 /* Hook to determine if one function can safely inline another. */
33193 static bool
33194 arm_can_inline_p (tree caller, tree callee)
33196 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33197 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33198 bool can_inline = true;
33200 struct cl_target_option *caller_opts
33201 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33202 : target_option_default_node);
33204 struct cl_target_option *callee_opts
33205 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33206 : target_option_default_node);
33208 if (callee_opts == caller_opts)
33209 return true;
33211 /* Callee's ISA features should be a subset of the caller's. */
33212 struct arm_build_target caller_target;
33213 struct arm_build_target callee_target;
33214 caller_target.isa = sbitmap_alloc (isa_num_bits);
33215 callee_target.isa = sbitmap_alloc (isa_num_bits);
33217 arm_configure_build_target (&caller_target, caller_opts, false);
33218 arm_configure_build_target (&callee_target, callee_opts, false);
33219 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33220 can_inline = false;
33222 sbitmap_free (caller_target.isa);
33223 sbitmap_free (callee_target.isa);
33225 /* OK to inline between different modes.
33226 Function with mode specific instructions, e.g using asm,
33227 must be explicitly protected with noinline. */
33228 return can_inline;
33231 /* Hook to fix function's alignment affected by target attribute. */
33233 static void
33234 arm_relayout_function (tree fndecl)
33236 if (DECL_USER_ALIGN (fndecl))
33237 return;
33239 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33241 if (!callee_tree)
33242 callee_tree = target_option_default_node;
33244 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33245 SET_DECL_ALIGN
33246 (fndecl,
33247 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33250 /* Inner function to process the attribute ((target (...))): take an argument
33251 and set the current options from that argument. If we have a list,
33252 recursively go over the list. */
33254 static bool
33255 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33257 if (TREE_CODE (args) == TREE_LIST)
33259 bool ret = true;
33261 for (; args; args = TREE_CHAIN (args))
33262 if (TREE_VALUE (args)
33263 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33264 ret = false;
33265 return ret;
33268 else if (TREE_CODE (args) != STRING_CST)
33270 error ("attribute %<target%> argument not a string");
33271 return false;
33274 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33275 char *q;
33277 while ((q = strtok (argstr, ",")) != NULL)
33279 argstr = NULL;
33280 if (!strcmp (q, "thumb"))
33282 opts->x_target_flags |= MASK_THUMB;
33283 if (TARGET_FDPIC && !arm_arch_thumb2)
33284 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33287 else if (!strcmp (q, "arm"))
33288 opts->x_target_flags &= ~MASK_THUMB;
33290 else if (!strcmp (q, "general-regs-only"))
33291 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33293 else if (startswith (q, "fpu="))
33295 int fpu_index;
33296 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33297 &fpu_index, CL_TARGET))
33299 error ("invalid fpu for target attribute or pragma %qs", q);
33300 return false;
33302 if (fpu_index == TARGET_FPU_auto)
33304 /* This doesn't really make sense until we support
33305 general dynamic selection of the architecture and all
33306 sub-features. */
33307 sorry ("auto fpu selection not currently permitted here");
33308 return false;
33310 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33312 else if (startswith (q, "arch="))
33314 char *arch = q + 5;
33315 const arch_option *arm_selected_arch
33316 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33318 if (!arm_selected_arch)
33320 error ("invalid architecture for target attribute or pragma %qs",
33322 return false;
33325 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33327 else if (q[0] == '+')
33329 opts->x_arm_arch_string
33330 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33332 else
33334 error ("unknown target attribute or pragma %qs", q);
33335 return false;
33339 return true;
33342 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33344 tree
33345 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33346 struct gcc_options *opts_set)
33348 struct cl_target_option cl_opts;
33350 if (!arm_valid_target_attribute_rec (args, opts))
33351 return NULL_TREE;
33353 cl_target_option_save (&cl_opts, opts, opts_set);
33354 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33355 arm_option_check_internal (opts);
33356 /* Do any overrides, such as global options arch=xxx.
33357 We do this since arm_active_target was overridden. */
33358 arm_option_reconfigure_globals ();
33359 arm_options_perform_arch_sanity_checks ();
33360 arm_option_override_internal (opts, opts_set);
33362 return build_target_option_node (opts, opts_set);
33365 static void
33366 add_attribute (const char * mode, tree *attributes)
33368 size_t len = strlen (mode);
33369 tree value = build_string (len, mode);
33371 TREE_TYPE (value) = build_array_type (char_type_node,
33372 build_index_type (size_int (len)));
33374 *attributes = tree_cons (get_identifier ("target"),
33375 build_tree_list (NULL_TREE, value),
33376 *attributes);
33379 /* For testing. Insert thumb or arm modes alternately on functions. */
33381 static void
33382 arm_insert_attributes (tree fndecl, tree * attributes)
33384 const char *mode;
33386 if (! TARGET_FLIP_THUMB)
33387 return;
33389 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33390 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33391 return;
33393 /* Nested definitions must inherit mode. */
33394 if (current_function_decl)
33396 mode = TARGET_THUMB ? "thumb" : "arm";
33397 add_attribute (mode, attributes);
33398 return;
33401 /* If there is already a setting don't change it. */
33402 if (lookup_attribute ("target", *attributes) != NULL)
33403 return;
33405 mode = thumb_flipper ? "thumb" : "arm";
33406 add_attribute (mode, attributes);
33408 thumb_flipper = !thumb_flipper;
33411 /* Hook to validate attribute((target("string"))). */
33413 static bool
33414 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33415 tree args, int ARG_UNUSED (flags))
33417 bool ret = true;
33418 struct gcc_options func_options, func_options_set;
33419 tree cur_tree, new_optimize;
33420 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33422 /* Get the optimization options of the current function. */
33423 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33425 /* If the function changed the optimization levels as well as setting target
33426 options, start with the optimizations specified. */
33427 if (!func_optimize)
33428 func_optimize = optimization_default_node;
33430 /* Init func_options. */
33431 memset (&func_options, 0, sizeof (func_options));
33432 init_options_struct (&func_options, NULL);
33433 lang_hooks.init_options_struct (&func_options);
33434 memset (&func_options_set, 0, sizeof (func_options_set));
33436 /* Initialize func_options to the defaults. */
33437 cl_optimization_restore (&func_options, &func_options_set,
33438 TREE_OPTIMIZATION (func_optimize));
33440 cl_target_option_restore (&func_options, &func_options_set,
33441 TREE_TARGET_OPTION (target_option_default_node));
33443 /* Set func_options flags with new target mode. */
33444 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33445 &func_options_set);
33447 if (cur_tree == NULL_TREE)
33448 ret = false;
33450 new_optimize = build_optimization_node (&func_options, &func_options_set);
33452 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33454 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33456 return ret;
33459 /* Match an ISA feature bitmap to a named FPU. We always use the
33460 first entry that exactly matches the feature set, so that we
33461 effectively canonicalize the FPU name for the assembler. */
33462 static const char*
33463 arm_identify_fpu_from_isa (sbitmap isa)
33465 auto_sbitmap fpubits (isa_num_bits);
33466 auto_sbitmap cand_fpubits (isa_num_bits);
33468 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33470 /* If there are no ISA feature bits relating to the FPU, we must be
33471 doing soft-float. */
33472 if (bitmap_empty_p (fpubits))
33473 return "softvfp";
33475 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33477 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33478 if (bitmap_equal_p (fpubits, cand_fpubits))
33479 return all_fpus[i].name;
33481 /* We must find an entry, or things have gone wrong. */
33482 gcc_unreachable ();
33485 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33486 by the function DECL. */
33487 void
33488 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33490 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33492 struct cl_target_option *targ_options;
33493 if (target_parts)
33494 targ_options = TREE_TARGET_OPTION (target_parts);
33495 else
33496 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33497 gcc_assert (targ_options);
33499 arm_print_asm_arch_directives (stream, targ_options);
33501 fprintf (stream, "\t.syntax unified\n");
33503 if (TARGET_THUMB)
33505 if (is_called_in_ARM_mode (decl)
33506 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33507 && cfun->is_thunk))
33508 fprintf (stream, "\t.code 32\n");
33509 else if (TARGET_THUMB1)
33510 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33511 else
33512 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33514 else
33515 fprintf (stream, "\t.arm\n");
33517 if (TARGET_POKE_FUNCTION_NAME)
33518 arm_poke_function_name (stream, (const char *) name);
33521 /* If MEM is in the form of [base+offset], extract the two parts
33522 of address and set to BASE and OFFSET, otherwise return false
33523 after clearing BASE and OFFSET. */
33525 static bool
33526 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33528 rtx addr;
33530 gcc_assert (MEM_P (mem));
33532 addr = XEXP (mem, 0);
33534 /* Strip off const from addresses like (const (addr)). */
33535 if (GET_CODE (addr) == CONST)
33536 addr = XEXP (addr, 0);
33538 if (REG_P (addr))
33540 *base = addr;
33541 *offset = const0_rtx;
33542 return true;
33545 if (GET_CODE (addr) == PLUS
33546 && GET_CODE (XEXP (addr, 0)) == REG
33547 && CONST_INT_P (XEXP (addr, 1)))
33549 *base = XEXP (addr, 0);
33550 *offset = XEXP (addr, 1);
33551 return true;
33554 *base = NULL_RTX;
33555 *offset = NULL_RTX;
33557 return false;
33560 /* If INSN is a load or store of address in the form of [base+offset],
33561 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
33562 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33563 otherwise return FALSE. */
33565 static bool
33566 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33568 rtx x, dest, src;
33570 gcc_assert (INSN_P (insn));
33571 x = PATTERN (insn);
33572 if (GET_CODE (x) != SET)
33573 return false;
33575 src = SET_SRC (x);
33576 dest = SET_DEST (x);
33577 if (REG_P (src) && MEM_P (dest))
33579 *is_load = false;
33580 extract_base_offset_in_addr (dest, base, offset);
33582 else if (MEM_P (src) && REG_P (dest))
33584 *is_load = true;
33585 extract_base_offset_in_addr (src, base, offset);
33587 else
33588 return false;
33590 return (*base != NULL_RTX && *offset != NULL_RTX);
33593 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33595 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33596 and PRI are only calculated for these instructions. For other instructions,
33597 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33598 instruction fusion can be supported by returning different priorities.
33600 It's important that irrelevant instructions get the largest FUSION_PRI. */
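/* For example, a load from [r1, #8] gets *fusion_pri = max_pri - 2 and
   *pri = (max_pri - 1) / 2 - (1 << 20) - 8, so loads and stores get
   distinct fusion priorities and accesses are then ordered by base
   register and offset. */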
33602 static void
33603 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33604 int *fusion_pri, int *pri)
33606 int tmp, off_val;
33607 bool is_load;
33608 rtx base, offset;
33610 gcc_assert (INSN_P (insn));
33612 tmp = max_pri - 1;
33613 if (!fusion_load_store (insn, &base, &offset, &is_load))
33615 *pri = tmp;
33616 *fusion_pri = tmp;
33617 return;
33620 /* Load goes first. */
33621 if (is_load)
33622 *fusion_pri = tmp - 1;
33623 else
33624 *fusion_pri = tmp - 2;
33626 tmp /= 2;
33628 /* INSN with smaller base register goes first. */
33629 tmp -= ((REGNO (base) & 0xff) << 20);
33631 /* INSN with smaller offset goes first. */
33632 off_val = (int)(INTVAL (offset));
33633 if (off_val >= 0)
33634 tmp -= (off_val & 0xfffff);
33635 else
33636 tmp += ((- off_val) & 0xfffff);
33638 *pri = tmp;
33639 return;
33643 /* Construct and return a PARALLEL RTX vector with elements numbering the
33644 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33645 the vector - from the perspective of the architecture. This does not
33646 line up with GCC's perspective on lane numbers, so we end up with
33647 different masks depending on our target endian-ness. The diagram
33648 below may help. We must draw the distinction when building masks
33649 which select one half of the vector. An instruction selecting
33650 architectural low-lanes for a big-endian target, must be described using
33651 a mask selecting GCC high-lanes.
33653 Big-Endian Little-Endian
33655 GCC 0 1 2 3 3 2 1 0
33656 | x | x | x | x | | x | x | x | x |
33657 Architecture 3 2 1 0 3 2 1 0
33659 Low Mask: { 2, 3 } { 0, 1 }
33660 High Mask: { 0, 1 } { 2, 3 } */
33663 rtx
33664 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33666 int nunits = GET_MODE_NUNITS (mode);
33667 rtvec v = rtvec_alloc (nunits / 2);
33668 int high_base = nunits / 2;
33669 int low_base = 0;
33670 int base;
33671 rtx t1;
33672 int i;
33674 if (BYTES_BIG_ENDIAN)
33675 base = high ? low_base : high_base;
33676 else
33677 base = high ? high_base : low_base;
33679 for (i = 0; i < nunits / 2; i++)
33680 RTVEC_ELT (v, i) = GEN_INT (base + i);
33682 t1 = gen_rtx_PARALLEL (mode, v);
33683 return t1;
33686 /* Check OP for validity as a PARALLEL RTX vector with elements
33687 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33688 from the perspective of the architecture. See the diagram above
33689 arm_simd_vect_par_cnst_half for more details. */
33691 bool
33692 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33693 bool high)
33695 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33696 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33697 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33698 int i = 0;
33700 if (!VECTOR_MODE_P (mode))
33701 return false;
33703 if (count_op != count_ideal)
33704 return false;
33706 for (i = 0; i < count_ideal; i++)
33708 rtx elt_op = XVECEXP (op, 0, i);
33709 rtx elt_ideal = XVECEXP (ideal, 0, i);
33711 if (!CONST_INT_P (elt_op)
33712 || INTVAL (elt_ideal) != INTVAL (elt_op))
33713 return false;
33715 return true;
33718 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
33719 in Thumb1. */
33720 static bool
33721 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33722 const_tree)
33724 /* For now, we punt and not handle this for TARGET_THUMB1. */
33725 if (vcall_offset && TARGET_THUMB1)
33726 return false;
33728 /* Otherwise ok. */
33729 return true;
33732 /* Generate RTL for a conditional branch with rtx comparison CODE in
33733 mode CC_MODE. The destination of the unlikely conditional branch
33734 is LABEL_REF. */
33736 void
33737 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33738 rtx label_ref)
33740 rtx x;
33741 x = gen_rtx_fmt_ee (code, VOIDmode,
33742 gen_rtx_REG (cc_mode, CC_REGNUM),
33743 const0_rtx);
33745 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33746 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33747 pc_rtx);
33748 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33751 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33753 For pure-code sections there is no letter code for this attribute, so
33754 output all the section flags numerically when this is needed. */
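/* A typical pure-code executable section therefore gets 0x20000006,
   i.e. SHF_ARM_PURECODE | SHF_EXECINSTR | SHF_ALLOC. */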
33756 static bool
33757 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33760 if (flags & SECTION_ARM_PURECODE)
33762 *num = 0x20000000;
33764 if (!(flags & SECTION_DEBUG))
33765 *num |= 0x2;
33766 if (flags & SECTION_EXCLUDE)
33767 *num |= 0x80000000;
33768 if (flags & SECTION_WRITE)
33769 *num |= 0x1;
33770 if (flags & SECTION_CODE)
33771 *num |= 0x4;
33772 if (flags & SECTION_MERGE)
33773 *num |= 0x10;
33774 if (flags & SECTION_STRINGS)
33775 *num |= 0x20;
33776 if (flags & SECTION_TLS)
33777 *num |= 0x400;
33778 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33779 *num |= 0x200;
33781 return true;
33784 return false;
33787 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33789 If pure-code is passed as an option, make sure all functions are in
33790 sections that have the SHF_ARM_PURECODE attribute. */
33792 static section *
33793 arm_function_section (tree decl, enum node_frequency freq,
33794 bool startup, bool exit)
33796 const char * section_name;
33797 section * sec;
33799 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33800 return default_function_section (decl, freq, startup, exit);
33802 if (!target_pure_code)
33803 return default_function_section (decl, freq, startup, exit);
33806 section_name = DECL_SECTION_NAME (decl);
33808 /* If a function is not in a named section then it falls under the 'default'
33809 text section, also known as '.text'. We can preserve previous behavior as
33810 the default text section already has the SHF_ARM_PURECODE section
33811 attribute. */
33812 if (!section_name)
33814 section *default_sec = default_function_section (decl, freq, startup,
33815 exit);
33817 /* If default_sec is not null, then it must be a special section like for
33818 example .text.startup. We set the pure-code attribute and return the
33819 same section to preserve existing behavior. */
33820 if (default_sec)
33821 default_sec->common.flags |= SECTION_ARM_PURECODE;
33822 return default_sec;
33825 /* Otherwise look whether a section has already been created with
33826 'section_name'. */
33827 sec = get_named_section (decl, section_name, 0);
33828 if (!sec)
33829 /* If that is not the case passing NULL as the section's name to
33830 'get_named_section' will create a section with the declaration's
33831 section name. */
33832 sec = get_named_section (decl, NULL, 0);
33834 /* Set the SHF_ARM_PURECODE attribute. */
33835 sec->common.flags |= SECTION_ARM_PURECODE;
33837 return sec;
33840 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
33842 If DECL is a function declaration and pure-code is passed as an option
33843 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33844 section's name and RELOC indicates whether the declaration's initializer may
33845 contain runtime relocations. */
33847 static unsigned int
33848 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33850 unsigned int flags = default_section_type_flags (decl, name, reloc);
33852 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33853 flags |= SECTION_ARM_PURECODE;
33855 return flags;
33858 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33860 static void
33861 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33862 rtx op0, rtx op1,
33863 rtx *quot_p, rtx *rem_p)
33865 if (mode == SImode)
33866 gcc_assert (!TARGET_IDIV);
33868 scalar_int_mode libval_mode
33869 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33871 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33872 libval_mode, op0, mode, op1, mode);
33874 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33875 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33876 GET_MODE_SIZE (mode));
33878 gcc_assert (quotient);
33879 gcc_assert (remainder);
33881 *quot_p = quotient;
33882 *rem_p = remainder;
33885 /* This function checks for the availability of the coprocessor builtin passed
33886 in BUILTIN for the current target. Returns true if it is available and
33887 false otherwise. If a BUILTIN is passed for which this function has not
33888 been implemented, it will cause an internal compiler error. */
33890 bool
33891 arm_coproc_builtin_available (enum unspecv builtin)
33893 /* None of these builtins are available in Thumb mode if the target only
33894 supports Thumb-1. */
33895 if (TARGET_THUMB1)
33896 return false;
33898 switch (builtin)
33900 case VUNSPEC_CDP:
33901 case VUNSPEC_LDC:
33902 case VUNSPEC_LDCL:
33903 case VUNSPEC_STC:
33904 case VUNSPEC_STCL:
33905 case VUNSPEC_MCR:
33906 case VUNSPEC_MRC:
33907 if (arm_arch4)
33908 return true;
33909 break;
33910 case VUNSPEC_CDP2:
33911 case VUNSPEC_LDC2:
33912 case VUNSPEC_LDC2L:
33913 case VUNSPEC_STC2:
33914 case VUNSPEC_STC2L:
33915 case VUNSPEC_MCR2:
33916 case VUNSPEC_MRC2:
33917 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33918 ARMv8-{A,M}. */
33919 if (arm_arch5t)
33920 return true;
33921 break;
33922 case VUNSPEC_MCRR:
33923 case VUNSPEC_MRRC:
33924 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33925 ARMv8-{A,M}. */
33926 if (arm_arch6 || arm_arch5te)
33927 return true;
33928 break;
33929 case VUNSPEC_MCRR2:
33930 case VUNSPEC_MRRC2:
33931 if (arm_arch6)
33932 return true;
33933 break;
33934 default:
33935 gcc_unreachable ();
33937 return false;
33940 /* This function returns true if OP is a valid memory operand for the ldc and
33941 stc coprocessor instructions and false otherwise. */
33943 bool
33944 arm_coproc_ldc_stc_legitimate_address (rtx op)
33946 HOST_WIDE_INT range;
33947 /* Has to be a memory operand. */
33948 if (!MEM_P (op))
33949 return false;
33951 op = XEXP (op, 0);
33953 /* We accept registers. */
33954 if (REG_P (op))
33955 return true;
33957 switch GET_CODE (op)
33959 case PLUS:
33961 /* Or registers with an offset. */
33962 if (!REG_P (XEXP (op, 0)))
33963 return false;
33965 op = XEXP (op, 1);
33967 /* The offset must be an immediate though. */
33968 if (!CONST_INT_P (op))
33969 return false;
33971 range = INTVAL (op);
33973 /* Within the range of [-1020,1020]. */
33974 if (!IN_RANGE (range, -1020, 1020))
33975 return false;
33977 /* And a multiple of 4. */
33978 return (range % 4) == 0;
33980 case PRE_INC:
33981 case POST_INC:
33982 case PRE_DEC:
33983 case POST_DEC:
33984 return REG_P (XEXP (op, 0));
33985 default:
33986 gcc_unreachable ();
33988 return false;
33991 /* Return the diagnostic message string if conversion from FROMTYPE to
33992 TOTYPE is not allowed, NULL otherwise. */
33994 static const char *
33995 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33997 if (element_mode (fromtype) != element_mode (totype))
33999 /* Do not allow conversions to/from BFmode scalar types. */
34000 if (TYPE_MODE (fromtype) == BFmode)
34001 return N_("invalid conversion from type %<bfloat16_t%>");
34002 if (TYPE_MODE (totype) == BFmode)
34003 return N_("invalid conversion to type %<bfloat16_t%>");
34006 /* Conversion allowed. */
34007 return NULL;
34010 /* Return the diagnostic message string if the unary operation OP is
34011 not permitted on TYPE, NULL otherwise. */
34013 static const char *
34014 arm_invalid_unary_op (int op, const_tree type)
34016 /* Reject all single-operand operations on BFmode except for &. */
34017 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34018 return N_("operation not permitted on type %<bfloat16_t%>");
34020 /* Operation allowed. */
34021 return NULL;
34024 /* Return the diagnostic message string if the binary operation OP is
34025 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34027 static const char *
34028 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34029 const_tree type2)
34031 /* Reject all 2-operand operations on BFmode. */
34032 if (element_mode (type1) == BFmode
34033 || element_mode (type2) == BFmode)
34034 return N_("operation not permitted on type %<bfloat16_t%>");
34036 /* Operation allowed. */
34037 return NULL;
34040 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34042 In VFPv1, VFP registers could only be accessed in the mode they were
34043 set, so subregs would be invalid there. However, we don't support
34044 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34046 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34047 VFP registers in little-endian order. We can't describe that accurately to
34048 GCC, so avoid taking subregs of such values.
34050 The only exception is going from a 128-bit to a 64-bit type. In that
34051 case the data layout happens to be consistent for big-endian, so we
34052 explicitly allow that case. */
34054 static bool
34055 arm_can_change_mode_class (machine_mode from, machine_mode to,
34056 reg_class_t rclass)
34058 if (TARGET_BIG_END
34059 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34060 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34061 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34062 && reg_classes_intersect_p (VFP_REGS, rclass))
34063 return false;
34064 return true;
34067 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34068 strcpy from constants will be faster. */
34070 static HOST_WIDE_INT
34071 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34073 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34074 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34075 return MAX (align, BITS_PER_WORD * factor);
34076 return align;
34079 /* Emit a speculation barrier on target architectures that do not have
34080 DSB/ISB directly. Such systems probably don't need a barrier
34081 themselves, but if the code is ever run on a later architecture, it
34082 might become a problem. */
34083 void
34084 arm_emit_speculation_barrier_function ()
34086 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34089 /* Have we recorded an explicit access to the Q bit of APSR? */
34090 bool
34091 arm_q_bit_access (void)
34093 if (cfun && cfun->decl)
34094 return lookup_attribute ("acle qbit",
34095 DECL_ATTRIBUTES (cfun->decl));
34096 return true;
34099 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34100 bool
34101 arm_ge_bits_access (void)
34103 if (cfun && cfun->decl)
34104 return lookup_attribute ("acle gebits",
34105 DECL_ATTRIBUTES (cfun->decl));
34106 return true;
34109 /* Return NULL if INSN is valid within a low-overhead loop.
34110 Otherwise return a string describing why doloop cannot be applied. */
34112 static const char *
34113 arm_invalid_within_doloop (const rtx_insn *insn)
34115 if (!TARGET_HAVE_LOB)
34116 return default_invalid_within_doloop (insn);
34118 if (CALL_P (insn))
34119 return "Function call in the loop.";
34121 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34122 return "LR is used inside loop.";
34124 return NULL;
34127 bool
34128 arm_target_insn_ok_for_lob (rtx insn)
34130 basic_block bb = BLOCK_FOR_INSN (insn);
34131 /* Make sure the basic block of the target insn is a simple latch
34132 having as its single predecessor and successor the body of the loop
34133 itself. Only simple loops with a single basic block as the body are
34134 supported for 'low-overhead loop', making sure that the LE target is
34135 above LE itself in the generated code. */
34137 return single_succ_p (bb)
34138 && single_pred_p (bb)
34139 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34140 && contains_no_active_insn_p (bb);
34143 #if CHECKING_P
34144 namespace selftest {
34146 /* Scan the static data tables generated by parsecpu.awk looking for
34147 potential issues with the data. We primarily check for
34148 inconsistencies in the option extensions at present (extensions
34149 that duplicate others but aren't marked as aliases). Furthermore,
34150 for correct canonicalization later options must never be a subset
34151 of an earlier option. Any extension should also only specify other
34152 feature bits and never an architecture bit. The architecture is inferred
34153 from the declaration of the extension. */
34154 static void
34155 arm_test_cpu_arch_data (void)
34157 const arch_option *arch;
34158 const cpu_option *cpu;
34159 auto_sbitmap target_isa (isa_num_bits);
34160 auto_sbitmap isa1 (isa_num_bits);
34161 auto_sbitmap isa2 (isa_num_bits);
34163 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34165 const cpu_arch_extension *ext1, *ext2;
34167 if (arch->common.extensions == NULL)
34168 continue;
34170 arm_initialize_isa (target_isa, arch->common.isa_bits);
34172 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34174 if (ext1->alias)
34175 continue;
34177 arm_initialize_isa (isa1, ext1->isa_bits);
34178 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34180 if (ext2->alias || ext1->remove != ext2->remove)
34181 continue;
34183 arm_initialize_isa (isa2, ext2->isa_bits);
34184 /* If the option is a subset of the parent option, it doesn't
34185 add anything and so isn't useful. */
34186 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34188 /* If the extension specifies any architectural bits then
34189 disallow it. Extensions should only specify feature bits. */
34190 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34195 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34197 const cpu_arch_extension *ext1, *ext2;
34199 if (cpu->common.extensions == NULL)
34200 continue;
34202 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34204 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34206 if (ext1->alias)
34207 continue;
34209 arm_initialize_isa (isa1, ext1->isa_bits);
34210 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34212 if (ext2->alias || ext1->remove != ext2->remove)
34213 continue;
34215 arm_initialize_isa (isa2, ext2->isa_bits);
34216 /* If the option is a subset of the parent option, it doesn't
34217 add anything and so isn't useful. */
34218 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34220 /* If the extension specifies any architectural bits then
34221 disallow it. Extensions should only specify feature bits. */
34222 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
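/* Illustrative example of what the checks above catch (hypothetical option
   names, not from arm-cpus.in): if an entry declared an extension "+foo"
   enabling {bit_a, bit_b} and a later extension "+bar" enabling only
   {bit_a}, the subset assertion would fire because "+bar" adds nothing over
   "+foo"; and if "+bar" also set a bit that is part of the architecture's
   own ISA bits, the intersection assertion would fire, since extensions
   must only add feature bits.  */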
34228 /* Scan the static data tables generated by parsecpu.awk looking for
34229 potential issues with the data. Here we check for consistency between the
34230 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34231 a feature bit that is not defined by any FPU flag. */
34232 static void
34233 arm_test_fpu_data (void)
34235 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34236 auto_sbitmap fpubits (isa_num_bits);
34237 auto_sbitmap tmpset (isa_num_bits);
34239 static const enum isa_feature fpu_bitlist_internal[]
34240 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34241 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
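/* The loop below successively removes from ISA_ALL_FPU_INTERNAL the bits
   provided by each FPU entry (effectively all_fpubits &= ~fpubits); any
   bit still set after the loop is not provided by any FPU and triggers
   the error reported further down.  */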
34243 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34245 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34246 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34247 bitmap_clear (isa_all_fpubits_internal);
34248 bitmap_copy (isa_all_fpubits_internal, tmpset);
34251 if (!bitmap_empty_p (isa_all_fpubits_internal))
34253 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34254 " group that are not defined by any FPU.\n"
34255 " Check your arm-cpus.in.\n");
34256 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34260 static void
34261 arm_run_selftests (void)
34263 arm_test_cpu_arch_data ();
34264 arm_test_fpu_data ();
34266 } /* Namespace selftest. */
34268 #undef TARGET_RUN_TARGET_SELFTESTS
34269 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34270 #endif /* CHECKING_P */
34272 /* Implement TARGET_STACK_PROTECT_GUARD. When a global-variable-based
34273 guard is being used, return the default guard; otherwise
34274 return a null tree. */
34275 static tree
34276 arm_stack_protect_guard (void)
34278 if (arm_stack_protector_guard == SSP_GLOBAL)
34279 return default_stack_protect_guard ();
34281 return NULL_TREE;
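/* Usage note (an assumption, not from the original source): returning
   NULL_TREE here tells the middle end not to load the canary from a global
   object; the register-based guard configuration selected with the
   -mstack-protector-guard= option then relies on the target's
   stack-protector insn patterns to fetch the canary from the chosen
   system register and offset.  */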
34284 /* Worker function for TARGET_MD_ASM_ADJUST, used when in Thumb-1 mode.
34285 Unlike the Arm version, we do NOT implement asm flag outputs. */
34287 rtx_insn *
34288 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34289 vec<machine_mode> & /*input_modes*/,
34290 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34291 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34293 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34294 if (startswith (constraints[i], "=@cc"))
34296 sorry ("%<asm%> flags not supported in thumb1 mode");
34297 break;
34299 return NULL;
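/* Illustrative example of the rejected construct (hypothetical user code,
   not from the original source): an asm flag output uses a "=@cc<cond>"
   constraint, e.g.
     int eq;
     asm ("cmp %1, %2" : "=@cceq" (eq) : "r" (a), "r" (b));
   which the Arm-mode hook accepts but which Thumb-1 reports via sorry ().  */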
34302 /* Generate code to enable conditional branches in functions over 1 MiB.
34303 Parameters are:
34304 operands: the operand list of the asm insn (see arm_cond_branch or
34305 arm_cond_branch_reversed).
34306 pos_label: an index into the operands array; operands[pos_label] is
34307 the asm label of the final jump destination.
34308 dest: a string used to generate the asm label of the intermediate
34309 destination.
34310 branch_format: a string denoting the format of the intermediate branch,
34311 e.g. "beq", "bne", etc. */
34313 const char *
34314 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34315 const char * branch_format)
34317 rtx_code_label * tmp_label = gen_label_rtx ();
34318 char label_buf[256];
34319 char buffer[128];
34320 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34321 CODE_LABEL_NUMBER (tmp_label));
34322 const char *label_ptr = arm_strip_name_encoding (label_buf);
34323 rtx dest_label = operands[pos_label];
34324 operands[pos_label] = tmp_label;
34326 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34327 output_asm_insn (buffer, operands);
34329 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34330 operands[pos_label] = dest_label;
34331 output_asm_insn (buffer, operands);
34332 return "";
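/* Illustrative example (label names are assumptions, not from the original
   source): a caller wanting a far "beq" would typically pass the inverted
   condition as BRANCH_FORMAT, so the emitted sequence looks roughly like
	bne	.Lbcond42	@ short-range conditional branch
	b	.Lfar_target	@ unconditional branch, much larger range
     .Lbcond42:
   i.e. the conditional branch only has to reach the local fall-through
   label while the unconditional branch covers the long distance.  */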
34335 /* Return the register class to use for the base register of a memory
34336 access in MODE; the narrow MVE modes are restricted to LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34337 enum reg_class
34338 arm_mode_base_reg_class (machine_mode mode)
34340 if (TARGET_HAVE_MVE
34341 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34342 return LO_REGS;
34344 return MODE_BASE_REG_REG_CLASS (mode);
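/* Illustrative note (an assumption, not from the original source): the
   modes listed above are the memory-side modes of MVE widening loads and
   narrowing stores, whose encodings are assumed here to accept only a low
   base register, hence the LO_REGS restriction; every other mode simply
   defers to MODE_BASE_REG_REG_CLASS.  */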
34347 struct gcc_target targetm = TARGET_INITIALIZER;
34349 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34351 opt_machine_mode
34352 arm_get_mask_mode (machine_mode mode)
34354 if (TARGET_HAVE_MVE)
34355 return arm_mode_to_pred_mode (mode);
34357 return default_get_mask_mode (mode);
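/* Illustrative note (an assumption, not from the original source): with MVE
   enabled a V4SImode vector would map to the four-lane predicate mode
   V4BImode (a per-lane view of the VPR.P0 predicate register), while
   without MVE the default_get_mask_mode fallback yields the usual
   integer-vector mask mode.  */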
34360 #include "gt-arm.h"