/* Output routines for GCC for ARM.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
                             unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
                               machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
                                               machine_mode, int *,
                                               const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
                                      const_tree, bool);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
                                struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
                                        unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                           tree vectype,
                                           int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
                                   enum vect_cost_for_stmt kind,
                                   struct _stmt_vec_info *stmt_info,
                                   int misalign,
                                   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                                         bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
                                                int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx,
                                       rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity, exclusions } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false, NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false, NULL },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false, NULL },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false,
    NULL },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false,
    NULL },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_entry, false, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_call, true, NULL },
  { NULL, 0, 0, false, false, false, NULL, false, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block for a
   particular anchor, covering offsets -4088 through +4095, is
   4088 + 1 + 4095 = 8184 bytes, which is divisible by eight,
   ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
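/* Sanity-check sketch of the arithmetic above: with offsets -4088
   through +4095 relative to the anchor, the block spans
   4088 + 1 + 4095 = 8184 bytes, and 8184 == 8 * 1023, so anchors
   remain naturally eight-byte spaced.  */
static_assert ((4088 + 1 + 4095) % 8 == 0,
               "anchor block size must be a multiple of eight");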
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
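/* A Thumb-2 IT block can conditionalise up to four following
   instructions (e.g. an ITTEE sequence); ARMv8-A deprecates everything
   but single 16-bit instruction blocks, which is what -mrestrict-it
   (arm_restrict_it) enforces via the cap of 1 above.  */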
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision
/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
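/* Concretely, the value is a power of two naming a spare low-order bit
   in code pointers: with 2, bit 1 can tag a pointer as referring to a
   custom function descriptor (used for nested functions in place of
   trampolines), while bit 0 keeps selecting ARM versus Thumb.  */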
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
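/* For example, a branch over a block of at most five instructions can
   instead have the block executed conditionally; see
   arm_final_prescan_insn, which performs this conversion.  */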
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options
   should be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */

/* Nonzero if this chip supports the ARM 6K extensions.  */

/* Nonzero if this chip supports the ARM 6KZ extensions.  */

/* Nonzero if instructions present in ARMv6-M can be used.  */

/* Nonzero if this chip supports the ARM 7 extensions.  */

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if instructions present in ARMv8 can be used.  */

/* Nonzero if this chip supports the ARMv8.1 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */

/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
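/* Worked example, assuming THUMB_HARD_FRAME_POINTER_REGNUM is 7 and the
   PIC offset register lies outside r0-r7: SP (13) and PC (15) are
   already above the low byte, so the mask reduces to 0xff & ~0x80,
   i.e. 0x7f, making r0-r6 the available Thumb-2 work registers.  */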
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {               \
    num_slots,    \
    l1_size,      \
    l1_line_size  \
  }
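/* Usage sketch for the two macros above: a tune_params entry writes,
   e.g., ARM_PREFETCH_BENEFICIAL (4, 32768, 64) for four prefetch slots,
   a 32K L1 data cache and 64-byte cache lines, or
   ARM_PREFETCH_NOT_BENEFICIAL when software prefetching does not pay
   off on the core being tuned.  */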
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  1, /* vec_unalign_load_cost.  */
  1, /* vec_unalign_store_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h.  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (2), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (2), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    COSTS_N_INSNS (1), /* bfx.  */
    true /* non_exec_costs_exec.  */
    COSTS_N_INSNS (3), /* simple.  */
    COSTS_N_INSNS (3), /* flag_setting.  */
    COSTS_N_INSNS (2), /* extend.  */
    COSTS_N_INSNS (3), /* add.  */
    COSTS_N_INSNS (2), /* extend_add.  */
    COSTS_N_INSNS (30) /* idiv.  No HW div on Cortex A9.  */
    0, /* simple (N/A).  */
    0, /* flag_setting (N/A).  */
    COSTS_N_INSNS (4), /* extend.  */
    COSTS_N_INSNS (4), /* extend_add.  */
    COSTS_N_INSNS (2), /* load.  */
    COSTS_N_INSNS (2), /* load_sign_extend.  */
    COSTS_N_INSNS (2), /* ldrd.  */
    COSTS_N_INSNS (2), /* ldm_1st.  */
    1, /* ldm_regs_per_insn_1st.  */
    2, /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5), /* loadf.  */
    COSTS_N_INSNS (5), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (2), /* store.  */
    COSTS_N_INSNS (2), /* strd.  */
    COSTS_N_INSNS (2), /* stm_1st.  */
    1, /* stm_regs_per_insn_1st.  */
    2, /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1), /* storef.  */
    COSTS_N_INSNS (1), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1) /* storev.  */
    COSTS_N_INSNS (14), /* div.  */
    COSTS_N_INSNS (4), /* mult.  */
    COSTS_N_INSNS (7), /* mult_addsub.  */
    COSTS_N_INSNS (30), /* fma.  */
    COSTS_N_INSNS (3), /* addsub.  */
    COSTS_N_INSNS (1), /* fpconst.  */
    COSTS_N_INSNS (1), /* neg.  */
    COSTS_N_INSNS (3), /* compare.  */
    COSTS_N_INSNS (3), /* widen.  */
    COSTS_N_INSNS (3), /* narrow.  */
    COSTS_N_INSNS (3), /* toint.  */
    COSTS_N_INSNS (3), /* fromint.  */
    COSTS_N_INSNS (3) /* roundint.  */
    COSTS_N_INSNS (24), /* div.  */
    COSTS_N_INSNS (5), /* mult.  */
    COSTS_N_INSNS (8), /* mult_addsub.  */
    COSTS_N_INSNS (30), /* fma.  */
    COSTS_N_INSNS (3), /* addsub.  */
    COSTS_N_INSNS (1), /* fpconst.  */
    COSTS_N_INSNS (1), /* neg.  */
    COSTS_N_INSNS (3), /* compare.  */
    COSTS_N_INSNS (3), /* widen.  */
    COSTS_N_INSNS (3), /* narrow.  */
    COSTS_N_INSNS (3), /* toint.  */
    COSTS_N_INSNS (3), /* fromint.  */
    COSTS_N_INSNS (3) /* roundint.  */
    COSTS_N_INSNS (1) /* alu.  */
const struct cpu_cost_table cortexa8_extra_costs =
    COSTS_N_INSNS (1), /* shift.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    0, /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    0, /* log_shift_reg.  */
    0, /* extend_arith.  */
    true /* non_exec_costs_exec.  */
    COSTS_N_INSNS (1), /* simple.  */
    COSTS_N_INSNS (1), /* flag_setting.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (1), /* add.  */
    COSTS_N_INSNS (1), /* extend_add.  */
    COSTS_N_INSNS (30) /* idiv.  No HW div on Cortex A8.  */
    0, /* simple (N/A).  */
    0, /* flag_setting (N/A).  */
    COSTS_N_INSNS (2), /* extend.  */
    COSTS_N_INSNS (2), /* extend_add.  */
    COSTS_N_INSNS (1), /* load.  */
    COSTS_N_INSNS (1), /* load_sign_extend.  */
    COSTS_N_INSNS (1), /* ldrd.  */
    COSTS_N_INSNS (1), /* ldm_1st.  */
    1, /* ldm_regs_per_insn_1st.  */
    2, /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1), /* loadf.  */
    COSTS_N_INSNS (1), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (1), /* store.  */
    COSTS_N_INSNS (1), /* strd.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1, /* stm_regs_per_insn_1st.  */
    2, /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1), /* storef.  */
    COSTS_N_INSNS (1), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1) /* storev.  */
    COSTS_N_INSNS (36), /* div.  */
    COSTS_N_INSNS (11), /* mult.  */
    COSTS_N_INSNS (20), /* mult_addsub.  */
    COSTS_N_INSNS (30), /* fma.  */
    COSTS_N_INSNS (9), /* addsub.  */
    COSTS_N_INSNS (3), /* fpconst.  */
    COSTS_N_INSNS (3), /* neg.  */
    COSTS_N_INSNS (6), /* compare.  */
    COSTS_N_INSNS (4), /* widen.  */
    COSTS_N_INSNS (4), /* narrow.  */
    COSTS_N_INSNS (8), /* toint.  */
    COSTS_N_INSNS (8), /* fromint.  */
    COSTS_N_INSNS (8) /* roundint.  */
    COSTS_N_INSNS (64), /* div.  */
    COSTS_N_INSNS (16), /* mult.  */
    COSTS_N_INSNS (25), /* mult_addsub.  */
    COSTS_N_INSNS (30), /* fma.  */
    COSTS_N_INSNS (9), /* addsub.  */
    COSTS_N_INSNS (3), /* fpconst.  */
    COSTS_N_INSNS (3), /* neg.  */
    COSTS_N_INSNS (6), /* compare.  */
    COSTS_N_INSNS (6), /* widen.  */
    COSTS_N_INSNS (6), /* narrow.  */
    COSTS_N_INSNS (8), /* toint.  */
    COSTS_N_INSNS (8), /* fromint.  */
    COSTS_N_INSNS (8) /* roundint.  */
    COSTS_N_INSNS (1) /* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =
    COSTS_N_INSNS (1), /* shift.  */
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    COSTS_N_INSNS (1), /* bfx.  */
    COSTS_N_INSNS (1), /* clz.  */
    COSTS_N_INSNS (1), /* rev.  */
    true /* non_exec_costs_exec.  */
    COSTS_N_INSNS (1), /* flag_setting.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (1), /* add.  */
    COSTS_N_INSNS (1), /* extend_add.  */
    COSTS_N_INSNS (7) /* idiv.  */
    0, /* simple (N/A).  */
    0, /* flag_setting (N/A).  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (2), /* extend_add.  */
    COSTS_N_INSNS (1), /* load.  */
    COSTS_N_INSNS (1), /* load_sign_extend.  */
    COSTS_N_INSNS (6), /* ldrd.  */
    COSTS_N_INSNS (1), /* ldm_1st.  */
    1, /* ldm_regs_per_insn_1st.  */
    2, /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* loadf.  */
    COSTS_N_INSNS (4), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (1), /* store.  */
    COSTS_N_INSNS (3), /* strd.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1, /* stm_regs_per_insn_1st.  */
    2, /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (2), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1) /* storev.  */
    COSTS_N_INSNS (15), /* div.  */
    COSTS_N_INSNS (3), /* mult.  */
    COSTS_N_INSNS (7), /* mult_addsub.  */
    COSTS_N_INSNS (7), /* fma.  */
    COSTS_N_INSNS (3), /* addsub.  */
    COSTS_N_INSNS (3), /* fpconst.  */
    COSTS_N_INSNS (3), /* neg.  */
    COSTS_N_INSNS (3), /* compare.  */
    COSTS_N_INSNS (3), /* widen.  */
    COSTS_N_INSNS (3), /* narrow.  */
    COSTS_N_INSNS (3), /* toint.  */
    COSTS_N_INSNS (3), /* fromint.  */
    COSTS_N_INSNS (3) /* roundint.  */
    COSTS_N_INSNS (30), /* div.  */
    COSTS_N_INSNS (6), /* mult.  */
    COSTS_N_INSNS (10), /* mult_addsub.  */
    COSTS_N_INSNS (7), /* fma.  */
    COSTS_N_INSNS (3), /* addsub.  */
    COSTS_N_INSNS (3), /* fpconst.  */
    COSTS_N_INSNS (3), /* neg.  */
    COSTS_N_INSNS (3), /* compare.  */
    COSTS_N_INSNS (3), /* widen.  */
    COSTS_N_INSNS (3), /* narrow.  */
    COSTS_N_INSNS (3), /* toint.  */
    COSTS_N_INSNS (3), /* fromint.  */
    COSTS_N_INSNS (3) /* roundint.  */
    COSTS_N_INSNS (1) /* alu.  */
const struct cpu_cost_table cortexa7_extra_costs =
    COSTS_N_INSNS (1), /* shift.  */
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    COSTS_N_INSNS (1), /* bfx.  */
    COSTS_N_INSNS (1), /* clz.  */
    COSTS_N_INSNS (1), /* rev.  */
    true /* non_exec_costs_exec.  */
    COSTS_N_INSNS (1), /* flag_setting.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (1), /* add.  */
    COSTS_N_INSNS (1), /* extend_add.  */
    COSTS_N_INSNS (7) /* idiv.  */
    0, /* simple (N/A).  */
    0, /* flag_setting (N/A).  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (2), /* extend_add.  */
    COSTS_N_INSNS (1), /* load.  */
    COSTS_N_INSNS (1), /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (1), /* ldm_1st.  */
    1, /* ldm_regs_per_insn_1st.  */
    2, /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* loadf.  */
    COSTS_N_INSNS (2), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (1), /* store.  */
    COSTS_N_INSNS (3), /* strd.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1, /* stm_regs_per_insn_1st.  */
    2, /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (2), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1) /* storev.  */
    COSTS_N_INSNS (15), /* div.  */
    COSTS_N_INSNS (3), /* mult.  */
    COSTS_N_INSNS (7), /* mult_addsub.  */
    COSTS_N_INSNS (7), /* fma.  */
    COSTS_N_INSNS (3), /* addsub.  */
    COSTS_N_INSNS (3), /* fpconst.  */
    COSTS_N_INSNS (3), /* neg.  */
    COSTS_N_INSNS (3), /* compare.  */
    COSTS_N_INSNS (3), /* widen.  */
    COSTS_N_INSNS (3), /* narrow.  */
    COSTS_N_INSNS (3), /* toint.  */
    COSTS_N_INSNS (3), /* fromint.  */
    COSTS_N_INSNS (3) /* roundint.  */
    COSTS_N_INSNS (30), /* div.  */
    COSTS_N_INSNS (6), /* mult.  */
    COSTS_N_INSNS (10), /* mult_addsub.  */
    COSTS_N_INSNS (7), /* fma.  */
    COSTS_N_INSNS (3), /* addsub.  */
    COSTS_N_INSNS (3), /* fpconst.  */
    COSTS_N_INSNS (3), /* neg.  */
    COSTS_N_INSNS (3), /* compare.  */
    COSTS_N_INSNS (3), /* widen.  */
    COSTS_N_INSNS (3), /* narrow.  */
    COSTS_N_INSNS (3), /* toint.  */
    COSTS_N_INSNS (3), /* fromint.  */
    COSTS_N_INSNS (3) /* roundint.  */
    COSTS_N_INSNS (1) /* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfx.  */
    COSTS_N_INSNS (1), /* clz.  */
    COSTS_N_INSNS (1), /* rev.  */
    true /* non_exec_costs_exec.  */
    COSTS_N_INSNS (2), /* simple.  */
    COSTS_N_INSNS (3), /* flag_setting.  */
    COSTS_N_INSNS (2), /* extend.  */
    COSTS_N_INSNS (3), /* add.  */
    COSTS_N_INSNS (2), /* extend_add.  */
    COSTS_N_INSNS (18) /* idiv.  */
    0, /* simple (N/A).  */
    0, /* flag_setting (N/A).  */
    COSTS_N_INSNS (3), /* extend.  */
    COSTS_N_INSNS (3), /* extend_add.  */
    COSTS_N_INSNS (3), /* load.  */
    COSTS_N_INSNS (3), /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (3), /* ldm_1st.  */
    1, /* ldm_regs_per_insn_1st.  */
    2, /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3), /* loadf.  */
    COSTS_N_INSNS (3), /* loadd.  */
    0, /* load_unaligned.  */
    1, /* stm_regs_per_insn_1st.  */
    2, /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (2), /* stored.  */
    0, /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1) /* storev.  */
    COSTS_N_INSNS (17), /* div.  */
    COSTS_N_INSNS (4), /* mult.  */
    COSTS_N_INSNS (8), /* mult_addsub.  */
    COSTS_N_INSNS (8), /* fma.  */
    COSTS_N_INSNS (4), /* addsub.  */
    COSTS_N_INSNS (2), /* fpconst.  */
    COSTS_N_INSNS (2), /* neg.  */
    COSTS_N_INSNS (2), /* compare.  */
    COSTS_N_INSNS (4), /* widen.  */
    COSTS_N_INSNS (4), /* narrow.  */
    COSTS_N_INSNS (4), /* toint.  */
    COSTS_N_INSNS (4), /* fromint.  */
    COSTS_N_INSNS (4) /* roundint.  */
    COSTS_N_INSNS (31), /* div.  */
    COSTS_N_INSNS (4), /* mult.  */
    COSTS_N_INSNS (8), /* mult_addsub.  */
    COSTS_N_INSNS (8), /* fma.  */
    COSTS_N_INSNS (4), /* addsub.  */
    COSTS_N_INSNS (2), /* fpconst.  */
    COSTS_N_INSNS (2), /* neg.  */
    COSTS_N_INSNS (2), /* compare.  */
    COSTS_N_INSNS (4), /* widen.  */
    COSTS_N_INSNS (4), /* narrow.  */
    COSTS_N_INSNS (4), /* toint.  */
    COSTS_N_INSNS (4), /* fromint.  */
    COSTS_N_INSNS (4) /* roundint.  */
    COSTS_N_INSNS (1) /* alu.  */
const struct cpu_cost_table cortexa15_extra_costs =
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    true /* non_exec_costs_exec.  */
    COSTS_N_INSNS (2), /* simple.  */
    COSTS_N_INSNS (3), /* flag_setting.  */
    COSTS_N_INSNS (2), /* extend.  */
    COSTS_N_INSNS (2), /* add.  */
    COSTS_N_INSNS (2), /* extend_add.  */
    COSTS_N_INSNS (18) /* idiv.  */
    0, /* simple (N/A).  */
    0, /* flag_setting (N/A).  */
    COSTS_N_INSNS (3), /* extend.  */
    COSTS_N_INSNS (3), /* extend_add.  */
    COSTS_N_INSNS (3), /* load.  */
    COSTS_N_INSNS (3), /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (4), /* ldm_1st.  */
    1, /* ldm_regs_per_insn_1st.  */
    2, /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4), /* loadf.  */
    COSTS_N_INSNS (4), /* loadd.  */
    0, /* load_unaligned.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1, /* stm_regs_per_insn_1st.  */
    2, /* stm_regs_per_insn_subsequent.  */
    0, /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1) /* storev.  */
    COSTS_N_INSNS (17), /* div.  */
    COSTS_N_INSNS (4), /* mult.  */
    COSTS_N_INSNS (8), /* mult_addsub.  */
    COSTS_N_INSNS (8), /* fma.  */
    COSTS_N_INSNS (4), /* addsub.  */
    COSTS_N_INSNS (2), /* fpconst.  */
    COSTS_N_INSNS (2), /* neg.  */
    COSTS_N_INSNS (5), /* compare.  */
    COSTS_N_INSNS (4), /* widen.  */
    COSTS_N_INSNS (4), /* narrow.  */
    COSTS_N_INSNS (4), /* toint.  */
    COSTS_N_INSNS (4), /* fromint.  */
    COSTS_N_INSNS (4) /* roundint.  */
    COSTS_N_INSNS (31), /* div.  */
    COSTS_N_INSNS (4), /* mult.  */
    COSTS_N_INSNS (8), /* mult_addsub.  */
    COSTS_N_INSNS (8), /* fma.  */
    COSTS_N_INSNS (4), /* addsub.  */
    COSTS_N_INSNS (2), /* fpconst.  */
    COSTS_N_INSNS (2), /* neg.  */
    COSTS_N_INSNS (2), /* compare.  */
    COSTS_N_INSNS (4), /* widen.  */
    COSTS_N_INSNS (4), /* narrow.  */
    COSTS_N_INSNS (4), /* toint.  */
    COSTS_N_INSNS (4), /* fromint.  */
    COSTS_N_INSNS (4) /* roundint.  */
    COSTS_N_INSNS (1) /* alu.  */
const struct cpu_cost_table v7m_extra_costs =
    0, /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* non_exec.  */
    false /* non_exec_costs_exec.  */
    COSTS_N_INSNS (1), /* simple.  */
    COSTS_N_INSNS (1), /* flag_setting.  */
    COSTS_N_INSNS (2), /* extend.  */
    COSTS_N_INSNS (1), /* add.  */
    COSTS_N_INSNS (3), /* extend_add.  */
    COSTS_N_INSNS (8) /* idiv.  */
    0, /* simple (N/A).  */
    0, /* flag_setting (N/A).  */
    COSTS_N_INSNS (2), /* extend.  */
    COSTS_N_INSNS (3), /* extend_add.  */
    COSTS_N_INSNS (2), /* load.  */
    0, /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (2), /* ldm_1st.  */
    1, /* ldm_regs_per_insn_1st.  */
    1, /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* loadf.  */
    COSTS_N_INSNS (3), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (2), /* store.  */
    COSTS_N_INSNS (3), /* strd.  */
    COSTS_N_INSNS (2), /* stm_1st.  */
    1, /* stm_regs_per_insn_1st.  */
    1, /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (3), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1) /* storev.  */
    COSTS_N_INSNS (7), /* div.  */
    COSTS_N_INSNS (2), /* mult.  */
    COSTS_N_INSNS (5), /* mult_addsub.  */
    COSTS_N_INSNS (3), /* fma.  */
    COSTS_N_INSNS (1), /* addsub.  */
    COSTS_N_INSNS (15), /* div.  */
    COSTS_N_INSNS (5), /* mult.  */
    COSTS_N_INSNS (7), /* mult_addsub.  */
    COSTS_N_INSNS (7), /* fma.  */
    COSTS_N_INSNS (3), /* addsub.  */
    COSTS_N_INSNS (1) /* alu.  */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0), /* AMO_NO_WB.  */
    COSTS_N_INSNS (0) /* AMO_WB.  */
  },
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0), /* AMO_NO_WB.  */
    COSTS_N_INSNS (0) /* AMO_WB.  */
  },
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0), /* AMO_NO_WB.  */
    COSTS_N_INSNS (0) /* AMO_WB.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs, /* Insn extra costs.  */
  &generic_addr_mode_costs, /* Addressing mode costs.  */
  NULL, /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3, /* Constant limit.  */
  5, /* Max cond insns.  */
  8, /* Memset max inline.  */
  1, /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs, /* Insn extra costs.  */
  &generic_addr_mode_costs, /* Addressing mode costs.  */
  NULL, /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1, /* Constant limit.  */
  5, /* Max cond insns.  */
  8, /* Memset max inline.  */
  1, /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1840 const struct tune_params arm_strongarm_tune
=
1842 &generic_extra_costs
, /* Insn extra costs. */
1843 &generic_addr_mode_costs
, /* Addressing mode costs. */
1844 NULL
, /* Sched adj cost. */
1845 arm_default_branch_cost
,
1846 &arm_default_vec_cost
,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL
,
1852 tune_params::PREF_CONST_POOL_TRUE
,
1853 tune_params::PREF_LDRD_FALSE
,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER
,
1857 tune_params::PREF_NEON_64_FALSE
,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1859 tune_params::FUSE_NOTHING
,
1860 tune_params::SCHED_AUTOPREF_OFF
1863 const struct tune_params arm_xscale_tune
=
1865 &generic_extra_costs
, /* Insn extra costs. */
1866 &generic_addr_mode_costs
, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost
,
1868 arm_default_branch_cost
,
1869 &arm_default_vec_cost
,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL
,
1875 tune_params::PREF_CONST_POOL_TRUE
,
1876 tune_params::PREF_LDRD_FALSE
,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER
,
1880 tune_params::PREF_NEON_64_FALSE
,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1882 tune_params::FUSE_NOTHING
,
1883 tune_params::SCHED_AUTOPREF_OFF
1886 const struct tune_params arm_9e_tune
=
1888 &generic_extra_costs
, /* Insn extra costs. */
1889 &generic_addr_mode_costs
, /* Addressing mode costs. */
1890 NULL
, /* Sched adj cost. */
1891 arm_default_branch_cost
,
1892 &arm_default_vec_cost
,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL
,
1898 tune_params::PREF_CONST_POOL_TRUE
,
1899 tune_params::PREF_LDRD_FALSE
,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER
,
1903 tune_params::PREF_NEON_64_FALSE
,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1905 tune_params::FUSE_NOTHING
,
1906 tune_params::SCHED_AUTOPREF_OFF
1909 const struct tune_params arm_marvell_pj4_tune
=
1911 &generic_extra_costs
, /* Insn extra costs. */
1912 &generic_addr_mode_costs
, /* Addressing mode costs. */
1913 NULL
, /* Sched adj cost. */
1914 arm_default_branch_cost
,
1915 &arm_default_vec_cost
,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL
,
1921 tune_params::PREF_CONST_POOL_TRUE
,
1922 tune_params::PREF_LDRD_FALSE
,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER
,
1926 tune_params::PREF_NEON_64_FALSE
,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1928 tune_params::FUSE_NOTHING
,
1929 tune_params::SCHED_AUTOPREF_OFF
1932 const struct tune_params arm_v6t2_tune
=
1934 &generic_extra_costs
, /* Insn extra costs. */
1935 &generic_addr_mode_costs
, /* Addressing mode costs. */
1936 NULL
, /* Sched adj cost. */
1937 arm_default_branch_cost
,
1938 &arm_default_vec_cost
,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL
,
1944 tune_params::PREF_CONST_POOL_FALSE
,
1945 tune_params::PREF_LDRD_FALSE
,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER
,
1949 tune_params::PREF_NEON_64_FALSE
,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1951 tune_params::FUSE_NOTHING
,
1952 tune_params::SCHED_AUTOPREF_OFF
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune
=
1959 &generic_extra_costs
,
1960 &generic_addr_mode_costs
, /* Addressing mode costs. */
1961 NULL
, /* Sched adj cost. */
1962 arm_default_branch_cost
,
1963 &arm_default_vec_cost
,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL
,
1969 tune_params::PREF_CONST_POOL_FALSE
,
1970 tune_params::PREF_LDRD_FALSE
,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER
,
1974 tune_params::PREF_NEON_64_FALSE
,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1976 tune_params::FUSE_NOTHING
,
1977 tune_params::SCHED_AUTOPREF_OFF
1980 const struct tune_params arm_cortex_a8_tune
=
1982 &cortexa8_extra_costs
,
1983 &generic_addr_mode_costs
, /* Addressing mode costs. */
1984 NULL
, /* Sched adj cost. */
1985 arm_default_branch_cost
,
1986 &arm_default_vec_cost
,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL
,
1992 tune_params::PREF_CONST_POOL_FALSE
,
1993 tune_params::PREF_LDRD_FALSE
,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER
,
1997 tune_params::PREF_NEON_64_FALSE
,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1999 tune_params::FUSE_NOTHING
,
2000 tune_params::SCHED_AUTOPREF_OFF
2003 const struct tune_params arm_cortex_a7_tune
=
2005 &cortexa7_extra_costs
,
2006 &generic_addr_mode_costs
, /* Addressing mode costs. */
2007 NULL
, /* Sched adj cost. */
2008 arm_default_branch_cost
,
2009 &arm_default_vec_cost
,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL
,
2015 tune_params::PREF_CONST_POOL_FALSE
,
2016 tune_params::PREF_LDRD_FALSE
,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER
,
2020 tune_params::PREF_NEON_64_FALSE
,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2022 tune_params::FUSE_NOTHING
,
2023 tune_params::SCHED_AUTOPREF_OFF
2026 const struct tune_params arm_cortex_a15_tune
=
2028 &cortexa15_extra_costs
,
2029 &generic_addr_mode_costs
, /* Addressing mode costs. */
2030 NULL
, /* Sched adj cost. */
2031 arm_default_branch_cost
,
2032 &arm_default_vec_cost
,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL
,
2038 tune_params::PREF_CONST_POOL_FALSE
,
2039 tune_params::PREF_LDRD_TRUE
,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL
,
2043 tune_params::PREF_NEON_64_FALSE
,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2045 tune_params::FUSE_NOTHING
,
2046 tune_params::SCHED_AUTOPREF_FULL
2049 const struct tune_params arm_cortex_a35_tune
=
2051 &cortexa53_extra_costs
,
2052 &generic_addr_mode_costs
, /* Addressing mode costs. */
2053 NULL
, /* Sched adj cost. */
2054 arm_default_branch_cost
,
2055 &arm_default_vec_cost
,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL
,
2061 tune_params::PREF_CONST_POOL_FALSE
,
2062 tune_params::PREF_LDRD_FALSE
,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER
,
2066 tune_params::PREF_NEON_64_FALSE
,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2069 tune_params::SCHED_AUTOPREF_OFF
2072 const struct tune_params arm_cortex_a53_tune
=
2074 &cortexa53_extra_costs
,
2075 &generic_addr_mode_costs
, /* Addressing mode costs. */
2076 NULL
, /* Sched adj cost. */
2077 arm_default_branch_cost
,
2078 &arm_default_vec_cost
,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL
,
2084 tune_params::PREF_CONST_POOL_FALSE
,
2085 tune_params::PREF_LDRD_FALSE
,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER
,
2089 tune_params::PREF_NEON_64_FALSE
,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2092 tune_params::SCHED_AUTOPREF_OFF
2095 const struct tune_params arm_cortex_a57_tune
=
2097 &cortexa57_extra_costs
,
2098 &generic_addr_mode_costs
, /* addressing mode costs */
2099 NULL
, /* Sched adj cost. */
2100 arm_default_branch_cost
,
2101 &arm_default_vec_cost
,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL
,
2107 tune_params::PREF_CONST_POOL_FALSE
,
2108 tune_params::PREF_LDRD_TRUE
,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL
,
2112 tune_params::PREF_NEON_64_FALSE
,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2115 tune_params::SCHED_AUTOPREF_FULL
2118 const struct tune_params arm_exynosm1_tune
=
2120 &exynosm1_extra_costs
,
2121 &generic_addr_mode_costs
, /* Addressing mode costs. */
2122 NULL
, /* Sched adj cost. */
2123 arm_default_branch_cost
,
2124 &arm_default_vec_cost
,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL
,
2130 tune_params::PREF_CONST_POOL_FALSE
,
2131 tune_params::PREF_LDRD_TRUE
,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL
,
2135 tune_params::PREF_NEON_64_FALSE
,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2137 tune_params::FUSE_NOTHING
,
2138 tune_params::SCHED_AUTOPREF_OFF
2141 const struct tune_params arm_xgene1_tune
=
2143 &xgene1_extra_costs
,
2144 &generic_addr_mode_costs
, /* Addressing mode costs. */
2145 NULL
, /* Sched adj cost. */
2146 arm_default_branch_cost
,
2147 &arm_default_vec_cost
,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL
,
2153 tune_params::PREF_CONST_POOL_FALSE
,
2154 tune_params::PREF_LDRD_TRUE
,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL
,
2158 tune_params::PREF_NEON_64_FALSE
,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2160 tune_params::FUSE_NOTHING
,
2161 tune_params::SCHED_AUTOPREF_OFF
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2167 const struct tune_params arm_cortex_a5_tune
=
2169 &cortexa5_extra_costs
,
2170 &generic_addr_mode_costs
, /* Addressing mode costs. */
2171 NULL
, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost
,
2173 &arm_default_vec_cost
,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL
,
2179 tune_params::PREF_CONST_POOL_FALSE
,
2180 tune_params::PREF_LDRD_FALSE
,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER
,
2184 tune_params::PREF_NEON_64_FALSE
,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2186 tune_params::FUSE_NOTHING
,
2187 tune_params::SCHED_AUTOPREF_OFF
2190 const struct tune_params arm_cortex_a9_tune
=
2192 &cortexa9_extra_costs
,
2193 &generic_addr_mode_costs
, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost
,
2195 arm_default_branch_cost
,
2196 &arm_default_vec_cost
,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE
,
2203 tune_params::PREF_LDRD_FALSE
,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER
,
2207 tune_params::PREF_NEON_64_FALSE
,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2209 tune_params::FUSE_NOTHING
,
2210 tune_params::SCHED_AUTOPREF_OFF
2213 const struct tune_params arm_cortex_a12_tune
=
2215 &cortexa12_extra_costs
,
2216 &generic_addr_mode_costs
, /* Addressing mode costs. */
2217 NULL
, /* Sched adj cost. */
2218 arm_default_branch_cost
,
2219 &arm_default_vec_cost
, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL
,
2225 tune_params::PREF_CONST_POOL_FALSE
,
2226 tune_params::PREF_LDRD_TRUE
,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL
,
2230 tune_params::PREF_NEON_64_FALSE
,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2233 tune_params::SCHED_AUTOPREF_OFF
2236 const struct tune_params arm_cortex_a73_tune
=
2238 &cortexa57_extra_costs
,
2239 &generic_addr_mode_costs
, /* Addressing mode costs. */
2240 NULL
, /* Sched adj cost. */
2241 arm_default_branch_cost
,
2242 &arm_default_vec_cost
, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL
,
2248 tune_params::PREF_CONST_POOL_FALSE
,
2249 tune_params::PREF_LDRD_TRUE
,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL
,
2253 tune_params::PREF_NEON_64_FALSE
,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2256 tune_params::SCHED_AUTOPREF_FULL
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */
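/* As an illustration of the trade-off described above (assembly sketch
   using the assumed cycle counts from the comment, not taken from this
   file), materialising a 32-bit constant as

       movw    r0, #:lower16:value     @ 1 cycle
       movt    r0, #:upper16:value     @ 1 cycle

   costs the same two cycles as the literal-pool form

       ldr     r0, .Lpool_entry        @ 2 cycles

   but the LDR can pipeline with a neighbouring load/store and pool
   entries can be shared between uses, which is why the tuning below
   keeps PREF_CONST_POOL_TRUE.  */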
const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,  /* Addressing mode costs.  */
  NULL,  /* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,  /* Constant limit.  */
  2,  /* Max cond insns.  */
  8,  /* Memset max inline.  */
  1,  /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,  /* Addressing mode costs.  */
  NULL,  /* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,  /* Constant limit.  */
  1,  /* Max cond insns.  */
  8,  /* Memset max inline.  */
  2,  /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m23.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,  /* Insn extra costs.  */
  &generic_addr_mode_costs,  /* Addressing mode costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  1,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  8,  /* Memset max inline.  */
  1,  /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,  /* Insn extra costs.  */
  &generic_addr_mode_costs,  /* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  8,  /* Memset max inline.  */
  2,  /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (e.g. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
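/* Example: for VALUE == 0x29 (binary 101001) the loop above runs three
   times, clearing one set bit per iteration:
     0x29 -> 0x28 -> 0x20 -> 0x00
   so bit_count returns 3.  */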
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
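/* For example, a call such as

     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);

   registers the library function "__gnu_ssaddsa3" for saturating SAmode
   addition (an illustrative combination; the full set of such calls is
   made from arm_init_libfuncs below).  */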
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
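/* For example, converting from HAmode ("ha") to SAmode ("sa"), both
   signed accumulator modes, satisfies the fixed-bit.h rule above and so
   registers "__gnu_fracthasa2"; a conversion involving a non-fixed-point
   mode such as SFmode gets no "2" suffix.  (Illustrative combinations;
   the actual registrations are driven by the tables below.)  */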
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
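  /* To illustrate the divmod arrangement above (illustrative C, not part
     of this file): for

       int rem (int a, int b) { return a % b; }

     the compiler emits a call to __aeabi_idivmod; the quotient comes
     back in r0 and the remainder in r1, and the caller simply uses the
     r1 result, so no separate modulo helper is needed.  */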
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
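  /* Thus with -mfp16-format=ieee (an illustrative invocation), narrowing
     a float to an __fp16 becomes a call to __gnu_f2h_ieee, while HFmode
     arithmetic and comparisons are performed by extending to SFmode
     first, because the corresponding optab entries were set to NULL
     above.  */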
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
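/* Illustratively, the net effect on an AAPCS target is as if <stdarg.h>
   had provided

     typedef struct __va_list { void *__ap; } va_list;

   which is why neither the "__va_list" tag nor the "__ap" field name can
   be changed without breaking the ABI.  */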
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  /* We only support -mpure-code and -mslow-flash-data on M-profile targets
     with MOVT.  */
  if ((target_pure_code || target_slow_flash_data)
      && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      error ("%s only supports non-pic code on M-profile targets with the "
	     "MOVT instruction", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
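  /* Check of the arithmetic in the comment above: 248 + 1 + 4095 = 4344,
     and 4344 = 8 * 543, so the block size is indeed divisible by eight.  */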
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_align_functions <= 0)
    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? 2 : 4;
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}
/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with, e.g.,
	 -march=armv3.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial option values so that combinations of
     options like -mflip-thumb -mthumb -fno-schedule-insns work for any
     attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }
  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  bitmap_set_bit (sought_isa, isa_bit_thumb);
	  bitmap_set_bit (sought_isa, isa_bit_mode32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we forcibly remove MODE26 from
	     the isa features here (if it was set), so that the
	     search below will always be able to find a compatible
	     processor.  */
	  bitmap_clear_bit (default_isa, isa_bit_mode26);
	}

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
  arm_option_reconfigure_globals ();
}
3352 /* Fix up any incompatible options that the user has specified. */
3354 arm_option_override (void)
3356 static const enum isa_feature fpu_bitlist
[]
3357 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3358 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3359 cl_target_option opts
;
3361 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3362 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3364 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3365 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3367 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3369 if (!global_options_set
.x_arm_fpu_index
)
3374 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3377 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3380 cl_target_option_save (&opts
, &global_options
);
3381 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3384 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3385 SUBTARGET_OVERRIDE_OPTIONS
;
3388 /* Initialize boolean versions of the architectural flags, for use
3389 in the arm.md file and for enabling feature flags. */
3390 arm_option_reconfigure_globals ();
3392 arm_tune
= arm_active_target
.tune_core
;
3393 tune_flags
= arm_active_target
.tune_flags
;
3394 current_tune
= arm_active_target
.tune
;
3396 /* TBD: Dwarf info for apcs frame is not handled yet. */
3397 if (TARGET_APCS_FRAME
)
3398 flag_shrink_wrap
= false;
3400 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3402 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3403 target_flags
|= MASK_APCS_FRAME
;
3406 if (TARGET_POKE_FUNCTION_NAME
)
3407 target_flags
|= MASK_APCS_FRAME
;
3409 if (TARGET_APCS_REENT
&& flag_pic
)
3410 error ("-fpic and -mapcs-reent are incompatible");
3412 if (TARGET_APCS_REENT
)
3413 warning (0, "APCS reentrant code not supported. Ignored");
3415 /* Set up some tuning parameters. */
3416 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3417 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3418 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3419 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3420 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3421 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3423 /* For arm2/3 there is no need to do any scheduling if we are doing
3424 software floating-point. */
3425 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3426 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3428 /* Override the default structure alignment for AAPCS ABI. */
3429 if (!global_options_set
.x_arm_structure_size_boundary
)
3431 if (TARGET_AAPCS_BASED
)
3432 arm_structure_size_boundary
= 8;
3436 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3438 if (arm_structure_size_boundary
!= 8
3439 && arm_structure_size_boundary
!= 32
3440 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3442 if (ARM_DOUBLEWORD_ALIGN
)
3444 "structure size boundary can only be set to 8, 32 or 64");
3446 warning (0, "structure size boundary can only be set to 8 or 32");
3447 arm_structure_size_boundary
3448 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3452 if (TARGET_VXWORKS_RTP
)
3454 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3455 arm_pic_data_is_text_relative
= 0;
3458 && !arm_pic_data_is_text_relative
3459 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3460 /* When text & data segments don't have a fixed displacement, the
3461 intended use is with a single, read only, pic base register.
3462 Unless the user explicitly requested not to do that, set
3464 target_flags
|= MASK_SINGLE_PIC_BASE
;
3466 /* If stack checking is disabled, we can use r10 as the PIC register,
3467 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3468 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3470 if (TARGET_VXWORKS_RTP
)
3471 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3472 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3475 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3476 arm_pic_register
= 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
                           current_tune->prefetch.num_slots,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
                           current_tune->prefetch.l1_cache_line_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
                           current_tune->prefetch.l1_cache_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;
  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
                         param_sched_autopref_queue_depth,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
  arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
  arm_arch5te = arm_arch5e
    && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
        error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }
  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
                                            isa_bit_quirk_armv6kz);
  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }
}
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }
  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }
  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
        {
          arm_pcs_default = ARM_PCS_AAPCS_VFP;
          if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
            error ("-mfloat-abi=hard: selected processor lacks an FPU");
        }
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
           ldr          r8, [pc, #0]
           ldr          pc, [pc]
           .word        static chain value
           .word        function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
         Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
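
/* An illustrative sketch, added for clarity (not in the original source;
   the register names assume the usual STATIC_CHAIN_REGNUM of ip/r12).  On a
   32-bit ARM target the initialized trampoline at address A looks like:

        A+0:   ldr   ip, [pc, #0]   @ pc reads as A+8 here, so this loads A+8
        A+4:   ldr   pc, [pc, #0]   @ pc reads as A+12 here, so this loads A+12
        A+8:   .word <static chain value>   (stored by arm_trampoline_init)
        A+12:  .word <function's address>   (stored by arm_trampoline_init)  */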
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
                     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
         assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
          && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ... */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
        {
          gcc_assert (CALL_P (sibling));

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  /* Allow rotated constants in ARM mode.  */
  if (TARGET_ARM
      && ((i & ~0xc000003f) == 0
          || (i & ~0xf000000f) == 0
          || (i & ~0xfc000003) == 0))
    return TRUE;
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
        return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
        return FALSE;
      else
        return TRUE;
    }

  return FALSE;
}
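
/* Illustrative examples, added for clarity (not in the original source):
   an ARM-mode immediate is an 8-bit value rotated right by an even amount,
   so 0x0000ff00 (0xff << 8) and 0xf000000f (0xff rotated right by 4) are
   accepted above, while 0x00ffff00 (16 significant bits) and 0x00101001
   (set bits too far apart for any one 8-bit window) are rejected and must
   be synthesized from several instructions.  */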
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
        return 1;
      else
        /* Otherwise, try mvn.  */
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
          && ((i & 0xfffff000) == 0
              || ((-i) & 0xfffff000) == 0))
        return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:         /* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
             && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
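
/* An illustrative note, added for clarity (not in the original source):
   a DImode AND with 0xffffffff000000ff splits into hi_val = 0xffffffff
   (the high word is left untouched) and lo_val = 0x000000ff (a valid
   AND immediate), so the whole operation can be done with SImode insns
   and no materialized 64-bit constant.  */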
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
          && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
          && !cond
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are dyadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));

              return 1;
            }
          else
            {
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));

              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.  */
              if (code == MINUS)
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
              else
                emit_set_insn (target,
                               gen_rtx_fmt_ee (code, mode, source, temp));
              return 2;
            }
        }
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
                           1);
}
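
/* An illustrative note, added for clarity (not in the original source):
   when TARGET_USE_MOVT holds, arm_emit_movpair above synthesizes any
   32-bit value with a MOVW/MOVT pair, e.g. for 0x12345678:

        movw    r0, #0x5678    @ sets the low halfword, clears the high
        movt    r0, #0x1234    @ sets the high halfword  */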
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
                            struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
        {
          int consecutive_zeros = 0;

          if (!(val & (3 << i)))
            {
              while ((i < 32) && !(val & (3 << i)))
                {
                  consecutive_zeros += 2;
                  i += 2;
                }
              if (consecutive_zeros > best_consecutive_zeros)
                {
                  best_consecutive_zeros = consecutive_zeros;
                  best_start = i - consecutive_zeros;
                }
              i -= 2;
            }
        }
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

           *((volatile int *)0xe0000100) = 1;
           *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

            mov rA, #0xe0000000
            mov rB, #1
            str rB, [rA, #0x100]
            mov rB, #2
            str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
        {
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
                              struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
        i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
        {
          loc = i;
          if (i <= 12 && TARGET_THUMB2 && code == PLUS)
            /* We can use addw/subw for the last 12 bits.  */
            result = remainder;
          else
            {
              /* Use an 8-bit shifted/rotated immediate.  */
              end = i - 8;
              if (end < 0)
                end += 32;
              result = remainder & ((0x0ff << end)
                                    | ((i < end) ? (0xff >> (32 - end))
                                                 : 0));
              i -= 8;
            }
        }
      else
        {
          /* Arm allows rotates by a multiple of two.  Thumb-2 allows
             arbitrary shifts.  */
          i -= TARGET_ARM ? 2 : 1;
          continue;
        }

      /* Next, see if we can do a better job with a thumb2 replicated
         constant.

         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         make it worse.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
        {
          b1 = (remainder & 0xff000000) >> 24;
          b2 = (remainder & 0x00ff0000) >> 16;
          b3 = (remainder & 0x0000ff00) >> 8;
          b4 = remainder & 0xff;

          if (loc > 24)
            {
              /* The 8-bit immediate already found clears b1 (and maybe b2),
                 but must leave b3 and b4 alone.  */

              /* First try to find a 32-bit replicated constant that clears
                 almost everything.  We can assume that we can't do it in one,
                 or else we wouldn't be here.  */
              unsigned int tmp = b1 & b2 & b3 & b4;
              unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
                                  + (tmp << 24);
              unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
                                            + (tmp == b3) + (tmp == b4);
              if (tmp
                  && (matching_bytes >= 3
                      || (matching_bytes == 2
                          && const_ok_for_op (remainder & ~tmp2, code))))
                {
                  /* At least 3 of the bytes match, and the fourth has at
                     least as many bits set, or two of the bytes match
                     and it will only require one more insn to finish.  */
                  result = tmp2;
                  i = tmp != b1 ? 32
                      : tmp != b2 ? 24
                      : tmp != b3 ? 16
                      : 8;
                }

              /* Second, try to find a 16-bit replicated constant that can
                 leave three of the bytes clear.  If b2 or b4 is already
                 zero, then we can.  If the 8-bit from above would not
                 clear b2 anyway, then we still win.  */
              else if (b1 == b3 && (!b2 || !b4
                                    || (remainder & 0x00ff0000 & ~result)))
                {
                  result = remainder & 0xff00ff00;
                  i = 24;
                }
            }
          else if (loc > 16)
            {
              /* The 8-bit immediate already found clears b2 (and maybe b3)
                 and we don't get here unless b1 is already clear, but it will
                 leave b4 unchanged.  */

              /* If we can clear b2 and b4 at once, then we win, since the
                 8-bits couldn't possibly reach that far.  */
              if (b2 == b4)
                {
                  result = remainder & 0x00ff00ff;
                  i = 16;
                }
            }
        }

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}
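
/* An illustrative note, added for clarity (not in the original source):
   for val = 0x01f001e0 in ARM mode the loop above finds the two rotated
   8-bit immediates 0x01f00000 and 0x000001e0, giving a two-insn sequence
   such as "mov r0, #0x01f00000; orr r0, r0, #0x1e0", which is why the
   replicated-constant heuristic is only tried second.  */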
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
                  unsigned HOST_WIDE_INT val, rtx target, rtx source,
                  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
          return 1;
        }

      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;

          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }
      break;

    case AND:
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
          return 1;
        }
      if (remainder == 0xffffffff)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }

      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_NOT (mode, source)));
          return 1;
        }
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_NEG (mode, source)));
          return 1;
        }
      if (const_ok_for_arm (val))
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_MINUS (mode, GEN_INT (val),
                                                            source)));
          return 1;
        }

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
        emit_constant_insn (cond,
                            gen_rtx_SET (target,
                                         (source
                                          ? gen_rtx_fmt_ee (code, mode, source,
                                                            GEN_INT (val))
                                          : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
        {
          if (mode == SImode && i == 16)
            /* Use UXTH in preference to UBFX, since on Thumb2 it's a
               smaller insn.  */
            emit_constant_insn (cond,
                                gen_zero_extendhisi2
                                (target, gen_lowpart (HImode, source)));
          else
            /* Extz only supports SImode, but we can coerce the operands
               into that mode.  */
            emit_constant_insn (cond,
                                gen_extzv_t2 (gen_lowpart (SImode, target),
                                              gen_lowpart (SImode, source),
                                              GEN_INT (i), const0_rtx));
        }

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
         may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
        {
          if (const_ok_for_arm
              (temp1 = ARM_SIGN_EXTEND (remainder
                                        << (set_sign_bit_copies - 1))))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
          /* For an inverted constant, we will need to set the low bits,
             these will be shifted out of harm's way.  */
          temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
          if (const_ok_for_arm (~temp1))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
        }

      /* See if we can calculate the value as the difference between two
         valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
        {
          int topshift = clear_sign_bit_copies & ~1;

          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                                   & (0xff000000 >> topshift));

          /* If temp1 is zero, then that means the 9 most significant
             bits of remainder were 1 and we've caused it to overflow.
             When topshift is 0 we don't need to do anything since we
             can borrow from 'bit 32'.  */
          if (temp1 == 0 && topshift != 0)
            temp1 = 0x80000000 >> (topshift - 1);

          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

          if (const_ok_for_arm (temp2))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_addsi3 (target, new_src,
                                                  GEN_INT (-temp2)));
                }

              return 2;
            }
        }

      /* See if we can generate this by setting the bottom (or the top)
         16 bits, and then shifting these into the other half of the
         word.  We only look for the simplest cases, to do more would cost
         too much.  Be careful, however, not to generate this when the
         alternative would take fewer insns.  */
      if (val & 0xffff0000)
        {
          temp1 = remainder & 0xffff0000;
          temp2 = remainder & 0x0000ffff;

          /* Overlaps outside this range are best done using other methods.  */
          for (i = 9; i < 24; i++)
            {
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
                  && !const_ok_for_arm (temp2))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET
                       (target,
                        gen_rtx_IOR (mode,
                                     gen_rtx_ASHIFT (mode, source,
                                                     GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }

          /* Don't duplicate cases already considered.  */
          for (i = 17; i < 24; i++)
            {
              if (((temp1 | (temp1 >> i)) == remainder)
                  && !const_ok_for_arm (temp1))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET (target,
                                    gen_rtx_IOR
                                    (mode,
                                     gen_rtx_LSHIFTRT (mode, source,
                                                       GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }
        }
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0 */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
            {
              if (generate)
                {
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

                  emit_constant_insn (cond,
                                      gen_rtx_SET (sub, GEN_INT (val)));
                  emit_constant_insn (cond,
                                      gen_rtx_SET (target,
                                                   gen_rtx_fmt_ee (code, mode,
                                                                   source, sub)));
                }
              return 2;
            }
        }

      if (code == XOR)
        break;

      /*  Convert.
          x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
                             and the remainder 0s for e.g. 0xfff00000)
          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

          This can be done in 2 instructions by using shifts with mov or mvn.
          e.g. for
          x = x | 0xfff00000;
          we generate.
          mvn   r0, r0, asl #12
          mvn   r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
          && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_sign_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode,
                                                           source,
                                                           shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode, sub,
                                                             shift))));
            }
          return 2;
        }

      /* Convert
          x = y | constant (which has set_zero_bit_copies number of trailing ones).
           to
          x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

          For eg. r0 = r0 | 0xfff
               mvn      r0, r0, lsr #12
               mvn      r0, r0, asl #12

          */
      if (set_zero_bit_copies > 8
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_zero_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode,
                                                             source,
                                                             shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode, sub,
                                                           shift))));
            }
          return 2;
        }

      /* This will never be reached for Thumb2 because orn is a valid
         instruction.  This is for Thumb1 and the ARM 32 bit cases.

         x = y | constant (such that ~constant is a valid constant)
         Transform this to
         x = ~(~y & ~constant).
         */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              emit_constant_insn (cond,
                                  gen_rtx_SET (sub,
                                               gen_rtx_NOT (mode, source)));
              source = sub;
              if (subtargets)
                sub = gen_reg_rtx (mode);
              emit_constant_insn (cond,
                                  gen_rtx_SET (sub,
                                               gen_rtx_AND (mode, source,
                                                            GEN_INT (temp1))));
              emit_constant_insn (cond,
                                  gen_rtx_SET (target,
                                               gen_rtx_NOT (mode, sub)));
            }
          return 3;
        }
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = ((0xffffffff
                                       << (32 - clear_sign_bit_copies))
                                      & 0xffffffff);

          if ((remainder | shift_mask) != 0xffffffff)
            {
              HOST_WIDE_INT new_val
                = ARM_SIGN_EXTEND (remainder | shift_mask);

              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  insns = arm_gen_constant (AND, SImode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
            {
              HOST_WIDE_INT new_val
                = ARM_SIGN_EXTEND (remainder | shift_mask);

              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
                                            &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
                                            &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
        {
          rtx new_src, temp1_rtx;

          temp1 = immediates->i[i];

          if (code == SET || code == MINUS)
            new_src = (subtargets ? gen_reg_rtx (mode) : target);
          else if ((final_invert || i < (insns - 1)) && subtargets)
            new_src = gen_reg_rtx (mode);
          else
            new_src = target;

          if (can_invert)
            temp1 = ~temp1;
          else if (can_negate)
            temp1 = -temp1;

          temp1 = trunc_int_for_mode (temp1, mode);
          temp1_rtx = GEN_INT (temp1);

          if (code == SET)
            ;
          else if (code == MINUS)
            temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
          else
            temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

          emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
          source = new_src;

          if (code == SET)
            {
              can_negate = can_invert;
              can_invert = 0;
              code = PLUS;
            }
          else if (code == MINUS)
            code = PLUS;
        }
    }

  if (final_invert)
    {
      if (generate)
        emit_constant_insn (cond, gen_rtx_SET (target,
                                               gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
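
/* An illustrative note, added for clarity (not in the original source):
   for SET of 0xfffe0ffe no single immediate works, but the inverted value
   0x0001f001 splits into 0x1f000 + 0x1, so the chosen sequence is
   "mvn r0, #0x1f000; sub r0, r0, #1" -- two insns instead of the four the
   positive sequence would need.  */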
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                             bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
          || (!TARGET_ARM && (*code == GTU || *code == LEU)))
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (CONST_INT_P (*op1))
            {
              i = INTVAL (*op1);
              switch (*code)
                {
                case GT:
                case LE:
                  if (i != maxval
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GT ? GE : LT;
                      return;
                    }
                  break;

                case GTU:
                case LEU:
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GTU ? GEU : LTU;
                      return;
                    }
                  break;

                default:
                  gcc_unreachable ();
                }
            }

          /* If that did not work, reverse the condition.  */
          if (!op0_preserve_value)
            {
              std::swap (*op0, *op1);
              *code = (int)swap_condition ((enum rtx_code)*code);
            }
        }
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GT ? GE : LT;
          return;
        }
      break;

    case GE:
    case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GE ? GT : LE;
          return;
        }
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GTU ? GEU : LTU;
          return;
        }
      break;

    case GEU:
    case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GEU ? GTU : LEU;
          return;
        }
      break;

    default:
      gcc_unreachable ();
    }
}
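
/* An illustrative note, added for clarity (not in the original source):
   (x > 0xffff) would need the invalid immediate 0xffff, so the GT case
   above rewrites it as (x >= 0x10000); 0x10000 is a single rotated 8-bit
   constant and the meaning of the comparison is unchanged.  */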
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
        }
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;
static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
         registers if the selected core only supports single-precision
         arithmetic, even if we are using the hard-float ABI.  The same is
         true for single-precision helpers, but we will never be using the
         hard-float ABI on a CPU which doesn't support single-precision
         operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
                                                        SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
                                                        DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
           && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
          && TARGET_AAPCS_BASED
          && TARGET_HARD_FLOAT
          && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
          && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
        size += 32;
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
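
/* Illustrative note (not from the original source): under AAPCS,
   'struct { char c; }' fits in one word and is returned in r0, while
   'struct { int a[3]; }' (12 bytes, not a co-processor candidate) is
   returned in memory through the implicit address passed in r0.  */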
/* Matches the names usable with the pcs attribute to the corresponding
   arm_pcs values.  */
static const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
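
/* Usage sketch (illustrative): the attribute appears on a function
   declaration, e.g.

     double dot (float, float) __attribute__ ((pcs ("aapcs-vfp")));

   arm_pcs_from_attribute maps the string "aapcs-vfp" above to
   ARM_PCS_AAPCS_VFP; any unlisted string yields ARM_PCS_UNKNOWN.  */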
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		    const_tree fntype  ATTRIBUTE_UNUSED,
		    rtx libcall  ATTRIBUTE_UNUSED,
		    const_tree fndecl  ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
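
/* Illustrative note: NUM_VFP_ARG_REGS is 16 (s0-s15), so the initial
   free mask is 0xffff with one bit per single-precision register; a
   double-precision argument will consume two adjacent bits, a quad
   vector four.  */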
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)))
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
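
/* Illustrative example: for 'struct { float x, y, z; }' the walk visits
   three REAL_TYPE fields of SFmode, so *MODEP is set to SFmode and the
   count returned is 3 - a homogeneous aggregate eligible for s0-s2.
   Mixing a float with a double makes the element modes disagree and the
   walk returns -1.  */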
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type  ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
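
/* Illustrative note: for a single double (rmode DFmode, rcount 1),
   SHIFT is 2 and MASK is 0x3, so the loop probes s0/s1, s2/s3, ... for
   the first free even pair.  Because a later SFmode argument scans with
   SHIFT 1 and MASK 0x1, it can still back-fill a lone free register
   such as s1, as the AAPCS VFP variant requires.  */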
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		   machine_mode mode  ATTRIBUTE_UNUSED,
		   const_tree type  ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
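
/* Illustrative note: AAPCS_CP(vfp) pulls in the six aapcs_vfp_*
   routines defined above; VFP is currently the only co-processor slot
   in this table.  */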
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
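
/* Worked example (illustrative), for the base AAPCS with arguments
   passed in core registers: in 'void f (int a, double d)', 'a' lands in
   r0 by rule C4.  For 'd', rule C3 rounds the NCRN up from 1 to 2 and
   C4 then assigns the r2/r3 pair; a third word-sized argument would
   fall through to the stack via C6-C8.  */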
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }

  return ret;
}
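
/* Illustrative note: the -1 result arises for C++ aggregates where the
   over-aligned entry on the TYPE_FIELDS chain is not a FIELD_DECL (for
   example a static data member or a typedef): releases before the
   PR77728 fix padded such arguments to an even register, GCC 7.1 and
   later do not, and -Wpsabi reports the difference.  */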
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (mode, type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	pcum->nregs++;
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
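
/* Illustrative example: a 16-byte structure arriving when nregs == 2
   is split across r2, r3 and the stack, so the function returns
   (4 - 2) * UNITS_PER_WORD = 8 bytes for the register portion.  */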
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;

      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
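
/* Usage sketch (illustrative):

     #pragma long_calls
     void far_away (void);    -- declared with the long_call attribute
     #pragma long_calls_off

   arm_set_default_type_attributes below applies the recorded state to
   every function type declared inside the corresponding region.  */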
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
6838 /* Handle an "interrupt" or "isr" attribute;
6839 arguments as in struct attribute_spec.handler. */
6841 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6846 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6848 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6850 *no_add_attrs
= true;
6852 /* FIXME: the argument if any is checked for type attributes;
6853 should it be checked for decl ones? */
6857 if (TREE_CODE (*node
) == FUNCTION_TYPE
6858 || TREE_CODE (*node
) == METHOD_TYPE
)
6860 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6862 warning (OPT_Wattributes
, "%qE attribute ignored",
6864 *no_add_attrs
= true;
6867 else if (TREE_CODE (*node
) == POINTER_TYPE
6868 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6869 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6870 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6872 *node
= build_variant_type_copy (*node
);
6873 TREE_TYPE (*node
) = build_type_attribute_variant
6875 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6876 *no_add_attrs
= true;
6880 /* Possibly pass this attribute on from the type to a decl. */
6881 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6882 | (int) ATTR_FLAG_FUNCTION_NEXT
6883 | (int) ATTR_FLAG_ARRAY_NEXT
))
6885 *no_add_attrs
= true;
6886 return tree_cons (name
, args
, NULL_TREE
);
6890 warning (OPT_Wattributes
, "%qE attribute ignored",
6899 /* Handle a "pcs" attribute; arguments as in struct
6900 attribute_spec.handler. */
6902 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6903 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6905 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6907 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6908 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;
      machine_mode arg_mode = TYPE_MODE (arg_type);

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      if (!first_param)
	arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
      if (!arg_rtx
	  || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args.  */,
				 int /* flags.  */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
	       name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args.  */,
				int /* flags.  */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
	       name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
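
/* Illustrative note: a long call matters because the ARM 'bl'
   instruction only reaches +/-32MB; when this predicate holds, the
   compiler loads the callee's full address into a register and uses an
   indirect call instead.  */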
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
	}

      if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge
				   (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  rtx pat;
	  rtx mem;

	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register ();

	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

	  /* Make the MEM as close to a constant as possible.  */
	  mem = SET_SRC (pat);
	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
	  MEM_READONLY_P (mem) = 1;
	  MEM_NOTRAP_P (mem) = 1;

	  insn = emit_insn (pat);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
	  ? crtl->args.info.aapcs_ncrn < 4
	  : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	{
	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
			       UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
}
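
/* Illustrative note: the UNSPEC_SYMBOL_OFFSET constant assembles to
   'sym - (.LPICn + 8)' in ARM state ('+ 4' in Thumb), so adding the pc
   value sampled at .LPICn reproduces the address of 'sym' wherever the
   text segment is loaded.  */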
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static bool
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return false;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && !CONST_INT_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
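/* For illustration, the PLUS case above accepts forms such as
     (plus (reg rN) (const_int 4))                   ; immediate offset
     (plus (reg rN) (reg rM))                        ; register offset
     (plus (reg rN) (mult (reg rM) (const_int 4)))   ; scaled index
   provided the index satisfies arm_legitimate_index_p for MODE.  */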
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool
      && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;
  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
             If vldr is selected it uses arm_coproc_mem_operand.  */
          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
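/* A factor of 1, 2, 4 or 8 corresponds to an LSL of 0-3 in the Thumb-2
   register-offset form; e.g. a factor of 4 matches
   "ldr r0, [r1, r2, lsl #2]".  */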
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and
               -1024~1024 (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* Thumb-2 ldrd only has reg+const addressing modes.
             Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
             If vldr is selected it uses arm_coproc_mem_operand.  */
          if (TARGET_LDRD)
            return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
          else
            return IN_RANGE (val, -255, 4095 - 4);
        }

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
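/* Worked example: for a 4-byte (SImode) access the default case above
   accepts word-aligned offsets 0, 4, ..., 124, since VAL plus the access
   size must not exceed 128.  */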
/* Worker function for TARGET_LEGITIMATE_ADDRESS_P.  */
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
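/* Note: it is assumed here that gen_load_tp_hard reads the thread pointer
   directly from the coprocessor thread-ID register, whereas the soft
   variant calls a helper routine that returns the thread pointer in r0,
   which is why the result is copied out of r0 immediately above.  */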
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),
                              UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme.  */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),
                              UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
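/* Summary of the cases above: global-dynamic and local-dynamic call
   __tls_get_addr (relocations TLS_GD32, and TLS_LDM32 plus TLS_LDO32
   addends), initial-exec loads the offset of the variable from the GOT
   (TLS_IE32), and local-exec adds a link-time constant (TLS_LE32) to the
   thread pointer.  */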
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode || mode == DFmode)
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only
         and hope that arm_gen_constant will enable ldrb to use more
         bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
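/* Worked example of the CONST_INT case above: for SImode, BITS is 12, so
   x = 0x12345 splits into base = 0x12000 and index = 0x345; the base is
   loaded into a register once and the 12-bit index is folded into the
   addressing mode of each adjacent access.  */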
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference; forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a
             register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
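/* Worked example of the offset biasing above: an SImode access at
   base + 300 when optimizing for size gives delta = 300 - (256 - 4) = 48,
   so the base register is advanced by 252 once and the access itself uses
   the legitimate offset 48.  */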
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        {
          /* ARM currently does not provide relocations to encode TLS
             variables into AArch32 instructions, only data, so there is no
             way to currently implement these if a literal pool is
             disabled.  */
          if (arm_disable_literal_pool)
            sorry ("accessing thread-local storage is not currently supported "
                   "with -mpure-code or -mslow-flash-data");

          return true;
        }

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}
static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    return true;

  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
          || flag_pic);
}
static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
        return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256
              /* 16-bit constant.  */
              || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need
   finer-grained tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
         defined by RTL expansion, especially for the expansion of
         multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
           && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
          || (GET_CODE (XEXP (x, 1)) == MULT
              && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
        return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* Thumb1 mul instruction can't operate on const.  We must load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          /* For the targets which have a very small and high-latency multiply
             unit, we prefer to synthesize the mult with up to 5 instructions,
             giving a good balance between size and performance.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            return COSTS_N_INSNS (5);
          else
            return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
          || satisfies_constraint_K (SET_SRC (x))
             /* Too big an immediate for a 2-byte mov, using MOVT.  */
          || (CONST_INT_P (SET_SRC (x))
              && UINTVAL (SET_SRC (x)) >= 256
              && TARGET_HAVE_MOVT
              && satisfies_constraint_j (SET_SRC (x)))
             /* thumb1_movdi_insn.  */
          || ((words > 1) && MEM_P (SET_SRC (x))))
        cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* movw is 4 bytes long.  */
          if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
              + COSTS_N_INSNS (1)
                * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case SImode:
          return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p,
                 int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
         use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
                  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
                  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
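/* The macro above recognises shift-and-operate forms in narrow (QImode or
   HImode) arithmetic, e.g. (plus (ashift (reg) (const_int 2)) (reg)), and
   costs them as a single arithmetic-with-shift operation; only left shifts
   qualify, which is what arm_rtx_shift_left_p checks.  */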
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a
   MEM, considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
               int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is
       computed below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
        {
        default:
        case REG:
          op_type = AMO_DEFAULT;
          break;
        case MINUS:
          /* MINUS does not appear in RTL, but the architecture supports it,
             so handle this case defensively.  */
          /* fall through */
        case PLUS:
          op_type = AMO_NO_WB;
          break;
        case PRE_INC:
        case PRE_DEC:
        case POST_INC:
        case POST_DEC:
        case PRE_MODIFY:
        case POST_MODIFY:
          op_type = AMO_WB;
          break;
        }

      if (VECTOR_MODE_P (mode))
        *cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
        *cost += current_tune->addr_mode_costs->fp[op_type];
      else
        *cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
        {
          if (GET_MODE_SIZE (mode) == 8)
            *cost += extra_cost->ldst.loadd;
          else
            *cost += extra_cost->ldst.loadf;
        }
      else if (VECTOR_MODE_P (mode))
        *cost += extra_cost->ldst.loadv;
      else
        {
          /* Integer modes.  */
          if (GET_MODE_SIZE (mode) == 8)
            *cost += extra_cost->ldst.ldrd;
          else
            *cost += extra_cost->ldst.load;
        }
    }

  return true;
}
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
                        const struct cpu_cost_table *extra_cost,
                        int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
        *cost = thumb1_rtx_costs (x, code, outer_code);
      else
        *cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
          && REG_P (SET_DEST (x)))
        {
          /* Assume that most copies can be done with a single insn,
             unless we don't have HW FP, in which case everything
             larger than word mode will require two insns.  */
          *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
                                   && GET_MODE_SIZE (mode) > 4)
                                  || mode == DImode)
                                 ? 2 : 1);
          /* Conditional register moves can be encoded
             in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
            *cost >>= 1;

          return true;
        }

      if (CONST_INT_P (SET_SRC (x)))
        {
          /* Handle CONST_INT here, since the value doesn't have a mode
             and we would otherwise be unable to work out the true cost.  */
          *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
                            0, speed_p);
          outer_code = SET;
          /* Slightly lower the cost of setting a core reg to a constant.
             This helps break up chains and allows for better scheduling.  */
          if (REG_P (SET_DEST (x))
              && REGNO (SET_DEST (x)) <= LR_REGNUM)
            *cost -= 1;
          x = SET_SRC (x);
          /* Immediate moves with an immediate in the range [0, 255] can be
             encoded in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
              && INTVAL (x) >= 0 && INTVAL (x) <= 255)
            *cost >>= 1;
          goto const_int_cost;
        }

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);

    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
         (ldm_1st) which will load the number of registers mentioned in
         ldm_regs_per_insn_1st registers; then each additional
         ldm_regs_per_insn_subsequent registers cost one more insn.  The
         formula for N regs is thus:

           ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
                                     + ldm_regs_per_insn_subsequent - 1)
                                    / ldm_regs_per_insn_subsequent).

         Additional costs may also be added for addressing.  A similar
         formula is used for STM.  */

      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
        {
          if (speed_p)
            {
              HOST_WIDE_INT nregs = XVECLEN (x, 0);
              HOST_WIDE_INT regs_per_insn_1st = is_ldm
                                ? extra_cost->ldst.ldm_regs_per_insn_1st
                                : extra_cost->ldst.stm_regs_per_insn_1st;
              HOST_WIDE_INT regs_per_insn_sub = is_ldm
                           ? extra_cost->ldst.ldm_regs_per_insn_subsequent
                           : extra_cost->ldst.stm_regs_per_insn_subsequent;

              *cost += regs_per_insn_1st
                       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
                                         + regs_per_insn_sub - 1)
                                        / regs_per_insn_sub);
            }

          return true;
        }

      return false;
    }
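/* Worked example of the formula above: loading five registers on a tuning
   with ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2
   adds the first-group cost plus
   COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2), i.e. two further
   instructions' worth.  */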
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *cost += COSTS_N_INSNS (speed_p
                                ? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
        *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
        *cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
         possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */

    case MOD:
      /* MOD by a power of 2 can be expanded as:
         rsbs    r1, r0, #0
         and     r0, r0, #(n - 1)
         and     r1, r1, #(n - 1)
         rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
          && exact_log2 (INTVAL (XEXP (x, 1))) > 0
          && mode == SImode)
        {
          *cost += COSTS_N_INSNS (3);

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical
                     + extra_cost->alu.arith;
          return true;
        }

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
         possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
        {
          *cost += (COSTS_N_INSNS (1)
                    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
          if (speed_p)
            *cost += extra_cost->alu.shift_reg;
          return true;
        }
      /* Fall through.  */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
          *cost += (COSTS_N_INSNS (2)
                    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
          /* Slightly disparage left shift by 1 so we prefer adddi3.  */
          if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
            *cost += 1;
          return true;
        }
      else if (mode == SImode)
        {
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
          /* Slightly disparage register shifts at -Os, but not by much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_SIZE (mode) < 4)
        {
          if (code == ASHIFT)
            {
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              /* Slightly disparage register shifts at -Os, but not by
                 much.  */
              if (!CONST_INT_P (XEXP (x, 1)))
                *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                          + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
            }
          else if (code == LSHIFTRT || code == ASHIFTRT)
            {
              if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
                {
                  /* Can use SBFX/UBFX.  */
                  if (speed_p)
                    *cost += extra_cost->alu.bfx;
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                }
              else
                {
                  *cost += COSTS_N_INSNS (1);
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                  if (speed_p)
                    {
                      if (CONST_INT_P (XEXP (x, 1)))
                        *cost += 2 * extra_cost->alu.shift;
                      else
                        *cost += (extra_cost->alu.shift
                                  + extra_cost->alu.shift_reg);
                    }
                  else
                    /* Slightly disparage register shifts.  */
                    *cost += !CONST_INT_P (XEXP (x, 1));
                }
            }
          else /* Rotates.  */
            {
              *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              if (speed_p)
                {
                  if (CONST_INT_P (XEXP (x, 1)))
                    *cost += (2 * extra_cost->alu.shift
                              + extra_cost->alu.log_shift);
                  else
                    *cost += (extra_cost->alu.shift
                              + extra_cost->alu.shift_reg
                              + extra_cost->alu.log_shift_reg);
                }
            }
          return true;
        }

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
        {
          if (mode == SImode)
            {
              if (speed_p)
                *cost += extra_cost->alu.rev;

              return false;
            }
        }
      else
        {
          /* No rev instruction available.  Look at arm_legacy_rev
             and thumb_legacy_rev for the form of RTL used then.  */
          if (TARGET_THUMB)
            {
              *cost += COSTS_N_INSNS (9);

              if (speed_p)
                {
                  *cost += 6 * extra_cost->alu.shift;
                  *cost += 3 * extra_cost->alu.logical;
                }
            }
          else
            {
              *cost += COSTS_N_INSNS (4);

              if (speed_p)
                {
                  *cost += 2 * extra_cost->alu.shift;
                  *cost += extra_cost->alu.arith_shift;
                  *cost += 2 * extra_cost->alu.logical;
                }
            }
          return true;
        }
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT
              || GET_CODE (XEXP (x, 1)) == MULT)
            {
              rtx mul_op0, mul_op1, sub_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              if (GET_CODE (XEXP (x, 0)) == MULT)
                {
                  mul_op0 = XEXP (XEXP (x, 0), 0);
                  mul_op1 = XEXP (XEXP (x, 0), 1);
                  sub_op = XEXP (x, 1);
                }
              else
                {
                  mul_op0 = XEXP (XEXP (x, 1), 0);
                  mul_op1 = XEXP (XEXP (x, 1), 1);
                  sub_op = XEXP (x, 0);
                }

              /* The first operand of the multiply may be optionally
                 negated.  */
              if (GET_CODE (mul_op0) == NEG)
                mul_op0 = XEXP (mul_op0, 0);

              *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
                        + rtx_cost (mul_op1, mode, code, 0, speed_p)
                        + rtx_cost (sub_op, mode, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }

      if (mode == SImode)
        {
          rtx shift_by_reg = NULL;
          rtx shift_op;
          rtx non_shift_op;

          shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
          if (shift_op == NULL)
            {
              shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
              non_shift_op = XEXP (x, 0);
            }
          else
            non_shift_op = XEXP (x, 1);

          if (shift_op != NULL)
            {
              if (shift_by_reg != NULL)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
              *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
              return true;
            }

          if (arm_arch_thumb2
              && GET_CODE (XEXP (x, 1)) == MULT)
            {
              /* MLS.  */
              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0,
                                 speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1,
                                 speed_p);
              return true;
            }

          if (CONST_INT_P (XEXP (x, 0)))
            {
              int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 0)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
              return true;
            }
          else if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          /* We check both sides of the MINUS for shifter operands since,
             unlike PLUS, it's not commutative.  */

          HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
          HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

          /* Slightly disparage, as we might need to widen the result.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;

          if (CONST_INT_P (XEXP (x, 0)))
            {
              *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
              return true;
            }

          return false;
        }

      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
            {
              rtx op1 = XEXP (x, 1);

              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;

              if (GET_CODE (op1) == ZERO_EXTEND)
                *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
                                   0, speed_p);
              else
                *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
                                 ZERO_EXTEND, 0, speed_p);
              return true;
            }
          else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
                                  SIGN_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
              return true;
            }
          else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));
              *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
                                    GET_CODE (XEXP (x, 1)), 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              rtx mul_op0, mul_op1, add_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              mul_op0 = XEXP (XEXP (x, 0), 0);
              mul_op1 = XEXP (XEXP (x, 0), 1);
              add_op = XEXP (x, 1);

              *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
                        + rtx_cost (mul_op1, mode, code, 0, speed_p)
                        + rtx_cost (add_op, mode, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }

      /* Narrow modes can be synthesized in SImode, but the range
         of useful sub-operations is limited.  Check for shift operations
         on one of the operands.  Only left shifts can be used in the
         narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              /* Slightly penalize a narrow operation as the result may
                 need widening.  */
              *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
              return true;
            }

          /* Slightly penalize a narrow operation as the result may
             need widening.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }

      if (mode == SImode)
        {
          rtx shift_op, shift_reg;

          if (TARGET_INT_SIMD
              && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
            {
              /* UXTA[BH] or SXTA[BH].  */
              if (speed_p)
                *cost += extra_cost->alu.extend_arith;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
              return true;
            }

          shift_reg = NULL;
          shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              rtx mul_op = XEXP (x, 0);

              if (TARGET_DSP_MULTIPLY
                  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
                       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                           || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                               && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                               && INTVAL (XEXP (XEXP (mul_op, 1), 1))
                                  == 16)))
                      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
                          && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
                          && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
                          && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                              || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                                  && CONST_INT_P (XEXP (XEXP (mul_op, 1),
                                                        1))
                                  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
                                      == 16))))))
                {
                  /* SMLA[BT][BT].  */
                  if (speed_p)
                    *cost += extra_cost->mult[0].extend_add;
                  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
                                      SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
                                        SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (XEXP (x, 1), mode, PLUS, 1,
                                        speed_p));
                  return true;
                }

              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
                        + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
              return true;
            }
          else if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }

      if (mode == DImode)
        {
          if (arm_arch3m
              && GET_CODE (XEXP (x, 0)) == MULT
              && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
                  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
                      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
            {
              if (speed_p)
                *cost += extra_cost->mult[1].extend_add;
              *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
                                    ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          *cost += COSTS_N_INSNS (1);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));

              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
        {
          if (speed_p)
            *cost += extra_cost->alu.rev;

          return true;
        }
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));
          rtx op0 = XEXP (x, 0);
          rtx shift_op, shift_reg;

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          shift_reg = NULL;
          shift_op = shifter_op_p (op0, &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;

              *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (code, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);

              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.logical;
              *cost += rtx_cost (op0, mode, code, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          *cost += (rtx_cost (op0, mode, code, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
          return true;
        }

      if (mode == DImode)
        {
          rtx op0 = XEXP (x, 0);
          enum rtx_code subcode = GET_CODE (op0);

          *cost += COSTS_N_INSNS (1);

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          if (GET_CODE (op0) == ZERO_EXTEND)
            {
              if (speed_p)
                *cost += 2 * extra_cost->alu.logical;

              *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
              return true;
            }
          else if (GET_CODE (op0) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.logical
                         + extra_cost->alu.log_shift;

              *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical;

          return true;
        }
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          rtx op0 = XEXP (x, 0);

          if (GET_CODE (op0) == NEG && !flag_rounding_math)
            op0 = XEXP (op0, 0);

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].mult;

          *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }

      if (mode == SImode)
        {
          if (TARGET_DSP_MULTIPLY
              && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                           && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
                  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
                      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
                      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
                      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                          || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                              && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                              && (INTVAL (XEXP (XEXP (x, 1), 1))
                                  == 16))))))
            {
              /* SMUL[TB][TB].  */
              if (speed_p)
                *cost += extra_cost->mult[0].extend;
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
                                 SIGN_EXTEND, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
                                 SIGN_EXTEND, 1, speed_p);
              return true;
            }
          if (speed_p)
            *cost += extra_cost->mult[0].simple;
          return false;
        }

      if (mode == DImode)
        {
          if (arm_arch3m
              && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
                  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
            {
              if (speed_p)
                *cost += extra_cost->mult[1].extend;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
                                    ZERO_EXTEND, 0, speed_p));
              return true;
            }

          *cost = LIBCALL_COST (2);
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}
      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op, shift_reg = NULL;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;
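      /* Worked example (added for illustration): a store-flag LT against
	 zero, Rd = (Rn < 0), copies the sign bit into bit 0 with the single
	 logical shift LSR Rd, Rn, #31, hence the lone alu.shift cost above,
	 whereas GT needs the two-insn RSB/LSR sequence and is costed one
	 insn higher.  */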
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
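      /* For example (illustrative): 0x0000ffff fits in SImode, so it is
	 costed by a single arm_gen_constant query in SImode; a value such
	 as 1 << 32 does not, so it is costed as two SImode constants, one
	 for the low word and one for the value shifted right by 32.  */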
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	  /* Extra costs?  */
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	  /* Extra costs?  */
	}

      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      /* Fixme.  */
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return true;
    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;

    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
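      /* Illustrative note: the pattern matched above is the high half of a
	 widening 32x32->64 multiply, e.g. (int) (((long long) a * b) >> 32),
	 which SMULL/UMULL deliver directly in their high result register,
	 so it is costed as a single extending multiply.  */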
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;

    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }
    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase the
	     cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
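/* Hypothetical example of the situation handled above: on XScale, if the
   shifted operand of
     add r0, r3, r1, lsl #2
   is written by the instruction it depends on, e.g.
     mov r1, r2
   then the true dependence on r1 incurs an extra stall, so the hook reports
   a cost of 2 rather than the default latency.  */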
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int * cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 3;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
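/* For instance, building a V4SI vector from scalars (vec_construct) is
   costed as 4 / 2 + 1 = 3 units by the code above, so construction cost
   grows with the number of elements rather than being treated as a single
   insn.  */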
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
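/* Worked example: a vector_stmt of cost 1 counted twice inside an inner
   loop yields retval = (2 * 50) * 1 = 100 units added to the vect_body
   bucket, reflecting the heavier weight given to inner-loop statements.  */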
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}

/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
  return;
}
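/* Illustration: with a ready list of { Y1, O1, Y2 } (head first), where
   the Y insns can dual-issue as younger and O1 only as older, the scan
   above finds first_younger at Y1 and first_older_only at O1, and the
   rotation produces { O1, Y1, Y2 }, so the older-only insn is issued
   first and Y1 remains available to pair with it as younger.  */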
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}

static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using this
   formula (with ^ for exponentiation):

     x = (-1)^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
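/* Worked examples of the encoding (added for illustration): 1.0
   = (-1)^0 * 16 * 2^-4, so s = 0, n = 16, r = 4, giving ABCDEFGH
   = 0 111 0000 (r XOR 3 = 7, n - 16 = 0), i.e. index 0x70; likewise
   -0.5 = (-1)^1 * 16 * 2^-5 encodes as 1 110 0000, i.e. 0xe0.  */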
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
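/* Illustration (added): a V4SI constant with all elements 0x0000004d
   matches variant 0 with abcdefgh = 0x4d and is emitted as vmov.i32,
   while all elements 0xffffff3a match variant 6, since the bitwise
   inverse 0x000000c5 fits the same byte pattern and is emitted as
   vmvn.i32.  */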
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    {						\
      if (!(TEST))				\
	matches = 0;				\
    }						\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
	mode = DImode;
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT determines whether the shift is a left or a
   right shift, because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
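/* For example (illustrative): a V4SI vector of all 8s is a valid immediate
   for both vshl.i32 (0 <= 8 <= 31) and vshr.s32/vshr.u32 (1 <= 8 <= 32),
   with *elementwidth set to 32; a left-shift count of 32 would be rejected
   while the same count remains legal for a right shift.  */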
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
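/* Illustration: for a 4-element vector {a, b, c, d} the loop emits two
   pairwise operations (assuming REDUC generates VPADD-style insns): the
   first produces {a+b, c+d, a+b, c+d} into a fresh register, and the
   second pairwise-adds that with itself, leaving a+b+c+d in element 0
   of OP0.  */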
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
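/* Illustration of the ladder above: {5, 5, 5, 5} as V4SI is a valid VMOV
   immediate, so the CONST_VECTOR itself is returned; a duplicated
   0x12345678 matches no VMOV pattern but can be loaded into a core
   register and broadcast with VDUP; a PARALLEL containing a SYMBOL_REF
   element falls all the way through and yields NULL_RTX.  */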
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
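
/* Worked example (illustrative only): with TYPE == 0 and a quad-register
   mode such as V4SImode, a constant offset of 1012 is the largest this
   predicate accepts, while 1016 is rejected; for double-register modes the
   usual word-aligned limit of 1020 applies.  */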
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X contains any TLS symbol references.  */

int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
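
/* Worked example (illustrative only): for LO == 0 and HI == 255,
   exact_log2 (256) == 8, so the pair matches unsigned saturation
   ("usat #8"); for LO == -128 and HI == 127 it matches signed saturation
   ("ssat #8"), since -128 == -127 - 1 and *mask becomes log + 1.  */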
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
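
/* For illustration (not part of the original sources; register numbers
   assumed): the pair (mem (plus (reg r3) (const_int 4))) and
   (mem (plus (reg r3) (const_int 8))) is adjacent (same base, offsets
   differing by exactly 4), while switching one base to r4 or widening the
   gap to 8 bytes makes the predicate return 0.  */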
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if the value is to be loaded into the PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, the first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for the kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, the address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
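
/* An example (illustrative only; register numbers assumed) of a PARALLEL
   this predicate accepts as an SImode load multiple with write-back:

     (parallel [(set (reg r1) (plus (reg r1) (const_int 8)))
		(set (reg r4) (mem (reg r1)))
		(set (reg r5) (mem (plus (reg r1) (const_int 4))))])

   The write-back amount equals 2 registers * 4 bytes and the destination
   register numbers ascend.  */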
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1 - 3
	  2		 2 - 6
	  3		 3 - 9
	  4		 4 - 12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
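
/* Worked example (illustrative only): for NOPS == 4 and unsorted offsets
   {8, 0, 4, 12} with ORDER[0] == 1 (the lowest offset, 0), the function
   fills ORDER with {1, 2, 0, 3}, since each successive offset must exceed
   the previous one by exactly 4; offsets such as {0, 4, 4, 8} fail the
   uniqueness test above.  */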
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
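
/* For illustration (not part of the original sources): a run of offsets
   starting at 0 maps to ldm_case 1 (ldmia), starting at 4 to case 2
   (ldmib), ending at 0 to case 3 (ldmda), and ending at -4 to case 4
   (ldmdb); any other base offset that is a valid ARM immediate yields
   case 5, which needs an extra add before the ldm.  */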
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
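
/* An example (illustrative only; register numbers assumed) of the RTL
   produced for COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8, with r1
   as the base and MEMS addressed at r1 and r1 + 4:

     (parallel [(set (reg r1) (plus (reg r1) (const_int 8)))
		(set (reg r4) (mem (reg r1)))
		(set (reg r5) (mem (plus (reg r1) (const_int 4))))])

   This matches the form checked by ldm_stm_operation_p above.  */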
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = REGNO (regs[i]);

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
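
/* Worked example (illustrative only): for LENGTH == 11 and
   INTERLEAVE_FACTOR == 2 this emits one 8-byte block (two word loads
   followed by two word stores), no leftover whole word, then one halfword
   copy and one final byte copy, with the halfword and byte stores deferred
   until both loads have been issued.  */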
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
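
/* The emitted loop has roughly this shape (illustrative pseudo-assembly;
   register names assumed):

	add	rN, r_src, #length_rounded_down
     1:
	<straight copy of bytes_per_iter bytes>
	add	r_src, r_src, #bytes_per_iter
	add	r_dst, r_dst, #bytes_per_iter
	cmp	r_src, rN
	bne	1b

   followed by a straight copy of any leftover bytes.  */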
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 redundant.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   by mode size.  */
inline static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success. */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  /* If either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
        {
          low_reg = gen_lowpart (SImode, reg0);
          hi_reg = gen_highpart_mode (SImode, DImode, reg0);
        }

      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        {
          emit_insn (gen_unaligned_loadsi (low_reg, src));
          src = next_consecutive_mem (src);
          emit_insn (gen_unaligned_loadsi (hi_reg, src));
        }

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        {
          emit_insn (gen_unaligned_storesi (dst, low_reg));
          dst = next_consecutive_mem (dst);
          emit_insn (gen_unaligned_storesi (dst, hi_reg));
        }

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
        return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
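/* As an illustration of the expansion above (a sketch, not the output of
   any particular build): for a 10-byte copy with both buffers word
   aligned, the loop emits one DImode ldrd/strd pair for the first eight
   bytes and a halfword pair for the tail:

	ldrd	r4, r5, [r1]
	strd	r4, r5, [r0]
	ldrh	r3, [r1, #8]
	strh	r3, [r0, #8]

   The register numbers here are hypothetical; the actual choice is made
   by the register allocator.  */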
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT:
          return CC_DLTmode;
        case LE:
          return CC_DLEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT:
          return CC_DGTmode;
        case GE:
          return CC_DGEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
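/* For example (illustrative only): a test of the form
   (ne (ior (eq r0 0) (eq r1 0)) (const_int 0)) reaches this function
   with COND_OR == DOM_CC_X_OR_Y and cond1 == cond2 == EQ, so the EQ
   arm of the switch above selects CC_DEQmode.  */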
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_ARM)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
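/* Illustrative examples (not exhaustive): comparing (neg r1) against r0
   for equality selects CC_Zmode, since only the Z flag is needed; an
   SImode (plus r0 r1) compared against r1 with LTU selects CC_Cmode,
   letting the carry flag of the addition itself serve as the result.  */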
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
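/* Typical use (a hypothetical sketch, not a specific caller): an
   expander wanting to branch on r0 >= r1 would do

     rtx cc = arm_gen_compare_reg (GE, op0, op1, NULL_RTX);

   and then emit a conditional branch on (ge cc (const_int 0)).  */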
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
         the value as an SImode register move.  This happens when the target
         architecture variant does not have an HImode register move.  */
      if (base == NULL)
        {
          gcc_assert (REG_P (operands[0]));
          emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
                                gen_rtx_SUBREG (SImode, ref, 0)));
          return;
        }
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
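/* Sketch of the little-endian expansion (register numbers hypothetical):
   reloading "ldrh r0, [rb, #off]" becomes

	ldrb	r2, [rb, #off]
	ldrb	r0, [rb, #off+1]
	orr	r0, r2, r0, lsl #8

   so only byte loads, which every supported variant has, are required.  */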
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
         the value as an SImode register move.  This happens when the target
         architecture variant does not have an HImode register move.  */
      if (base == NULL)
        {
          gcc_assert (REG_P (outval) || SUBREG_P (outval));

          if (REG_P (outval))
            {
              emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
                                    gen_rtx_SUBREG (SImode, outval, 0)));
            }
          else /* SUBREG_P (outval)  */
            {
              if (GET_MODE (SUBREG_REG (outval)) == SImode)
                emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
                                      SUBREG_REG (outval)));
              else
                /* FIXME: Handle other cases ?  */
                gcc_unreachable ();
            }
          return;
        }
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            std::swap (scratch, base_plus);
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                std::swap (scratch, base_plus);
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     outval.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
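/* Little-endian illustration (register choice is reload's, not fixed):
   storing r0 as a halfword at [rb, #off] becomes

	strb	r0, [rb, #off]
	lsr	r2, r0, #8
	strb	r2, [rb, #off+1]

   mirroring the load case above.  */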
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if (type)
        {
          if ((AGGREGATE_TYPE_P (type)
               || TREE_CODE (type) == COMPLEX_TYPE
               || FIXED_POINT_TYPE_P (type))
              && int_size_in_bytes (type) <= 4)
            return true;
        }
      else
        {
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
              && GET_MODE_SIZE (mode) <= 4)
            return true;
        }
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
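/* For instance, Thumb-2 (max_offset == 1020) rejects an offset of 1024
   and also rejects 6 (not a multiple of 4), while ARM mode
   (max_offset == 255) accepts -252 but rejects 1020.  */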
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the
   offsets is 4.  If reload is complete, then check that registers are legal.
   WBACK indicates whether the address is updated.  LOAD indicates whether
   the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
                       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
          || (t == SP_REGNUM)
          || (t == PC_REGNUM)
          || (t2 == SP_REGNUM)
          || (t2 == PC_REGNUM)
          || (!load && (n == PC_REGNUM))
          || (load && (t == t2))
          /* Triggers Cortex-M3 LDRD errata.  */
          || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
          || (t2 == PC_REGNUM)
          || (t % 2 != 0)   /* First destination register is not even.  */
          || (t2 != t + 1)
          /* PC can be used as base register (for offset addressing only),
             but it is deprecated.  */
          || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
          ? (align >= BITS_PER_WORD && (offset & 3) == 0)
          : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}
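/* E.g. with unaligned_access enabled, a word-aligned (32-bit) buffer at
   offset 8 qualifies; without it, the buffer must be doubleword aligned
   (64-bit) and the offset a multiple of 8.  */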
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
                                 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
                        bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
                                 &align[i]))
        return false;

      if (i == 0)
        base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
        return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
        {
          tmp = SUBREG_REG (operands[i]);
          gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
          operands[i] = tmp;
        }
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
        {
          CLEAR_HARD_REG_SET (regset);
          tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
          if (tmp == NULL_RTX)
            return false;

          /* Use the new register in the first load to ensure that
             if the original input register is not dead after peephole,
             then it will have the correct constant value.  */
          operands[0] = tmp;
        }
      else if (TARGET_ARM)
        {
          int regno = REGNO (operands[0]);
          if (!peep2_reg_dead_p (4, operands[0]))
            {
              /* When the input register is even and is not dead after the
                 pattern, it has to hold the second constant but we cannot
                 form a legal STRD in ARM mode with this register as the second
                 register.  */
              if (regno % 2 == 0)
                return false;

              /* Is regno-1 free? */
              SET_HARD_REG_SET (regset);
              CLEAR_HARD_REG_BIT(regset, regno - 1);
              tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
              if (tmp == NULL_RTX)
                return false;

              operands[0] = tmp;
            }
          else
            {
              /* Find a DImode register.  */
              CLEAR_HARD_REG_SET (regset);
              tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
              if (tmp != NULL_RTX)
                {
                  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
                  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
                }
              else
                {
                  /* Can we use the input register to form a DI register?  */
                  SET_HARD_REG_SET (regset);
                  CLEAR_HARD_REG_BIT(regset,
                                     regno % 2 == 0 ? regno + 1 : regno - 1);
                  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
                  if (tmp == NULL_RTX)
                    return false;
                  operands[regno % 2 == 1 ? 0 : 1] = tmp;
                }
            }

          gcc_assert (operands[0] != NULL_RTX);
          gcc_assert (operands[1] != NULL_RTX);
          gcc_assert (REGNO (operands[0]) % 2 == 0);
          gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
        }
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
        std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
                             false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
                                 false, load))
        return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
         reordered or replaced by other registers that are free in the
         current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
          || !peep2_reg_dead_p (4, operands[1]))
        return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
         can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]  */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
                                 false, false))
        {
          std::swap (operands[0], operands[1]);
          return true;
        }

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
        {
          tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
          if (tmp == NULL_RTX)
            return false;

          /* DREG must be an even-numbered register in DImode.
             Split it into SI registers.  */
          operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
          operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
          gcc_assert (operands[0] != NULL_RTX);
          gcc_assert (operands[1] != NULL_RTX);
          gcc_assert (REGNO (operands[0]) % 2 == 0);
          gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

          return (operands_ok_ldrd_strd (operands[0], operands[1],
                                         base, offset,
                                         false, load));
        }
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
        int i;

        fprintf (f, "<");
        for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
          {
            fprintf (f, HOST_WIDE_INT_PRINT_HEX,
                     INTVAL (CONST_VECTOR_ELT (x, i)));
            if (i < (CONST_VECTOR_NUNITS (x) - 1))
              fputc (',', f);
          }
        fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
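/* A small worked example of the grouping step (addresses invented for
   illustration): given two fixups at 0x100 and 0x180, each with a 4k
   forward range, both can reach a pool placed after any barrier before
   roughly 0x100 + 4096, the tighter of the two bounds.  They are thus
   grouped into a single table emitted after the last in-range barrier;
   if no such barrier exists, a jump around a forced barrier is created
   by create_fix_barrier below.  */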
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix * next;
  rtx_insn * insn;
  HOST_WIDE_INT address;
  rtx * loc;
  machine_mode mode;
  int fix_size;
  rtx value;
  Mnode * minipool;
  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
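/* So MINIPOOL_FIX_SIZE (HImode) is 4 (the 2-byte value is padded),
   while MINIPOOL_FIX_SIZE (DImode) is 8; the two modes here are merely
   illustrative of the macro's two cases.  */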
static Mnode * minipool_vector_head;
static Mnode * minipool_vector_tail;
static rtx_code_label * minipool_vector_label;
static int minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * minipool_fix_head;
Mfix * minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix * minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
        {
        case 1:
          /* Round up size of TBB table to a halfword boundary.  */
          size = (size + 1) & ~HOST_WIDE_INT_1;
          break;
        case 2:
          /* No padding necessary for TBH.  */
          break;
        case 4:
          /* Add two bytes for alignment on Thumb.  */
          if (TARGET_THUMB)
            size += 2;
          break;
        default:
          gcc_unreachable ();
        }
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
                               HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
        mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
         mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp->prev;
      else
        minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode * max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode * mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value))
        {
          /* More than one fix references this entry.  */
          mp->refcount++;
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);
        }

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
          && mp->max_address > max_address)
        max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
         we have not already found an insertion point, then
         make sure that all such 8-byte aligned quantities are
         placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
          && max_mp == NULL
          && fix->fix_size >= 8
          && mp->fix_size < 8)
        {
          max_mp = mp;
          max_address = mp->max_address;
        }
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
        {
          minipool_vector_head = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
                                HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
        mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
         mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp->next;
      else
        minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
        offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
          >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value)
          /* Check that there is enough slack to move this entry to the
             end of the table (this is conservative).  */
          && (mp->max_address
              > (minipool_barrier->address
                 + minipool_vector_tail->offset
                 + minipool_vector_tail->fix_size)))
        {
          mp->refcount++;
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);
        }

      if (min_mp != NULL)
        mp->min_address += fix->fix_size;
      else
        {
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
            {
              /* For now, we do not allow the insertion of 8-byte alignment
                 requiring nodes anywhere but at the start of the pool.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                min_mp = mp;
            }
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
            {
              /* Inserting before this entry would push the fix beyond
                 its maximum address (which can happen if we have
                 re-located a forwards fix); force the new fix to come
                 after it.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                {
                  min_mp = mp;
                  min_address = mp->min_address + fix->fix_size;
                }
            }
          /* Do not insert a non-8-byte aligned quantity before 8-byte
             aligned quantities.  */
          else if (ARM_DOUBLEWORD_ALIGN
                   && fix->fix_size < 8
                   && mp->fix_size >= 8)
            {
              min_mp = mp;
              min_address = mp->min_address + fix->fix_size;
            }
        }
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
        {
          minipool_vector_tail = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
        mp->next->offset = mp->offset + mp->fix_size;
      else
        mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
        offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
        {
          align64 = 1;
          break;
        }

  if (dump_file)
    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
        {
          if (dump_file)
            {
              fprintf (dump_file,
                       ";;  Offset %u, min %ld, max %ld ",
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
                       (unsigned long) mp->max_address);
              arm_print_value (dump_file, mp->value);
              fputc ('\n', dump_file);
            }

          rtx val = copy_rtx (mp->value);

          switch (GET_MODE_SIZE (mp->mode))
            {
#ifdef HAVE_consttable_1
            case 1:
              scan = emit_insn_after (gen_consttable_1 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_2
            case 2:
              scan = emit_insn_after (gen_consttable_2 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_4
            case 4:
              scan = emit_insn_after (gen_consttable_4 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_8
            case 8:
              scan = emit_insn_after (gen_consttable_8 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_16
            case 16:
              scan = emit_insn_after (gen_consttable_16 (val), scan);
              break;
#endif
            default:
              gcc_unreachable ();
            }
        }

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
         code that pushes minipool fixes.  */
      if (LABEL_P (from))
        count += get_label_padding (from);
      else
        count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
        {
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
            {
              selected = tmp;
              selected_cost = new_cost;
              selected_address = fix->address + count;
            }

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);
          continue;
        }

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
        {
          selected = from;
          selected_cost = new_cost;
          selected_address = fix->address + count;
        }

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
          && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
        selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
                   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
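/* For example (values illustrative): 0x0000000100000001 splits into a
   low part and a high part of 1, each synthesizable with a single mov,
   so the total cost is 2 and the constant is built inline rather than
   loaded from a literal pool.  */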
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
                           NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
        continue;

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
        {
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
            {
              if (do_pushes)
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                   recog_data.operand_mode[opno], op);
            }
          else if (MEM_P (op)
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
            {
              if (do_pushes)
                {
                  rtx cop = avoid_constant_pool_reference (op);

                  /* Casting the address of something to a mode narrower
                     than a word can cause avoid_constant_pool_reference()
                     to return the pool reference itself.  That's no good to
                     us here.  Lets just hope that we can use the
                     constant pool value directly.  */
                  if (op == cop)
                    cop = get_pool_constant (XEXP (op, 0));

                  push_minipool_fix (insn, address,
                                     recog_data.operand_loc[opno],
                                     recog_data.operand_mode[opno], cop);
                }
            }
        }
    }
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit
   this argument or field starts at is passed in STARTING_BIT and the last
   used bit is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int *regno,
			       uint32_t *padding_bits_to_clear,
			       unsigned starting_bit, int *last_used_bit)
{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno'
		 and reset 'current_bit' to point to the current bit in that
		 new register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside
	 a field's size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep
		 the padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of
	 the padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
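
/* A standalone sketch (not built) of the inter-field padding computation
   used above: the padding between a previous field ending at bit LAST
   (0 <= LAST < 32) and a new field starting at bit OFFSET
   (LAST < OFFSET <= 31) of the same register is the run of bits
   [LAST, OFFSET).  */
#if 0
#include <stdint.h>

static uint32_t
padding_mask (int last, int offset)
{
  uint32_t mask = ((uint32_t) -1) >> (32 - offset);  /* bits [0, offset) */
  mask -= ((uint32_t) 1 << last) - 1;		     /* drop bits [0, last) */
  return mask;					     /* bits [last, offset) */
}
#endif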
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t *padding_bits_to_clear)
{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may
	 be passed in floating point registers too.  In some cases a BLKmode
	 is used when returning or passing arguments in multiple VFP
	 registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
/* Clear secret values held in registers before doing a cmse_nonsecure_call
   or returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP
   indicates which registers are to be fully cleared, using the value in
   register CLEARING_REG if more efficient.  The PADDING_BITS_LEN entries
   array PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in
   caller-saved core registers, with SCRATCH_REG used as a scratch register
   for that clearing.

   NOTE: at least one of the three following conditions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is
     set in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */

static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
  bool saved_clearing = false;
  rtx saved_clearing_reg = NULL_RTX;
  int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;

  gcc_assert (arm_arch_cmse);

  if (!bitmap_empty_p (to_clear_bitmap))
    {
      minregno = bitmap_first_set_bit (to_clear_bitmap);
      maxregno = bitmap_last_set_bit (to_clear_bitmap);
    }
  clearing_regno = REGNO (clearing_reg);

  /* Clear padding bits.  */
  gcc_assert (padding_bits_len <= NUM_ARG_REGS);
  for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
    {
      uint32_t mask;
      rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);

      if (padding_bits_to_clear[i] == 0)
	continue;

      /* If this is a Thumb-1 target and SCRATCH_REG is not a low register,
	 use CLEARING_REG as scratch.  */
      if (TARGET_THUMB1
	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
	{
	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
	     such that we can use clearing_reg to clear the unused bits in the
	     arguments.  */
	  if ((clearing_regno > maxregno
	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
	      && !saved_clearing)
	    {
	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
	      emit_move_insn (scratch_reg, clearing_reg);
	      saved_clearing = true;
	      saved_clearing_reg = scratch_reg;
	    }
	  scratch_reg = clearing_reg;
	}

      /* Fill the lower half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) & 0xFFFF;
      emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));

      /* Fill the top half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) >> 16;
      rtx16 = gen_int_mode (16, SImode);
      dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
      if (mask)
	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));

      emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
    }
  if (saved_clearing)
    emit_move_insn (clearing_reg, saved_clearing_reg);

  /* Clear full registers.  */

  /* If not marked for clearing, clearing_reg already does not contain
     any secret.  */
  if (clearing_regno <= maxregno
      && bitmap_bit_p (to_clear_bitmap, clearing_regno))
    {
      emit_move_insn (clearing_reg, const0_rtx);
      emit_use (clearing_reg);
      bitmap_clear_bit (to_clear_bitmap, clearing_regno);
    }

  for (regno = minregno; regno <= maxregno; regno++)
    {
      if (!bitmap_bit_p (to_clear_bitmap, regno))
	continue;

      if (IS_VFP_REGNUM (regno))
	{
	  /* If regno is an even vfp register and its successor is also to
	     be cleared, use vmov.  */
	  if (TARGET_VFP_DOUBLE
	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
	      && bitmap_bit_p (to_clear_bitmap, regno + 1))
	    {
	      emit_move_insn (gen_rtx_REG (DFmode, regno),
			      CONST1_RTX (DFmode));
	      emit_use (gen_rtx_REG (DFmode, regno));
	      regno++;
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SFmode, regno),
			      CONST1_RTX (SFmode));
	      emit_use (gen_rtx_REG (SFmode, regno));
	    }
	}
      else
	{
	  emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
	  emit_use (gen_rtx_REG (SImode, regno));
	}
    }
}
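
/* A standalone sketch (not built) of how the scratch register is filled
   above: the negated padding mask is materialised in two 16-bit halves,
   a plain move for the low half followed by a MOVT-style insert of the
   high half (the ZERO_EXTRACT set).  */
#if 0
#include <stdint.h>

static uint32_t
fill_scratch (uint32_t padding_bits)
{
  uint32_t scratch = (~padding_bits) & 0xFFFF;	/* low half */
  uint32_t top = (~padding_bits) >> 16;		/* high half */
  if (top)
    scratch |= top << 16;			/* MOVT-style insert */
  return scratch;				/* == ~padding_bits */
}
#endif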
/* Clear caller-saved registers not used to pass arguments before a
   cmse_nonsecure_call.  Saving, clearing and restoring of callee-saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.
   See libgcc/config/arm/cmse_nonsecure_call.S.  */

static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  unsigned address_regnum, regno, maxregno =
	    TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
	  auto_sbitmap to_clear_bitmap (maxregno + 1);
	  rtx_insn *seq;
	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool first_param = true;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Determine the caller-saved registers we need to clear.  */
	  bitmap_clear (to_clear_bitmap);
	  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);

	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (TARGET_HARD_FLOAT_ABI)
	    {
	      auto_sbitmap float_bitmap (maxregno + 1);

	      bitmap_clear (float_bitmap);
	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
				D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  address_regnum = REGNO (XEXP (address, 0));
	  if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
	    bitmap_clear_bit (to_clear_bitmap, address_regnum);

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      uint64_t to_clear_args_mask;
	      machine_mode arg_mode = TYPE_MODE (arg_type);

	      if (VOID_TYPE_P (arg_type))
		continue;

	      if (!first_param)
		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
					  true);

	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
					  true);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_args_mask
		= compute_not_to_clear_mask (arg_type, arg_rtx,
					     REGNO (arg_rtx),
					     &padding_bits_to_clear[0]);
	      if (to_clear_args_mask)
		{
		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
		    if (to_clear_args_mask & (1ULL << regno))
		      bitmap_clear_bit (to_clear_bitmap, regno);
		}

	      first_param = false;
	    }

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  clearing_reg = XEXP (address, 0);
	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));

	  /* Clear caller-saved registers that leak before doing a non-secure
	     call.  */
	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
				NUM_ARG_REGS, ip_reg, clearing_reg);

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}
    }
}
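
/* A standalone sketch (not built) of the address sanitisation above: a
   right shift followed by a left shift clears bit 0 (the Thumb bit)
   without needing BIC, and hence without an extra register on Thumb-1.  */
#if 0
#include <stdint.h>

static uint32_t
clear_lsb (uint32_t addr)
{
  addr >>= 1;
  addr <<= 1;
  return addr;	/* same result as addr & ~(uint32_t) 1 */
}
#endif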
/* Rewrite a move insn into a subtract of 0 if the condition codes will
   be useful in the next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by an insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
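
/* For illustration, the transformation above on a Thumb-1 insn stream
   (schematic, not compiler output):

	movs	r1, r2		@ copy; flags unrelated to the value
	...
	cmp	r1, #0
	beq	label

   becomes

	subs	r1, r2, #0	@ copy AND set N/Z from the value
	...
	beq	label		@ the explicit compare can now be removed

   SUBS of zero copies the value like MOV but also sets the condition
   codes, which is what makes the later compare against zero redundant.  */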
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_clear_caller_saved ();
  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it
     should no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
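
/* A standalone sketch (not built) of the recursive split above: an FSTMD
   of more than 16 doubleword registers is emitted as a store of the top
   (count - 16) registers followed by a store of the bottom 16.  Each D
   register occupies two slots in the internal numbering, hence "+ 32".  */
#if 0
static int
fstmd_bytes (int base_reg, int count)
{
  if (count > 16)
    return fstmd_bytes (base_reg + 32, count - 16)
	   + fstmd_bytes (base_reg, 16);
  return count * 8;	/* 8 bytes pushed per doubleword register */
}
#endif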
/* Returns true if -mcmse has been passed and the function pointed to by
   'addr' has the cmse_nonsecure_call attribute and returns false
   otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC
	 clobber is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
/* Emit the pair of insns that loads a 32-bit value or symbolic address into
   DEST: a set of the low half followed by an insert of the high half
   (MOVW/MOVT style).  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
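
/* A standalone sketch (not built) of the immediate split performed above:
   the low half is moved first, then the high half is inserted only when it
   is non-zero, mirroring a MOVW/MOVT pair.  */
#if 0
#include <stdint.h>

static uint32_t
movpair_value (uint32_t val)
{
  uint32_t dest = val & 0x0000ffff;	/* movw dest, #:lower16:val */
  if ((val >> 16) & 0x0000ffff)
    dest |= val & 0xffff0000;		/* movt dest, #:upper16:val */
  return dest;				/* == val */
}
#endif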
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:

	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditionals.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else
		    {
		      if (emit)
			output_asm_insn ("add%?\t%0, %1, %2", otherops);
		    }
		}
	      else
		{
		  if (emit)
		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
		}

	      if (count)
		*count = 2;

	      if (TARGET_LDRD)
		return "ldrd%?\t%0, [%1]";

	      return "ldmia%?\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("strd%?\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%?\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than strd can handle,
	     fix these up with a pair of str.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL (otherops[2]) <= -256
		  || INTVAL (otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldmia%?\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%?\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */

const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
	      || mode == SFmode
	      || mode == DFmode
	      || mode == HImode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : sp ? "32" : "16",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM.  GCC RTL defines element
   ordering based on in-memory order.  This can be different from
   the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      abort ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
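
/* The MULT case above folds multiplication by a power of two into a left
   shift: the operand is validated to be a power of two and the printed
   amount is its base-2 logarithm.  A compiled-out sketch of that mapping
   (helper name ours, purely illustrative):  */
#if 0
static int
mult_to_lsl_amount (unsigned long amount)
{
  int log = 0;

  /* Reject zero and non-powers-of-two, as the checks above do.  */
  if (amount == 0 || (amount & (amount - 1)) != 0)
    return -1;

  while ((amount >>= 1) != 0)
    log++;

  return log;           /* mul by 8 prints as lsl #3 */
}
#endif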
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7, Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
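
/* A compiled-out illustration of the interrupt path above: a non-leaf
   handler must treat every call-clobbered register as potentially live in
   the interrupted code, so r0-r12 (or r0-r7 for FIQ, whose r8-r12 are
   banked) all end up in the mask.  The helper and values are ours, purely
   illustrative.  */
#if 0
static unsigned long
example_isr_save_mask (int is_fiq, int is_leaf)
{
  unsigned long mask = 0;
  unsigned int reg, max_reg = is_fiq ? 7 : 12;

  for (reg = 0; reg <= max_reg; reg++)
    if (!is_leaf)       /* conservatively assume everything is clobbered */
      mask |= 1UL << reg;

  return mask;          /* 0x00ff for a FIQ handler, 0x1fff otherwise */
}
#endif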
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
               || flag_stack_clash_protection)
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 -r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
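
/* Worked example for the accounting above, as a compiled-out sketch (the
   helper is ours): a live run d8-d10 has count == 3 and costs 3 * 8 == 24
   bytes; a run of exactly two registers on a pre-v6 core is padded to
   three because of the ARM10 VFPr1 erratum, so d8-d9 costs 24 bytes there
   instead of 16.  */
#if 0
static int
example_vfp_run_bytes (int count, int have_arch6)
{
  if (count == 2 && !have_arch6)
    count++;            /* ARM10 VFPr1 workaround */
  return count * 8;
}
#endif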
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && !IS_CMSE_ENTRY (func_type)
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |=   (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5 && TARGET_ARM)
                sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                }
            }
          /* For interrupt returns we have to use an LDM rather than
             a POP so that we can use the exception return variant.  */
          else if (IS_INTERRUPT (func_type))
            sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
          else
            sprintf (instr, "pop%s\t{", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          gcc_assert (arm_arch5 || arm_arch4t);
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          if (IS_CMSE_ENTRY (func_type))
            {
              /* Check if we have to clear the 'GE bits' which is only used if
                 parallel add and subtraction instructions are available.  */
              if (TARGET_INT_SIMD)
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
              else
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvq, %%|lr", conditional);

              output_asm_insn (instr, & operand);
              if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
                {
                  /* Clear the cumulative exception-status bits (0-4,7) and the
                     condition code bits (28-31) of the FPSCR.  We need to
                     remember to clear the first scratch register used (IP) and
                     save and restore the second (r4).  */
                  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
                  output_asm_insn (instr, & operand);
                }
              snprintf (instr, sizeof (instr), "bxns\t%%|lr");
            }
          /* Use bx if it's available.  */
          else if (arm_arch5 || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
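
/* The CONDITIONAL buffer built above is itself an operand-printing
   template: "%%?%%%c0" expands to "%?%d0" (or "%?%D0" when REVERSE), so
   every instruction printed through it is predicated on operand 0's
   condition code or its inverse.  A compiled-out sketch:  */
#if 0
static void
example_conditional (char *buf, int reverse)
{
  sprintf (buf, "%%?%%%c0", reverse ? 'D' : 'd');       /* yields "%?%d0" */
}
#endif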
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
                                    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
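
/* A compiled-out illustration of the labels emitted above: for a function
   "foo" carrying the cmse_nonsecure_entry attribute, a second global label
   "__acle_se_foo" is emitted immediately ahead of "foo" so the linker can
   build the secure gateway veneer.  The helper is ours.  */
#if 0
static void
example_cmse_label (char *buf, size_t len, const char *name)
{
  snprintf (buf, len, "%s%s", "__acle_se_", name);      /* "__acle_se_foo" */
}
#endif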
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
               crtl->args.size,
               crtl->args.pretend_args_size,
               (HOST_WIDE_INT) get_frame_size ());
  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* The first pair also allocates the stack space.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
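
/* Worked example for the routine above, as a compiled-out sketch (helper
   ours): pushing {r4, r5, r6, r7, lr}, five registers, yields one initial
   writeback STR followed by two doubleword-aligned STRDs:
        str     r4, [sp, #-20]!
        strd    r5, r6, [sp, #4]
        strd    r7, lr, [sp, #12]  */
#if 0
static int
example_strd_push_insns (int num_regs)
{
  return (num_regs & 1) + num_regs / 2; /* 5 -> 1 + 2 == 3 insns */
}
#endif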
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   in scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
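
/* ARM-mode STRD can only encode an even/odd consecutive pair (r4/r5 is
   valid, r5/r6 is not), which is why the loop above fuses only when J is
   even and J + 1 is also in the mask.  Pushing {r4, r5, r7} therefore
   becomes one STRD and one STR:
        strd    r4, r5, [sp, #-12]!
        str     r7, [sp, #8]
   A compiled-out sketch of the pairing test (helper ours):  */
#if 0
static int
example_arm_strd_pair_ok (unsigned long mask, int regno)
{
  return (regno % 2) == 0 && (mask & (1UL << (regno + 1))) != 0;
}
#endif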
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
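
/* Compiled-out usage sketch: after an epilogue insn that pops four words
   ("insn" here stands for that just-emitted pop), the callers below record
   the new CFA for the unwinder like so, i.e. CFA = sp + 16.  */
#if 0
arm_add_cfa_adjust_cfa_note (insn, 4 * UNITS_PER_WORD,
                             stack_pointer_rtx, stack_pointer_rtx);
#endif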
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
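
/* Compiled-out usage sketch of the function above:  */
#if 0
arm_emit_multi_reg_pop ((1 << 4) | (1 << 5) | (1 << PC_REGNUM));
/* This takes the return_in_pc path and builds a parallel of the shape
        [(return)
         (set (reg sp) (plus (reg sp) (const_int 12)))
         (set (reg r4) (mem (reg sp)))
         (set (reg r5) (mem (plus (reg sp) (const_int 4))))
         (set (reg pc) (mem (plus (reg sp) (const_int 8))))]
   which arm.md matches as a pop-multiple-and-return pattern.  */
#endif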
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                                          plus_constant (Pmode,
                                                         stack_pointer_rtx,
                                                         4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the registers
               to be loaded are generated in above given LDRD pattern, and the
               pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped, and
               we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */

            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!crtl->is_leaf
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx_insn *call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
21066 /* Calculate stack offsets. These are used to calculate register elimination
21067 offsets and in prologue/epilogue code. Also calculates which registers
21068 should be saved. */
static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
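/* For instance (an illustrative worked example), a non-leaf ARM function
   with 16 bytes of pushed core registers, 8 bytes of locals and no outgoing
   arguments ends up with saved_args == 0, saved_regs == 16,
   soft_frame == 16, locals_base == 24 and outgoing_args == 24, which is
   already doubleword aligned so no padding register is needed.  */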
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */
unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
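/* As an example (illustrative, using the layout sketched earlier with
   saved_args == 0 and outgoing_args == 24): eliminating ARG_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields 24 - (0 + 4) = 20, and with nothing
   pushed at all it would yield -4, as noted above.  */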
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
static bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */
static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
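/* For example (illustrative): if only d8/d9 and d12 are live and
   call-saved, the loop above emits one fstmd for the d8-d9 run and a second
   one for the singleton d12, since FSTMD can only store a block of
   consecutive double-precision registers.  */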
/* Set the Thumb frame pointer from the stack pointer.  */
static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
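/* E.g. (illustrative) with a 16 byte gap between the outgoing arguments
   and the locals this emits "add r7, sp, #16" on Thumb-1, r7 being the
   Thumb hard frame pointer; the move-then-add path above is only taken
   when the offset is too big for a single add immediate.  */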
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
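/* Typical usage of the pair above (an illustrative sketch):

     struct scratch_reg sr;

     get_scratch_register_on_entry (&sr, regno1, live_regs);
     ... use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);

   If no dead register could be found, the chosen register is live and is
   therefore spilled around the use; that is what the sr->saved push and
   pop in these two functions implement.  */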
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */
const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
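/* With the default PROBE_INTERVAL of 4096, the loop printed above looks
   like this (illustrative register choices):

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0

   where r4 holds TEST_ADDR and r5 holds LAST_ADDR.  */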
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
			|| flag_stack_clash_protection)
		       && !df_regs_ever_live_p (LR_REGNUM)
		       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	  saved_regs += 4;

	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	}
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    arm_emit_probe_stack_range (get_stack_check_protect (),
					size - get_stack_check_protect (),
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (get_stack_check_protect (), size,
				    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();
      rtx amount = GEN_INT (offsets->saved_args + saved_regs
			    - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	emit_insn (gen_stack_tie (stack_pointer_rtx,
				  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
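/* For reference (illustrative, not generated verbatim), a typical
   TARGET_APCS_FRAME prologue produced by the code above is:

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<frame size>  */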
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;

    case '#':
      /* %# is a "break" sequence.  It doesn't output anything, but is used to
	 separate e.g. operand numbers from following text, if that text consists
	 of further digits which we don't want to be part of the operand
	 number.  */
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;

	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    case 'c':
      /* An integer or symbol address without a preceding # sign.  */
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'x':
      /* An integer that we want to print in HEX.  */
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;

	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
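      /* For instance (an illustrative template, not one taken from arm.md),
	 a 64-bit add could be written
	 "adds\t%Q0, %Q1, %Q2\n\tadc\t%R0, %R1, %R2": %Q names the low word
	 of each DImode operand and %R the high word, whatever the
	 endianness.  */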
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
          }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

      /* Print bits representing opcode features for Neon.

	 Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
	 and polynomials as unsigned.

	 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

	 Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP( XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
	gcc_assert (CONST_DOUBLE_P (x));
	int result;
	result = vfp3_const_double_for_fract_bits (x);
	if (result == 0)
	  result = vfp3_const_double_for_bits (x);
	fprintf (stream, "#%d", result);
	return;
      }

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      {
		asm_fprintf (stream, "[%r, %s%r",
			     REGNO (base), is_minus ? "-" : "",
			     REGNO (XEXP (index, 0)));
		arm_print_operand (stream, index, 'S');
		fputs ("]", stream);
		break;
	      }

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  /* References to weak symbols cannot be resolved locally:
	     they may be overridden by a non-weak definition at link
	     time.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF
		  && (!SYMBOL_REF_LOCAL_P (x)
		      || (SYMBOL_REF_DECL (x)
			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_real
	      (*CONST_DOUBLE_REAL_VALUE (elt),
	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
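/* For example (illustrative), the FSM lets

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   be emitted instead as

	cmp	r0, #0
	addne	r1, r1, #1

   deleting the branch and conditionalising the skipped instruction.  */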
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
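/* On Thumb-2 the same idea yields IT blocks, e.g. (illustrative):

	cmp	r0, #0
	ite	eq
	moveq	r1, #1
	movne	r1, #0

   where the "ite" prefix encodes the then/else pattern of the following
   two conditional instructions.  */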
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case E_CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	case NE: return ARM_CS;
	case EQ: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case E_CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
        {
          arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            {
              /* XXX Isn't this always a barrier?  */
              start_insn = next_nonnote_insn (start_insn);
            }
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
      else if (ANY_RETURN_P (body))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            start_insn = next_nonnote_insn (start_insn);
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            {
              reverse = TRUE;
              seeking_return = 1;
              return_code = GET_CODE (body);
            }
          else
            return;
        }
      else
        return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
        {
          if (!seeking_return)
            label = XEXP (SET_SRC (body), 0);
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
        {
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
          then_not_else = FALSE;
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 1));
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 2));
          then_not_else = FALSE;
        }
      else
        gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
         insns are okay, and the label or unconditional branch to the same
         label is not too far away, succeed.  */
      for (insns_skipped = 0;
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          rtx scanbody;

          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;

          switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
                 control falls in from somewhere else.  */
              if (this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case BARRIER:
              /* Succeed if the following insn is the target label.
                 Otherwise fail.
                 If return insns are used then the last insn in a function
                 will be a barrier.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case CALL_INSN:
              /* The AAPCS says that conditional calls should not be
                 used since they make interworking inefficient (the
                 linker can't transform BL<cond> into BLX).  That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }

              /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && BARRIER_P (this_insn))
                this_insn = next_nonnote_insn (this_insn);

              if (this_insn && this_insn == label
                  && insns_skipped < max_insns_skipped)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case JUMP_INSN:
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */

              scanbody = PATTERN (this_insn);
              if (GET_CODE (scanbody) == SET
                  && GET_CODE (SET_DEST (scanbody)) == PC)
                {
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
                    {
                      arm_ccfsm_state = 2;
                      succeed = TRUE;
                    }
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
                    fail = TRUE;
                }
              /* Fail if a conditional return is undesirable (e.g. on a
                 StrongARM), but still allow this if optimizing for size.  */
              else if (GET_CODE (scanbody) == return_code
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
              else if (GET_CODE (scanbody) == return_code)
                {
                  arm_ccfsm_state = 2;
                  succeed = TRUE;
                }
              else if (GET_CODE (scanbody) == PARALLEL)
                {
                  switch (get_attr_conds (this_insn))
                    {
                    case CONDS_NOCOND:
                      break;
                    default:
                      fail = TRUE;
                      break;
                    }
                }
              else
                fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

              break;

            case INSN:
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
              if (!(GET_CODE (scanbody) == SET
                    || GET_CODE (scanbody) == PARALLEL)
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
                fail = TRUE;
              break;

            default:
              break;
            }
        }

      if (succeed)
        {
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
            arm_target_label = CODE_LABEL_NUMBER (label);
          else
            {
              gcc_assert (seeking_return || arm_ccfsm_state == 2);

              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
                {
                  this_insn = next_nonnote_insn (this_insn);
                  gcc_assert (!this_insn
                              || (!BARRIER_P (this_insn)
                                  && !LABEL_P (this_insn)));
                }
              if (!this_insn)
                {
                  /* Oh, dear!  We ran off the end...  Give up.  */
                  extract_constrain_insn_cached (insn);
                  arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }

          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

          if (reverse || then_not_else)
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
        }

      /* Restore recog_data (getting the attributes of other insns can
         destroy this array, but final.c assumes that it remains intact
         across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
                   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
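
/* Worked illustration (added commentary, not original source; assumes
   each insn in the block has a ce_count of 1): for three conditionally
   executed insns where the first two are EQ and the third is NE, the
   prescan pass leaves arm_condexec_mask == 0b011 and
   arm_condexec_masklen == 3.  The loop above then builds buff = "tte"
   and prints "itte\teq", i.e. the Thumb-2 sequence

	itte	eq
	addeq	...
	subeq	...
	addne	...

   Bit 0 describes the first insn of the block and is always set, which
   is why the printed string always begins with the "it" base mnemonic.  */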
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
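
/* Illustration (added; assumes ARM_NUM_REGS from arm.h rounds the mode
   size up to whole 4-byte words): a DImode value in a core register
   therefore occupies ARM_NUM_REGS (DImode) == 2 consecutive registers,
   while the special registers above the PC matched by the early-out
   (which are not VFP registers) always count as a single register.  */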
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT
                && regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      /* VFP registers can hold HImode values.  */
      if (mode == HImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
               || (VALID_NEON_QREG_MODE (mode)
                   && NEON_REGNO_OK_FOR_QUAD (regno))
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
        return false;

      if (TARGET_THUMB2)
        return true;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  */

static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
          || VALID_NEON_QREG_MODE (mode1)
          || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
          || VALID_NEON_QREG_MODE (mode2)
          || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
        return STACK_REG;
      if (regno == CC_REGNUM)
        return CC_REG;
      if (regno < 8)
        return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
        return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
        return VFP_LO_REGS;
      else
        return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) == SET
          && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
          && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
          && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
          && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
             )
        {
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

          break;
        }
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
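
/* For illustration (added commentary, not original source): the loop
   above recognizes insns of the shape

	(set (reg Rn) (plus (reg FP) (const_int 8)))

   so for the example function in the comment the offset recovered for
   parameter 'd' is 8 rather than the bogus 0.  */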
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
        /* We can calculate either in 16-bit range and precision or
           32-bit range and precision.  Make that decision based on whether
           we have native support for the ARMv8.2-A 16-bit floating-point
           instructions or not.  */
        return (TARGET_VFP_FP16INST
                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
        gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
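
/* Worked illustration (added, not original source): given
   "_Float16 a, b; _Float16 c = a + b;", without TARGET_VFP_FP16INST the
   hook returns FLT_EVAL_METHOD_PROMOTE_TO_FLOAT, so the addition is
   performed in 32-bit float and rounded back to _Float16 on the
   assignment; with the ARMv8.2-A FP16 instructions the sum is computed
   directly in _Float16 range and precision.  */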
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
        return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[i];
          operands[2 * i + 1] = src[i];
        }
    }
  else
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[count - i - 1];
          operands[2 * i + 1] = src[count - i - 1];
        }
    }
}
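
/* Worked example (added for illustration): for an overlapping copy of
   the pair {d0,d1} into {d1,d2}, REGNO (operands[0]) is above
   REGNO (operands[1]), so the loop above emits the component moves in
   reverse order (d2 from d1 first, then d1 from d0) so that no source
   register is overwritten before it has been read.  */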
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

inline static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
        tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
        tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
         thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
        {
          asm_fprintf (f, "%r", regno);

          if ((lo_mask & ~1) != 0)
            fprintf (f, ", ");
        }
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
          || IS_CMSE_ENTRY (arm_current_func_type ()))
        {
          /* The PC is never popped directly, instead
             it is popped into r3 and then BX is used.  */
          fprintf (f, "}\n");

          thumb_exit (f, -1);

          return;
        }
      else
        {
          if (mask & 0xFF)
            fprintf (f, ", ");

          asm_fprintf (f, "%r", PC_REGNUM);
        }
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
        {
          asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
                       reg_containing_return_addr);
          asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
        }
      else
        asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
           && !TARGET_BACKTRACE
           && !is_called_in_ARM_mode (current_function_decl)
           && !crtl->calls_eh_return
           && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
         return value.  This is more reliable than examining
         df_regs_ever_live_p () because that will be set if the register is
         ever used in the function, not just if the register is used
         to hold a return value.  */

      if (crtl->return_rtx != 0)
        mode = GET_MODE (crtl->return_rtx);
      else
        mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
        {
          /* In a void function we can use any argument register.
             In a function that returns a structure on the stack
             we can use the second and third argument registers.  */
          if (mode == VOIDmode)
            regs_available_for_popping =
              (1 << ARG_REGISTER (1))
              | (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
          else
            regs_available_for_popping =
              (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
        }
      else if (size <= 4)
        regs_available_for_popping =
          (1 << ARG_REGISTER (2))
          | (1 << ARG_REGISTER (3));
      else if (size <= 8)
        regs_available_for_popping =
          (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
         reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
          && reg_containing_return_addr == LAST_ARG_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }
      else if (size > 12)
        {
          /* Register a4 is being used to hold part of the return value,
             but we have dire need of a free, low register.  */
          restore_a4 = TRUE;

          asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
        }

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
        {
          /* The fourth argument register is available.  */
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

          --pops_needed;
        }
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
        number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
        {
          int stack_pointer;

          /* We popped the stack pointer as well,
             find the register that contains it.  */
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);

          /* Move it into the stack register.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

          /* At this point we have popped all necessary registers, so
             do not worry about restoring regs_available_for_popping
             to its correct value:

             assert (pops_needed == 0)
             assert (regs_available_for_popping == (1 << frame_pointer))
             assert (regs_to_pop == (1 << STACK_POINTER))  */
        }
      else
        {
          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop.  */
          regs_available_for_popping |= (1 << frame_pointer);
        }
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
                   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
         Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
        assert (regs_to_pop == (1 << STACK_POINTER))
        assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
         address.  It may therefore contain information that we might not want
         to leak, hence it must be cleared.  The value in R0 will never be a
         secret at this point, so it is safe to use it, see the clearing code
         in 'cmse_nonsecure_entry_clear_before_return'.  */
      if (reg_containing_return_addr != LR_REGNUM)
        asm_fprintf (f, "\tmov\tlr, r0\n");

      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
        {
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
            CC_STATUS_INIT;
        }
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
        {
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
            {
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
            }
          else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
            {
              /* Record the src register operand instead of dest because
                 cprop_hardreg pass propagates src.  */
              cfun->machine->thumb1_cc_op0 = SET_SRC (set);
            }
        }
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}

/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
        return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
        far_jump = true;
      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
        {
          /* Record the fact that we have decided that
             the function does use far jumps.  */
          cfun->machine->far_jump_used = 1;
          return 1;
        }
    }

  return 0;
}
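
/* Worked numbers for the heuristic above (added commentary): the test
   (func_size * 3) >= 2048 fires once func_size reaches 683 bytes.  In
   the worst case every 2-byte insn drags a 4-byte constant-pool entry
   with it, tripling the footprint, so 683 bytes of insns may span about
   683 * 3 = 2049 bytes of code plus pool, just beyond the 2046-byte
   reach of a Thumb-1 long branch.  */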
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
          || TARGET_BACKTRACE
          || (live_regs_mask & 1 << LR_REGNUM) == 0
          || TARGET_INTERWORK
          || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
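
/* Illustrative example (added commentary): with amount == 512 and one
   suitable free low register (n_free == 1), the function returns
   (512 - 508) / 4 == 1, i.e. push one extra register and shrink the
   explicit stack adjustment to 508 bytes, which fits in a single
   Thumb-1 "sub sp, #imm" instruction.  */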
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
         returning.  */
      if (size <= 12)
        mask |= 1 << 3;
      if (size <= 8)
        mask |= 1 << 2;

      if (mask == 0)
        /* Oh dear!  We have no low registers into which we can pop
           high registers!  */
        internal_error
          ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      while (high_regs_pushed)
        {
          /* Find lo register(s) into which the high register(s) can
             be popped.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                high_regs_pushed--;
              if (high_regs_pushed == 0)
                break;
            }

          mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

          /* Pop the values into the low register(s).  */
          thumb_pop (asm_out_file, mask);

          /* Move the value(s) into the high registers.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                {
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
                               regno);

                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
                    if (live_regs_mask & (1 << next_hi_reg))
                      break;
                }
            }
        }
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
        live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
         structure was created which includes an adjusted stack
         pointer, so just pop everything.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pop has already called thumb_exit if the
         PC was in the list.  */
      if (!had_to_push_lr)
        thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
        {
          if (size > 12)
            {
              /* We have no free low regs, so save one.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
                           LAST_ARG_REGNUM);
            }

          /* Get the return address into a temporary register.  */
          thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

          if (size > 12)
            {
              /* Move the return address to lr.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
                           LAST_ARG_REGNUM);
              /* Restore the low register.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
                           IP_REGNUM);
              regno = LR_REGNUM;
            }
          else
            regno = LAST_ARG_REGNUM;
        }
      else
        regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
                   SP_REGNUM, SP_REGNUM,
                   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
        {
          int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
          unsigned long mask;

          mask = 1ul << (LAST_ARG_REGNUM + 1);
          mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

          insn = thumb1_emit_multi_reg_push (mask, 0);
        }
      else
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, x));
        }
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

	 0   sub   SP, #16         Reserve space for 4 registers.
	 2   push  {R7}            Push low registers.
	 4   add   R7, SP, #20     Get the stack pointer before the push.
	 6   str   R7, [SP, #8]    Store the stack pointer
				     (before reserving the space).
	 8   mov   R7, PC          Get hold of the start of this code + 12.
	10   str   R7, [SP, #16]   Store it.
	12   mov   R7, FP          Get hold of the current frame pointer.
	14   str   R7, [SP, #4]    Store it.
	16   mov   R7, LR          Get hold of the current return address.
	18   str   R7, [SP, #12]   Store it.
	20   add   R7, SP, #16     Point at the start of the
				     backtrace structure.
	22   mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
        {
          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
          lr_needs_saving = false;

          offset = bit_count (l_mask) * UNITS_PER_WORD;
        }

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
         prevent the scheduler from doing anything weird.  Failing that
         we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
        {
          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }
      else
        {
          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
                                                 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      /* Here we need to mask out registers used for passing arguments even if
         they can be pushed.  This is to avoid using them to stash the high
         registers.  Such a stash may clobber the use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      if (lr_needs_saving)
        pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
        {
          unsigned long real_regs_mask = 0;
          unsigned long push_mask = 0;

          for (regno = LR_REGNUM; regno >= 0; regno--)
            {
              if (pushable_regs & (1 << regno))
                {
                  emit_move_insn (gen_rtx_REG (SImode, regno),
                                  gen_rtx_REG (SImode, next_hi_reg));

                  high_regs_pushed--;
                  real_regs_mask |= (1 << next_hi_reg);
                  push_mask |= (1 << regno);

                  if (high_regs_pushed)
                    {
                      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                           next_hi_reg--)
                        if (live_regs_mask & (1 << next_hi_reg))
                          break;
                    }
                  else
                    break;
                }
            }

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (lr_needs_saving)
            {
              push_mask |= 1 << LR_REGNUM;
              real_regs_mask |= 1 << LR_REGNUM;
              lr_needs_saving = false;
            }

          insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("-fstack-check=specific for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          rtx reg, dwarf;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function).  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))
              break;

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));

          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -amount));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (TARGET_HARD_FLOAT)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      /* Make sure we don't clear the two scratch registers used to clear the
	 relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      bitmap_clear_bit (to_clear_bitmap, 4);
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (call_used_regs[regno])
	bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
	= compute_not_to_clear_mask (result_type, result_rtl, 0,
				     &padding_bits_to_clear);
      if (to_clear_return_mask)
	{
	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    if (to_clear_return_mask & (1ULL << regno))
	      bitmap_clear_bit (to_clear_bitmap, regno);
	}
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM,
			NUM_ARG_REGS - 1);
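      /* With NUM_ARG_REGS == 4 on AAPCS targets, the range set just above
	 covers the three registers r1, r2 and r3 (r0 holds the composite
	 return value and is handled separately).  */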
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
			clearing_reg);
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if a single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if a single register is to be
   returned, or a multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
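      /* Illustrative note: in the single-register case below we build RTL
	 of the shape
	     (parallel [(return)
			(set (reg:SI pc) (mem:SI (post_inc:SI sp)))])
	 which is what the pattern named in the comment above matches.  */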
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, the frame pointer points to the first saved
	     register.  Restore the stack pointer to the last saved
	     register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked
	     data before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked
	     data before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked
	     data before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1)
		      = gen_rtx_SET (gen_rtx_REG (SImode, i), addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
	= emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue on how to
	     save pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
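	  /* Worked example: with num_regs == 2 the mask is
	     (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3 -- the two argument
	     registers the prologue pushed as pretend args.  */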
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
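  /* With the default register names the two lines above print as
	orr	ip, pc, #1
	bx	ip
     i.e. compute the address of the Thumb stub with the low bit set and
     branch-exchange to it.  */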
  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg>.  */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
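	  /* Illustrative only: loading the r2/r3 pair from [r0 + r1]
	     yields
		add	r3, r0, r1
		ldr	r2, [r3, #0]
		ldr	r3, [r3, #4]
	     so the computed address survives in r3 until the final load
	     overwrites it.  */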
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
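      /* The three conditional swaps above form a tiny sorting network that
	 leaves operands[4..6] in ascending register order, which the
	 ldmia/stmia register lists below require.  */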
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */

void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;
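  /* Strategy sketch: peel off 12-byte and 8-byte block moves (which
     update the IN and OUT pointers themselves), then finish the tail
     with at most one word, one halfword and one byte copy.  A 15-byte
     copy, for instance, becomes one 12-byte block move, a halfword copy
     at offset 0 and a byte copy at offset 2.  */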
  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */

static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */

const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */

void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */
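/* For instance, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   prints a line of the form
	.eabi_attribute 26, 2	@ Tag_ABI_enum_size
   with the trailing comment emitted only under -fverbose-asm or -dA.  */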
void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* If every feature bit of this option is set in the target
	     ISA specification, print out the option name.  However,
	     don't print anything if all the bits are part of the
	     FPU specification.  */
	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
	}
    }
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
	 generate the tags.

	 Note: it might be better to do this unconditionally, then the
	 assembler would not need to know about all new CPU names as
	 they are added.  */
      if (!arm_active_target.core_name)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    arm_print_asm_arch_directives ();
	}
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n",
		     arm_active_target.core_name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char *pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:	     gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */
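/* For example, for "int f (int fmt, ...)" the single named argument
   occupies r0, so nregs ends up 1 below and *pretend_size becomes
   (4 - 1) * 4 == 12 bytes, covering r1-r3.  */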
static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
	{
	  int res = arm_needs_doubleword_align (mode, type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", type);
	  else if (res > 0)
	    nregs++;
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}


static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;

  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
		      || mode == V4HFmode || mode == V16QImode
		      || mode == V4SFmode || mode == V2DImode
		      || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
			  || mode == V2UHQmode || mode == V2HQmode
			  || mode == V2UHAmode || mode == V2HAmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */
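/* E.g. under -mvectorize-with-neon-double an SImode element type maps to
   V2SImode (a 64-bit D register) rather than the default V4SImode
   (a 128-bit Q register).  */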
static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}
/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
/* Map internal gcc register numbers to DWARF2 register numbers.  */
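/* For instance, the first VFP register maps to DWARF number 64 under the
   legacy single-precision numbering used below, while registers outside
   the single-precision-capable range fall into the 256-based
   double-precision numbering.  */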
unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
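/* A sketch of the effect: a 64-bit value living in GCC's s10/s11 register
   pair (i.e. d5) is described to DWARF as a PARALLEL of the two SImode
   halves rather than as a single D register.  */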
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */
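/* For example, a prologue push of {r4, r5, lr} arrives here as a PARALLEL
   whose first SET decrements sp by 12 and whose remaining SETs store the
   three registers; the directive emitted is ".save {r4, r5, lr}".  */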
27342 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27345 HOST_WIDE_INT offset
;
27346 HOST_WIDE_INT nregs
;
27350 unsigned padfirst
= 0, padlast
= 0;
27353 e
= XVECEXP (p
, 0, 0);
27354 gcc_assert (GET_CODE (e
) == SET
);
27356 /* First insn will adjust the stack pointer. */
27357 gcc_assert (GET_CODE (e
) == SET
27358 && REG_P (SET_DEST (e
))
27359 && REGNO (SET_DEST (e
)) == SP_REGNUM
27360 && GET_CODE (SET_SRC (e
)) == PLUS
);
27362 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27363 nregs
= XVECLEN (p
, 0) - 1;
27364 gcc_assert (nregs
);
27366 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27369 /* For -Os dummy registers can be pushed at the beginning to
27370 avoid separate stack pointer adjustment. */
27371 e
= XVECEXP (p
, 0, 1);
27372 e
= XEXP (SET_DEST (e
), 0);
27373 if (GET_CODE (e
) == PLUS
)
27374 padfirst
= INTVAL (XEXP (e
, 1));
27375 gcc_assert (padfirst
== 0 || optimize_size
);
27376 /* The function prologue may also push pc, but not annotate it as it is
27377 never restored. We turn this into a stack pointer adjustment. */
27378 e
= XVECEXP (p
, 0, nregs
);
27379 e
= XEXP (SET_DEST (e
), 0);
27380 if (GET_CODE (e
) == PLUS
)
27381 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27383 padlast
= offset
- 4;
27384 gcc_assert (padlast
== 0 || padlast
== 4);
27386 fprintf (asm_out_file
, "\t.pad #4\n");
27388 fprintf (asm_out_file
, "\t.save {");
27390 else if (IS_VFP_REGNUM (reg
))
27393 fprintf (asm_out_file
, "\t.vsave {");
27396 /* Unknown register type. */
27397 gcc_unreachable ();
27399 /* If the stack increment doesn't match the size of the saved registers,
27400 something has gone horribly wrong. */
27401 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27405 /* The remaining insns will describe the stores. */
27406 for (i
= 1; i
<= nregs
; i
++)
27408 /* Expect (set (mem <addr>) (reg)).
27409 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27410 e
= XVECEXP (p
, 0, i
);
27411 gcc_assert (GET_CODE (e
) == SET
27412 && MEM_P (SET_DEST (e
))
27413 && REG_P (SET_SRC (e
)));
27415 reg
= REGNO (SET_SRC (e
));
27416 gcc_assert (reg
>= lastreg
);
27419 fprintf (asm_out_file
, ", ");
27420 /* We can't use %r for vfp because we need to use the
27421 double precision register names. */
27422 if (IS_VFP_REGNUM (reg
))
27423 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27425 asm_fprintf (asm_out_file
, "%r", reg
);
27429 /* Check that the addresses are consecutive. */
27430 e
= XEXP (SET_DEST (e
), 0);
27431 if (GET_CODE (e
) == PLUS
)
27432 gcc_assert (REG_P (XEXP (e
, 0))
27433 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27434 && CONST_INT_P (XEXP (e
, 1))
27435 && offset
== INTVAL (XEXP (e
, 1)));
27439 && REGNO (e
) == SP_REGNUM
);
27440 offset
+= reg_size
;
27443 fprintf (asm_out_file
, "}\n");
27445 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
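/* As an illustration (not generated verbatim by the code above): a prologue
   that allocates with "sub sp, sp, #12; stmia sp, {r4, r5, lr}" is described
   to the EABI unwinder as
       .save {r4, r5, lr}
   while a pc pushed purely for alignment is annotated as ".pad #4" and any
   -Os dummy registers at the bottom of the frame as a trailing ".pad".  */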
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
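/* For example (illustrative), the table entry for the type_info of "int"
   would be emitted as
       .word	_ZTIi(TARGET2)
   so that the linker applies an R_ARM_TARGET2 relocation to the word.  */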
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
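      /* Illustrative example: a local-exec reference (reloc TLS_LE32) to a
	 symbol "x" decorates the constant simply as
	     .word	x(tpoff)
	 while the GD/LDM/IE/DESCSEQ forms additionally append the
	 pc-relative "(. - label ...)" correction emitted just above.  */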
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
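/* For instance (illustrative operands): with operands[3] an ashift by 2 and
   SET_FLAGS == 2, shift_op returns "lsl" and the '!' flag character is
   appended, producing the template "lsl%!\t%0, %1, %2", which under Thumb-2
   assembles to e.g. "lsls r0, r1, #2" using the smallest encoding.  */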
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;
      mask >>= 1;
    }
  gcc_assert (i < units);

  switch (GET_MODE (operands[0]))
    {
    case E_V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case E_V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case E_V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (templ, operands);
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
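/* Illustratively, for a QImode dispatch table with five cases the emitted
   sequence looks like (example registers):
       cmp	r0, #4
       bhi	.Ldefault
       tbb	[pc, r0]
   with the byte offsets of the ADDR_DIFF_VEC following the TBB.  */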
/* Implement TARGET_SCHED_ISSUE_RATE.  Look up the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the
     register list is 8-bit.  Normally this means all registers in the list
     must be LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we
     must use 32-bit encodings.  There is one exception for PUSH: LR in
     HI_REGS can be used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
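/* Worked example (illustrative): under Thumb-2, "push {r0-r7, lr}" has an
   8-bit register list (LR is the allowed high register), so the length is
   2 bytes; adding r8 to the list forces the 32-bit encoding and a length
   of 4 bytes.  */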
/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel
   PARALLEL rtx, RETURN_PC is true if OPERANDS contains a return insn.
   WRITE_BACK_P is true if OPERANDS contains an insn which explicitly
   updates the base register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register
     is SP and if it's with write back, then an LDM will be an alias of
     POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See
	 similar comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}
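/* Worked example (illustrative): for the constant 0.125, the exact real
   inverse is 8.0 = 2^3, so the function returns 3, the number of
   fractional bits usable in a fixed-point vcvt.  */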
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
	case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
	case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
	case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
	case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
	case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
	case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
	default:
	  gcc_unreachable ();
	}
    }

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (i.e. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_acquire (mod_s));

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.
     Instead, a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
						    label2, cond));
      else
	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
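/* Illustratively, a strong SImode compare-and-swap typically splits into a
   loop of this shape (example registers and label syntax only):
       1:	ldrex	r0, [r2]
		cmp	r0, r4
		bne	2f
		strex	r1, r5, [r2]
		cmp	r1, #0
		bne	1b
       2:
   leaving the Z flag set on success as described above.  */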
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_acquire (model));

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.
     Instead, a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  auto_vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  mask = gen_const_vec_duplicate (vmode, mask);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
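/* Example of the masking (illustrative): for a single-vector V8QImode
   permute, an index of 9 in SEL is reduced to 9 & 7 = 1, matching the modulo
   semantics of VEC_PERM_EXPR even though VTBL itself would have produced 0
   for the out-of-range lane.  */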
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}
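/* Worked example (illustrative): for V4SImode on big-endian, lane 0 first
   reverses to lane 3; because the mode is 16 bytes the D register halves are
   swapped as well, 3 ^ 2 = 1, so architectural lane 0 corresponds to GCC
   lane 1.  */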
/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vuzpv8qi_internal; break;
    case E_V8HImode:  gen = gen_neon_vuzpv8hi_internal; break;
    case E_V4HImode:  gen = gen_neon_vuzpv4hi_internal; break;
    case E_V8HFmode:  gen = gen_neon_vuzpv8hf_internal; break;
    case E_V4HFmode:  gen = gen_neon_vuzpv4hf_internal; break;
    case E_V4SImode:  gen = gen_neon_vuzpv4si_internal; break;
    case E_V2SImode:  gen = gen_neon_vuzpv2si_internal; break;
    case E_V2SFmode:  gen = gen_neon_vuzpv2sf_internal; break;
    case E_V4SFmode:  gen = gen_neon_vuzpv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vzipv8qi_internal; break;
    case E_V8HImode:  gen = gen_neon_vzipv8hi_internal; break;
    case E_V4HImode:  gen = gen_neon_vzipv4hi_internal; break;
    case E_V8HFmode:  gen = gen_neon_vzipv8hf_internal; break;
    case E_V4HFmode:  gen = gen_neon_vzipv4hf_internal; break;
    case E_V4SImode:  gen = gen_neon_vzipv4si_internal; break;
    case E_V2SImode:  gen = gen_neon_vzipv2si_internal; break;
    case E_V2SFmode:  gen = gen_neon_vzipv2sf_internal; break;
    case E_V4SFmode:  gen = gen_neon_vzipv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
	case E_V8QImode:  gen = gen_neon_vrev64v8qi; break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
	case E_V8QImode:  gen = gen_neon_vrev32v8qi; break;
	case E_V8HImode:  gen = gen_neon_vrev64v8hi; break;
	case E_V4HImode:  gen = gen_neon_vrev64v4hi; break;
	case E_V8HFmode:  gen = gen_neon_vrev64v8hf; break;
	case E_V4HFmode:  gen = gen_neon_vrev64v4hf; break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
	case E_V8QImode:  gen = gen_neon_vrev16v8qi; break;
	case E_V8HImode:  gen = gen_neon_vrev32v8hi; break;
	case E_V4HImode:  gen = gen_neon_vrev32v4hi; break;
	case E_V4SImode:  gen = gen_neon_vrev64v4si; break;
	case E_V2SImode:  gen = gen_neon_vrev64v2si; break;
	case E_V4SFmode:  gen = gen_neon_vrev64v4sf; break;
	case E_V2SFmode:  gen = gen_neon_vrev64v2sf; break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vtrnv8qi_internal; break;
    case E_V8HImode:  gen = gen_neon_vtrnv8hi_internal; break;
    case E_V4HImode:  gen = gen_neon_vtrnv4hi_internal; break;
    case E_V8HFmode:  gen = gen_neon_vtrnv8hf_internal; break;
    case E_V4HFmode:  gen = gen_neon_vtrnv4hf_internal; break;
    case E_V4SImode:  gen = gen_neon_vtrnv4si_internal; break;
    case E_V2SImode:  gen = gen_neon_vtrnv2si_internal; break;
    case E_V2SFmode:  gen = gen_neon_vtrnv2sf_internal; break;
    case E_V4SFmode:  gen = gen_neon_vtrnv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vextv16qi; break;
    case E_V8QImode: gen = gen_neon_vextv8qi; break;
    case E_V4HImode: gen = gen_neon_vextv4hi; break;
    case E_V8HImode: gen = gen_neon_vextv8hi; break;
    case E_V2SImode: gen = gen_neon_vextv2si; break;
    case E_V4SImode: gen = gen_neon_vextv4si; break;
    case E_V4HFmode: gen = gen_neon_vextv4hf; break;
    case E_V8HFmode: gen = gen_neon_vextv8hf; break;
    case E_V2SFmode: gen = gen_neon_vextv2sf; break;
    case E_V4SFmode: gen = gen_neon_vextv4sf; break;
    case E_V2DImode: gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      for (unsigned int i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = false;

  nelt = GET_MODE_NUNITS (d.vmode);
  d.perm.reserve (nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm.quick_push (ei);
    }

  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.testing_p = true;
  d.perm.safe_splice (sel);

  /* Categorize the set of elements in the selector.  */
  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned int e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}
      /* Emit code like this:

	 ASHIFT:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up   = in_up << amount;

	 ASHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up   = in_up >> amount;

	 LSHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up   = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */
      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
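/* Editorial sketch (not part of the original source): for a constant left
   shift such as "x << 5" on a DImode value, the code above takes the
   INTVAL (amount) < 32 path.  With "up" = low part and "down" = high part
   for a left shift, the emitted SImode operations amount to:

     out_high = in_high << 5;
     out_high = ((unsigned) in_low >> 27) | out_high;
     out_low  = in_low << 5;

   i.e. the usual double-word shift assembled from two 32-bit halves.  */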
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
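/* Editorial examples (not from the original source) of what the predicate
   above accepts and rejects:

     (symbol_ref "foo")                                   -> true
     (const (plus (symbol_ref "foo") (const_int 4)))      -> true
     (const (plus (symbol_ref "foo") (const_int 40000)))  -> false,
					addend outside [-32768, 32767]
     (plus (reg r0) (symbol_ref "foo"))                   -> false,
					not symbol plus constant addend.  */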
/* Return true if *COMPARISON is a valid comparison operation, and force
   its operands into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
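/* Worked example (editorial, not from the original source): for a word
   aligned memset of LENGTH = 15 bytes with a constant whose inline cost
   is 1 and USE_STRD_P false, the count is

     num = 1                 -- load the constant
	 + (15 >> 2)         -- three word stores
	 + leftover[15 & 3]  -- leftover[3] == 2: one strh plus one strb
	 = 6,

   and if unaligned access is permitted the final strh/strb pair is merged
   into a single str, giving num = 5, which is then compared against
   arm_block_set_max_insns ().  */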
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
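/* Worked example (editorial): LENGTH = 18, ALIGN = 4, MODE = V16QImode.
   unaligned_p is false and nelt is 16, so

     num = 1                -- vector constant load
	 + (18 + 15) / 16   -- two vector stores
	 + 1                -- address adjustment for the 2 leftover bytes
	 - 1                -- first 16 bytes use vst1:v16qi
	 = 3.  */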
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
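/* Editorial sketch of the emitted sequence: for VALUE = 0xAB, LENGTH = 5
   and ALIGN = 2, the constant 0xABAB is loaded once and the loop above
   produces, in effect,

     strh  rv, [rd]        @ bytes 0-1
     strh  rv, [rd, #2]    @ bytes 2-3
     strb  rv, [rd, #4]    @ single byte leftover

   (register names are illustrative only).  */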
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
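/* Editorial usage sketch: this is the expander behind the "setmemsi"
   standard pattern in arm.md, which hands it the four operands in the
   order documented above (the names below are hypothetical):

     rtx ops[4] = { dst_mem,          /+ destination MEM           +/
		    GEN_INT (24),     /+ 24 bytes                  +/
		    GEN_INT (0),      /+ byte value to store       +/
		    GEN_INT (4) };    /+ 4-byte alignment          +/
     if (arm_gen_setmem (ops))
       DONE;

   A false return lets the expander FAIL so that the generic by-pieces
   or library-call expansion takes over.  */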
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}

/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
					(const_int 16)
					(const_int 16))
		     (const_int imm16_1))
     or
     prev (movw)  == (set (reg r1)
		      (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
		     (lo_sum (reg r1)
			     (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
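/* Editorial example of the pair being matched, as it appears in assembly
   for a symbolic address:

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   Keeping the two instructions adjacent lets cores that fuse them treat
   the pair as a single 32-bit immediate load.  */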
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  /* We simply should not handle anything other than SET insns.  */
  if (!prev_set || !curr_set)
    return false;

  /* We should not fuse with anything that might be a branch.  */
  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}

/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
	return false;
      default:
	return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
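/* Editorial note: with this offset, libasan computes the shadow address of
   a byte as

     shadow = (addr >> 3) + (1 << 29),

   i.e. each shadow byte covers an 8-byte granule, with the shadow region
   based at 0x20000000 on 32-bit ARM targets.  */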
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */

bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}

/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
	= TREE_TARGET_OPTION (caller_tree ? caller_tree
					  : target_option_default_node);

  struct cl_target_option *callee_opts
	= TREE_TARGET_OPTION (callee_tree ? callee_tree
					  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
			      false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
			      false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      while (ISSPACE (*q)) ++q;

      argstr = NULL;
      if (!strncmp (q, "thumb", 5))
	opts->x_target_flags |= MASK_THUMB;

      else if (!strncmp (q, "arm", 3))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strncmp (q, "fpu=", 4))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for target attribute or pragma %qs", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else if (!strncmp (q, "arch=", 5))
	{
	  char *arch = q + 5;
	  const arch_option *arm_selected_arch
	     = arm_parse_arch_option_name (all_architectures, "arch", arch);

	  if (!arm_selected_arch)
	    {
	      error ("invalid architecture for target attribute or pragma %qs",
		     q);
	      return false;
	    }

	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
	}
      else if (q[0] == '+')
	{
	  opts->x_arm_arch_string
	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
	}
      else
	{
	  error ("unknown target attribute or pragma %qs", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
/* Add a target attribute with value MODE ("thumb" or "arm") to the
   attribute list ATTRIBUTES.  */

static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternatively on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  /* Only update the assembler .arch string if it is distinct from the last
     such string we printed.  */
  std::string arch_to_print = targ_options->x_arm_arch_string;
  if (arch_to_print != arm_last_printed_arch_string)
    {
      std::string arch_name
	= arch_to_print.substr (0, arch_to_print.find ("+"));
      asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
      const arch_option *arch
	= arm_parse_arch_option_name (all_architectures, "-march",
				      targ_options->x_arm_arch_string);
      auto_sbitmap opt_bits (isa_num_bits);

      if (arch->common.extensions)
	{
	  for (const struct cpu_arch_extension *opt = arch->common.extensions;
	       opt->name != NULL;
	       opt++)
	    {
	      if (!opt->remove)
		{
		  arm_initialize_isa (opt_bits, opt->isa_bits);
		  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
		      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
		    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
				 opt->name);
		}
	    }
	}

      arm_last_printed_arch_string = arch_to_print;
    }

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  std::string fpu_to_print
    = TARGET_SOFT_FLOAT
      ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);

  if (fpu_to_print != arm_last_printed_fpu_string)
    {
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
      arm_last_printed_fpu_string = fpu_to_print;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
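/* Editorial example of the directives emitted above for a Thumb-2 VFP
   function (the exact strings depend on the selected target):

	.arch armv7-a
	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16
   foo:
	...  */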
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

		 Big-Endian		Little-Endian

   GCC		 0   1   2   3		 3   2   1   0
	       | x | x | x | x |       | x | x | x | x |
   Architecture  3   2   1   0		 3   2   1   0

   Low Mask:	   { 2, 3 }		   { 0, 1 }
   High Mask:	   { 0, 1 }		   { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
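/* Editorial example: for V4SImode with HIGH == true, little-endian gives

     (parallel [(const_int 2) (const_int 3)])

   while big-endian gives

     (parallel [(const_int 0) (const_int 1)]),

   matching the Low/High Mask table in the comment above.  */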
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half_p for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;
      if (flags & SECTION_WRITE)
	*num |= 0x1;
      if (flags & SECTION_CODE)
	*num |= 0x4;
      if (flags & SECTION_MERGE)
	*num |= 0x10;
      if (flags & SECTION_STRINGS)
	*num |= 0x20;
      if (flags & SECTION_TLS)
	*num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;

      return true;
    }

  return false;
}
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
						       exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declarations initializer may
   contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode,
					op0, GET_MODE (op0),
					op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
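/* Editorial note: for signed SImode this expands to a call such as

     __aeabi_idivmod (op0, op1)

   where the AEABI run-time contract returns the quotient in r0 and the
   remainder in r1; the double-width result of emit_library_call_value
   models that register pair, and the two subregs above pick the halves
   apart.  */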
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
	if (arm_arch4)
	  return true;
	break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch5)
	  return true;
	break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch6 || arm_arch5te)
	  return true;
	break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
	if (arm_arch6)
	  return true;
	break;
      default:
	gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch GET_CODE (op)
    {
      case PLUS:
	{
	  /* Or registers with an offset.  */
	  if (!REG_P (XEXP (op, 0)))
	    return false;

	  op = XEXP (op, 1);

	  /* The offset must be an immediate though.  */
	  if (!CONST_INT_P (op))
	    return false;

	  range = INTVAL (op);

	  /* Within the range of [-1020,1020].  */
	  if (!IN_RANGE (range, -1020, 1020))
	    return false;

	  /* And a multiple of 4.  */
	  return (range % 4) == 0;
	}
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
	return REG_P (XEXP (op, 0));
      default:
	gcc_unreachable ();
    }
  return false;
}
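/* Editorial examples for the predicate above:

     (mem (reg r0))                         -> true
     (mem (plus (reg r0) (const_int 8)))    -> true, in range, multiple of 4
     (mem (plus (reg r0) (const_int 6)))    -> false, not a multiple of 4
     (mem (plus (reg r0) (const_int 2048))) -> false, outside [-1020, 1020].  */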
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
      bitmap_clear (isa_all_fpubits);
      bitmap_copy (isa_all_fpubits, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
	       " group that are not defined by any FPU.\n"
	       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
    }
}

/* Run all target-specific selftests.  */

static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"