[ARM] Fix cmse_nonsecure_entry return insn size
[official-gcc.git] / gcc / config / arm / arm.c
blob f1ac70ff0895b79df6f14811fc9095b73ff8ee26
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
78 void (*arm_lang_output_object_attributes_hook)(void);
80 struct four_ints
82 int i[4];
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.  */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
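/* Editorial note, not part of the original source: a minimal sketch of how
   the two CMSE attributes registered above are used, assuming a hypothetical
   function name and callback type purely for illustration.

     int __attribute__ ((cmse_nonsecure_entry)) secure_get_counter (void);
     typedef void __attribute__ ((cmse_nonsecure_call)) ns_callback_t (int);

   An entry function returns to non-secure state via BXNS and must scrub
   caller-saved registers that could leak secure state, which is why its
   return sequence (and its size, per the commit title above) needs special
   handling later in this file.  */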
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
622 #endif /* ARM_UNWIND_INFO */
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is -4088 + 1 + 4095 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
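/* Worked figures (editorial note, not in the original source): with the two
   limits above, a section anchor covers offsets -4088 through +4095, i.e.
   4088 + 1 + 4095 = 8184 bytes in total, and 8184 is a multiple of eight.  */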
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
811 extern FILE * asm_out_file;
813 /* True if we are currently building a constant table. */
814 int making_const_table;
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
837 /* Active target architecture and tuning. */
839 struct arm_build_target arm_active_target;
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
914 /* Nonzero if tuning for XScale */
915 int arm_tune_xscale = 0;
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack, it's intended to help work around
927 problems in GLD which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
944 /* Nonzero if we should use Neon to handle 64-bits operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
954 enum arm_pcs arm_pcs_default;
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
974 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
975 int arm_arch_dotprod = 0;
977 /* Nonzero if chip supports the ARMv8-M security extensions. */
978 int arm_arch_cmse = 0;
980 /* Nonzero if the core has a very small, high-latency, multiply unit. */
981 int arm_m_profile_small_mul = 0;
983 /* The condition codes of the ARM, and the inverse function. */
984 static const char * const arm_condition_codes[] =
986 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
987 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
990 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
991 int arm_regs_in_sequence[] =
993 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
996 #define ARM_LSL_NAME "lsl"
997 #define streq(string1, string2) (strcmp (string1, string2) == 0)
999 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1000 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1001 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1003 /* Initialization code. */
1005 struct cpu_tune
1007 enum processor_type scheduler;
1008 unsigned int tune_flags;
1009 const struct tune_params *tune;
1012 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1013 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1015 num_slots, \
1016 l1_size, \
1017 l1_line_size \
1020 /* arm generic vectorizer costs. */
1021 static const
1022 struct cpu_vec_costs arm_default_vec_cost = {
1023 1, /* scalar_stmt_cost. */
1024 1, /* scalar load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 1, /* vec_unalign_load_cost. */
1031 1, /* vec_unalign_store_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1038 #include "aarch-cost-tables.h"
1042 const struct cpu_cost_table cortexa9_extra_costs =
1044 /* ALU */
1046 0, /* arith. */
1047 0, /* logical. */
1048 0, /* shift. */
1049 COSTS_N_INSNS (1), /* shift_reg. */
1050 COSTS_N_INSNS (1), /* arith_shift. */
1051 COSTS_N_INSNS (2), /* arith_shift_reg. */
1052 0, /* log_shift. */
1053 COSTS_N_INSNS (1), /* log_shift_reg. */
1054 COSTS_N_INSNS (1), /* extend. */
1055 COSTS_N_INSNS (2), /* extend_arith. */
1056 COSTS_N_INSNS (1), /* bfi. */
1057 COSTS_N_INSNS (1), /* bfx. */
1058 0, /* clz. */
1059 0, /* rev. */
1060 0, /* non_exec. */
1061 true /* non_exec_costs_exec. */
1064 /* MULT SImode */
1066 COSTS_N_INSNS (3), /* simple. */
1067 COSTS_N_INSNS (3), /* flag_setting. */
1068 COSTS_N_INSNS (2), /* extend. */
1069 COSTS_N_INSNS (3), /* add. */
1070 COSTS_N_INSNS (2), /* extend_add. */
1071 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1073 /* MULT DImode */
1075 0, /* simple (N/A). */
1076 0, /* flag_setting (N/A). */
1077 COSTS_N_INSNS (4), /* extend. */
1078 0, /* add (N/A). */
1079 COSTS_N_INSNS (4), /* extend_add. */
1080 0 /* idiv (N/A). */
1083 /* LD/ST */
1085 COSTS_N_INSNS (2), /* load. */
1086 COSTS_N_INSNS (2), /* load_sign_extend. */
1087 COSTS_N_INSNS (2), /* ldrd. */
1088 COSTS_N_INSNS (2), /* ldm_1st. */
1089 1, /* ldm_regs_per_insn_1st. */
1090 2, /* ldm_regs_per_insn_subsequent. */
1091 COSTS_N_INSNS (5), /* loadf. */
1092 COSTS_N_INSNS (5), /* loadd. */
1093 COSTS_N_INSNS (1), /* load_unaligned. */
1094 COSTS_N_INSNS (2), /* store. */
1095 COSTS_N_INSNS (2), /* strd. */
1096 COSTS_N_INSNS (2), /* stm_1st. */
1097 1, /* stm_regs_per_insn_1st. */
1098 2, /* stm_regs_per_insn_subsequent. */
1099 COSTS_N_INSNS (1), /* storef. */
1100 COSTS_N_INSNS (1), /* stored. */
1101 COSTS_N_INSNS (1), /* store_unaligned. */
1102 COSTS_N_INSNS (1), /* loadv. */
1103 COSTS_N_INSNS (1) /* storev. */
1106 /* FP SFmode */
1108 COSTS_N_INSNS (14), /* div. */
1109 COSTS_N_INSNS (4), /* mult. */
1110 COSTS_N_INSNS (7), /* mult_addsub. */
1111 COSTS_N_INSNS (30), /* fma. */
1112 COSTS_N_INSNS (3), /* addsub. */
1113 COSTS_N_INSNS (1), /* fpconst. */
1114 COSTS_N_INSNS (1), /* neg. */
1115 COSTS_N_INSNS (3), /* compare. */
1116 COSTS_N_INSNS (3), /* widen. */
1117 COSTS_N_INSNS (3), /* narrow. */
1118 COSTS_N_INSNS (3), /* toint. */
1119 COSTS_N_INSNS (3), /* fromint. */
1120 COSTS_N_INSNS (3) /* roundint. */
1122 /* FP DFmode */
1124 COSTS_N_INSNS (24), /* div. */
1125 COSTS_N_INSNS (5), /* mult. */
1126 COSTS_N_INSNS (8), /* mult_addsub. */
1127 COSTS_N_INSNS (30), /* fma. */
1128 COSTS_N_INSNS (3), /* addsub. */
1129 COSTS_N_INSNS (1), /* fpconst. */
1130 COSTS_N_INSNS (1), /* neg. */
1131 COSTS_N_INSNS (3), /* compare. */
1132 COSTS_N_INSNS (3), /* widen. */
1133 COSTS_N_INSNS (3), /* narrow. */
1134 COSTS_N_INSNS (3), /* toint. */
1135 COSTS_N_INSNS (3), /* fromint. */
1136 COSTS_N_INSNS (3) /* roundint. */
1139 /* Vector */
1141 COSTS_N_INSNS (1) /* alu. */
1145 const struct cpu_cost_table cortexa8_extra_costs =
1147 /* ALU */
1149 0, /* arith. */
1150 0, /* logical. */
1151 COSTS_N_INSNS (1), /* shift. */
1152 0, /* shift_reg. */
1153 COSTS_N_INSNS (1), /* arith_shift. */
1154 0, /* arith_shift_reg. */
1155 COSTS_N_INSNS (1), /* log_shift. */
1156 0, /* log_shift_reg. */
1157 0, /* extend. */
1158 0, /* extend_arith. */
1159 0, /* bfi. */
1160 0, /* bfx. */
1161 0, /* clz. */
1162 0, /* rev. */
1163 0, /* non_exec. */
1164 true /* non_exec_costs_exec. */
1167 /* MULT SImode */
1169 COSTS_N_INSNS (1), /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1176 /* MULT DImode */
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (2), /* extend. */
1181 0, /* add (N/A). */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1186 /* LD/ST */
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (1), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (1), /* loadf. */
1195 COSTS_N_INSNS (1), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (1), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (1), /* storef. */
1203 COSTS_N_INSNS (1), /* stored. */
1204 COSTS_N_INSNS (1), /* store_unaligned. */
1205 COSTS_N_INSNS (1), /* loadv. */
1206 COSTS_N_INSNS (1) /* storev. */
1209 /* FP SFmode */
1211 COSTS_N_INSNS (36), /* div. */
1212 COSTS_N_INSNS (11), /* mult. */
1213 COSTS_N_INSNS (20), /* mult_addsub. */
1214 COSTS_N_INSNS (30), /* fma. */
1215 COSTS_N_INSNS (9), /* addsub. */
1216 COSTS_N_INSNS (3), /* fpconst. */
1217 COSTS_N_INSNS (3), /* neg. */
1218 COSTS_N_INSNS (6), /* compare. */
1219 COSTS_N_INSNS (4), /* widen. */
1220 COSTS_N_INSNS (4), /* narrow. */
1221 COSTS_N_INSNS (8), /* toint. */
1222 COSTS_N_INSNS (8), /* fromint. */
1223 COSTS_N_INSNS (8) /* roundint. */
1225 /* FP DFmode */
1227 COSTS_N_INSNS (64), /* div. */
1228 COSTS_N_INSNS (16), /* mult. */
1229 COSTS_N_INSNS (25), /* mult_addsub. */
1230 COSTS_N_INSNS (30), /* fma. */
1231 COSTS_N_INSNS (9), /* addsub. */
1232 COSTS_N_INSNS (3), /* fpconst. */
1233 COSTS_N_INSNS (3), /* neg. */
1234 COSTS_N_INSNS (6), /* compare. */
1235 COSTS_N_INSNS (6), /* widen. */
1236 COSTS_N_INSNS (6), /* narrow. */
1237 COSTS_N_INSNS (8), /* toint. */
1238 COSTS_N_INSNS (8), /* fromint. */
1239 COSTS_N_INSNS (8) /* roundint. */
1242 /* Vector */
1244 COSTS_N_INSNS (1) /* alu. */
1248 const struct cpu_cost_table cortexa5_extra_costs =
1250 /* ALU */
1252 0, /* arith. */
1253 0, /* logical. */
1254 COSTS_N_INSNS (1), /* shift. */
1255 COSTS_N_INSNS (1), /* shift_reg. */
1256 COSTS_N_INSNS (1), /* arith_shift. */
1257 COSTS_N_INSNS (1), /* arith_shift_reg. */
1258 COSTS_N_INSNS (1), /* log_shift. */
1259 COSTS_N_INSNS (1), /* log_shift_reg. */
1260 COSTS_N_INSNS (1), /* extend. */
1261 COSTS_N_INSNS (1), /* extend_arith. */
1262 COSTS_N_INSNS (1), /* bfi. */
1263 COSTS_N_INSNS (1), /* bfx. */
1264 COSTS_N_INSNS (1), /* clz. */
1265 COSTS_N_INSNS (1), /* rev. */
1266 0, /* non_exec. */
1267 true /* non_exec_costs_exec. */
1271 /* MULT SImode */
1273 0, /* simple. */
1274 COSTS_N_INSNS (1), /* flag_setting. */
1275 COSTS_N_INSNS (1), /* extend. */
1276 COSTS_N_INSNS (1), /* add. */
1277 COSTS_N_INSNS (1), /* extend_add. */
1278 COSTS_N_INSNS (7) /* idiv. */
1280 /* MULT DImode */
1282 0, /* simple (N/A). */
1283 0, /* flag_setting (N/A). */
1284 COSTS_N_INSNS (1), /* extend. */
1285 0, /* add. */
1286 COSTS_N_INSNS (2), /* extend_add. */
1287 0 /* idiv (N/A). */
1290 /* LD/ST */
1292 COSTS_N_INSNS (1), /* load. */
1293 COSTS_N_INSNS (1), /* load_sign_extend. */
1294 COSTS_N_INSNS (6), /* ldrd. */
1295 COSTS_N_INSNS (1), /* ldm_1st. */
1296 1, /* ldm_regs_per_insn_1st. */
1297 2, /* ldm_regs_per_insn_subsequent. */
1298 COSTS_N_INSNS (2), /* loadf. */
1299 COSTS_N_INSNS (4), /* loadd. */
1300 COSTS_N_INSNS (1), /* load_unaligned. */
1301 COSTS_N_INSNS (1), /* store. */
1302 COSTS_N_INSNS (3), /* strd. */
1303 COSTS_N_INSNS (1), /* stm_1st. */
1304 1, /* stm_regs_per_insn_1st. */
1305 2, /* stm_regs_per_insn_subsequent. */
1306 COSTS_N_INSNS (2), /* storef. */
1307 COSTS_N_INSNS (2), /* stored. */
1308 COSTS_N_INSNS (1), /* store_unaligned. */
1309 COSTS_N_INSNS (1), /* loadv. */
1310 COSTS_N_INSNS (1) /* storev. */
1313 /* FP SFmode */
1315 COSTS_N_INSNS (15), /* div. */
1316 COSTS_N_INSNS (3), /* mult. */
1317 COSTS_N_INSNS (7), /* mult_addsub. */
1318 COSTS_N_INSNS (7), /* fma. */
1319 COSTS_N_INSNS (3), /* addsub. */
1320 COSTS_N_INSNS (3), /* fpconst. */
1321 COSTS_N_INSNS (3), /* neg. */
1322 COSTS_N_INSNS (3), /* compare. */
1323 COSTS_N_INSNS (3), /* widen. */
1324 COSTS_N_INSNS (3), /* narrow. */
1325 COSTS_N_INSNS (3), /* toint. */
1326 COSTS_N_INSNS (3), /* fromint. */
1327 COSTS_N_INSNS (3) /* roundint. */
1329 /* FP DFmode */
1331 COSTS_N_INSNS (30), /* div. */
1332 COSTS_N_INSNS (6), /* mult. */
1333 COSTS_N_INSNS (10), /* mult_addsub. */
1334 COSTS_N_INSNS (7), /* fma. */
1335 COSTS_N_INSNS (3), /* addsub. */
1336 COSTS_N_INSNS (3), /* fpconst. */
1337 COSTS_N_INSNS (3), /* neg. */
1338 COSTS_N_INSNS (3), /* compare. */
1339 COSTS_N_INSNS (3), /* widen. */
1340 COSTS_N_INSNS (3), /* narrow. */
1341 COSTS_N_INSNS (3), /* toint. */
1342 COSTS_N_INSNS (3), /* fromint. */
1343 COSTS_N_INSNS (3) /* roundint. */
1346 /* Vector */
1348 COSTS_N_INSNS (1) /* alu. */
1353 const struct cpu_cost_table cortexa7_extra_costs =
1355 /* ALU */
1357 0, /* arith. */
1358 0, /* logical. */
1359 COSTS_N_INSNS (1), /* shift. */
1360 COSTS_N_INSNS (1), /* shift_reg. */
1361 COSTS_N_INSNS (1), /* arith_shift. */
1362 COSTS_N_INSNS (1), /* arith_shift_reg. */
1363 COSTS_N_INSNS (1), /* log_shift. */
1364 COSTS_N_INSNS (1), /* log_shift_reg. */
1365 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (1), /* extend_arith. */
1367 COSTS_N_INSNS (1), /* bfi. */
1368 COSTS_N_INSNS (1), /* bfx. */
1369 COSTS_N_INSNS (1), /* clz. */
1370 COSTS_N_INSNS (1), /* rev. */
1371 0, /* non_exec. */
1372 true /* non_exec_costs_exec. */
1376 /* MULT SImode */
1378 0, /* simple. */
1379 COSTS_N_INSNS (1), /* flag_setting. */
1380 COSTS_N_INSNS (1), /* extend. */
1381 COSTS_N_INSNS (1), /* add. */
1382 COSTS_N_INSNS (1), /* extend_add. */
1383 COSTS_N_INSNS (7) /* idiv. */
1385 /* MULT DImode */
1387 0, /* simple (N/A). */
1388 0, /* flag_setting (N/A). */
1389 COSTS_N_INSNS (1), /* extend. */
1390 0, /* add. */
1391 COSTS_N_INSNS (2), /* extend_add. */
1392 0 /* idiv (N/A). */
1395 /* LD/ST */
1397 COSTS_N_INSNS (1), /* load. */
1398 COSTS_N_INSNS (1), /* load_sign_extend. */
1399 COSTS_N_INSNS (3), /* ldrd. */
1400 COSTS_N_INSNS (1), /* ldm_1st. */
1401 1, /* ldm_regs_per_insn_1st. */
1402 2, /* ldm_regs_per_insn_subsequent. */
1403 COSTS_N_INSNS (2), /* loadf. */
1404 COSTS_N_INSNS (2), /* loadd. */
1405 COSTS_N_INSNS (1), /* load_unaligned. */
1406 COSTS_N_INSNS (1), /* store. */
1407 COSTS_N_INSNS (3), /* strd. */
1408 COSTS_N_INSNS (1), /* stm_1st. */
1409 1, /* stm_regs_per_insn_1st. */
1410 2, /* stm_regs_per_insn_subsequent. */
1411 COSTS_N_INSNS (2), /* storef. */
1412 COSTS_N_INSNS (2), /* stored. */
1413 COSTS_N_INSNS (1), /* store_unaligned. */
1414 COSTS_N_INSNS (1), /* loadv. */
1415 COSTS_N_INSNS (1) /* storev. */
1418 /* FP SFmode */
1420 COSTS_N_INSNS (15), /* div. */
1421 COSTS_N_INSNS (3), /* mult. */
1422 COSTS_N_INSNS (7), /* mult_addsub. */
1423 COSTS_N_INSNS (7), /* fma. */
1424 COSTS_N_INSNS (3), /* addsub. */
1425 COSTS_N_INSNS (3), /* fpconst. */
1426 COSTS_N_INSNS (3), /* neg. */
1427 COSTS_N_INSNS (3), /* compare. */
1428 COSTS_N_INSNS (3), /* widen. */
1429 COSTS_N_INSNS (3), /* narrow. */
1430 COSTS_N_INSNS (3), /* toint. */
1431 COSTS_N_INSNS (3), /* fromint. */
1432 COSTS_N_INSNS (3) /* roundint. */
1434 /* FP DFmode */
1436 COSTS_N_INSNS (30), /* div. */
1437 COSTS_N_INSNS (6), /* mult. */
1438 COSTS_N_INSNS (10), /* mult_addsub. */
1439 COSTS_N_INSNS (7), /* fma. */
1440 COSTS_N_INSNS (3), /* addsub. */
1441 COSTS_N_INSNS (3), /* fpconst. */
1442 COSTS_N_INSNS (3), /* neg. */
1443 COSTS_N_INSNS (3), /* compare. */
1444 COSTS_N_INSNS (3), /* widen. */
1445 COSTS_N_INSNS (3), /* narrow. */
1446 COSTS_N_INSNS (3), /* toint. */
1447 COSTS_N_INSNS (3), /* fromint. */
1448 COSTS_N_INSNS (3) /* roundint. */
1451 /* Vector */
1453 COSTS_N_INSNS (1) /* alu. */
1457 const struct cpu_cost_table cortexa12_extra_costs =
1459 /* ALU */
1461 0, /* arith. */
1462 0, /* logical. */
1463 0, /* shift. */
1464 COSTS_N_INSNS (1), /* shift_reg. */
1465 COSTS_N_INSNS (1), /* arith_shift. */
1466 COSTS_N_INSNS (1), /* arith_shift_reg. */
1467 COSTS_N_INSNS (1), /* log_shift. */
1468 COSTS_N_INSNS (1), /* log_shift_reg. */
1469 0, /* extend. */
1470 COSTS_N_INSNS (1), /* extend_arith. */
1471 0, /* bfi. */
1472 COSTS_N_INSNS (1), /* bfx. */
1473 COSTS_N_INSNS (1), /* clz. */
1474 COSTS_N_INSNS (1), /* rev. */
1475 0, /* non_exec. */
1476 true /* non_exec_costs_exec. */
1478 /* MULT SImode */
1481 COSTS_N_INSNS (2), /* simple. */
1482 COSTS_N_INSNS (3), /* flag_setting. */
1483 COSTS_N_INSNS (2), /* extend. */
1484 COSTS_N_INSNS (3), /* add. */
1485 COSTS_N_INSNS (2), /* extend_add. */
1486 COSTS_N_INSNS (18) /* idiv. */
1488 /* MULT DImode */
1490 0, /* simple (N/A). */
1491 0, /* flag_setting (N/A). */
1492 COSTS_N_INSNS (3), /* extend. */
1493 0, /* add (N/A). */
1494 COSTS_N_INSNS (3), /* extend_add. */
1495 0 /* idiv (N/A). */
1498 /* LD/ST */
1500 COSTS_N_INSNS (3), /* load. */
1501 COSTS_N_INSNS (3), /* load_sign_extend. */
1502 COSTS_N_INSNS (3), /* ldrd. */
1503 COSTS_N_INSNS (3), /* ldm_1st. */
1504 1, /* ldm_regs_per_insn_1st. */
1505 2, /* ldm_regs_per_insn_subsequent. */
1506 COSTS_N_INSNS (3), /* loadf. */
1507 COSTS_N_INSNS (3), /* loadd. */
1508 0, /* load_unaligned. */
1509 0, /* store. */
1510 0, /* strd. */
1511 0, /* stm_1st. */
1512 1, /* stm_regs_per_insn_1st. */
1513 2, /* stm_regs_per_insn_subsequent. */
1514 COSTS_N_INSNS (2), /* storef. */
1515 COSTS_N_INSNS (2), /* stored. */
1516 0, /* store_unaligned. */
1517 COSTS_N_INSNS (1), /* loadv. */
1518 COSTS_N_INSNS (1) /* storev. */
1521 /* FP SFmode */
1523 COSTS_N_INSNS (17), /* div. */
1524 COSTS_N_INSNS (4), /* mult. */
1525 COSTS_N_INSNS (8), /* mult_addsub. */
1526 COSTS_N_INSNS (8), /* fma. */
1527 COSTS_N_INSNS (4), /* addsub. */
1528 COSTS_N_INSNS (2), /* fpconst. */
1529 COSTS_N_INSNS (2), /* neg. */
1530 COSTS_N_INSNS (2), /* compare. */
1531 COSTS_N_INSNS (4), /* widen. */
1532 COSTS_N_INSNS (4), /* narrow. */
1533 COSTS_N_INSNS (4), /* toint. */
1534 COSTS_N_INSNS (4), /* fromint. */
1535 COSTS_N_INSNS (4) /* roundint. */
1537 /* FP DFmode */
1539 COSTS_N_INSNS (31), /* div. */
1540 COSTS_N_INSNS (4), /* mult. */
1541 COSTS_N_INSNS (8), /* mult_addsub. */
1542 COSTS_N_INSNS (8), /* fma. */
1543 COSTS_N_INSNS (4), /* addsub. */
1544 COSTS_N_INSNS (2), /* fpconst. */
1545 COSTS_N_INSNS (2), /* neg. */
1546 COSTS_N_INSNS (2), /* compare. */
1547 COSTS_N_INSNS (4), /* widen. */
1548 COSTS_N_INSNS (4), /* narrow. */
1549 COSTS_N_INSNS (4), /* toint. */
1550 COSTS_N_INSNS (4), /* fromint. */
1551 COSTS_N_INSNS (4) /* roundint. */
1554 /* Vector */
1556 COSTS_N_INSNS (1) /* alu. */
1560 const struct cpu_cost_table cortexa15_extra_costs =
1562 /* ALU */
1564 0, /* arith. */
1565 0, /* logical. */
1566 0, /* shift. */
1567 0, /* shift_reg. */
1568 COSTS_N_INSNS (1), /* arith_shift. */
1569 COSTS_N_INSNS (1), /* arith_shift_reg. */
1570 COSTS_N_INSNS (1), /* log_shift. */
1571 COSTS_N_INSNS (1), /* log_shift_reg. */
1572 0, /* extend. */
1573 COSTS_N_INSNS (1), /* extend_arith. */
1574 COSTS_N_INSNS (1), /* bfi. */
1575 0, /* bfx. */
1576 0, /* clz. */
1577 0, /* rev. */
1578 0, /* non_exec. */
1579 true /* non_exec_costs_exec. */
1581 /* MULT SImode */
1584 COSTS_N_INSNS (2), /* simple. */
1585 COSTS_N_INSNS (3), /* flag_setting. */
1586 COSTS_N_INSNS (2), /* extend. */
1587 COSTS_N_INSNS (2), /* add. */
1588 COSTS_N_INSNS (2), /* extend_add. */
1589 COSTS_N_INSNS (18) /* idiv. */
1591 /* MULT DImode */
1593 0, /* simple (N/A). */
1594 0, /* flag_setting (N/A). */
1595 COSTS_N_INSNS (3), /* extend. */
1596 0, /* add (N/A). */
1597 COSTS_N_INSNS (3), /* extend_add. */
1598 0 /* idiv (N/A). */
1601 /* LD/ST */
1603 COSTS_N_INSNS (3), /* load. */
1604 COSTS_N_INSNS (3), /* load_sign_extend. */
1605 COSTS_N_INSNS (3), /* ldrd. */
1606 COSTS_N_INSNS (4), /* ldm_1st. */
1607 1, /* ldm_regs_per_insn_1st. */
1608 2, /* ldm_regs_per_insn_subsequent. */
1609 COSTS_N_INSNS (4), /* loadf. */
1610 COSTS_N_INSNS (4), /* loadd. */
1611 0, /* load_unaligned. */
1612 0, /* store. */
1613 0, /* strd. */
1614 COSTS_N_INSNS (1), /* stm_1st. */
1615 1, /* stm_regs_per_insn_1st. */
1616 2, /* stm_regs_per_insn_subsequent. */
1617 0, /* storef. */
1618 0, /* stored. */
1619 0, /* store_unaligned. */
1620 COSTS_N_INSNS (1), /* loadv. */
1621 COSTS_N_INSNS (1) /* storev. */
1624 /* FP SFmode */
1626 COSTS_N_INSNS (17), /* div. */
1627 COSTS_N_INSNS (4), /* mult. */
1628 COSTS_N_INSNS (8), /* mult_addsub. */
1629 COSTS_N_INSNS (8), /* fma. */
1630 COSTS_N_INSNS (4), /* addsub. */
1631 COSTS_N_INSNS (2), /* fpconst. */
1632 COSTS_N_INSNS (2), /* neg. */
1633 COSTS_N_INSNS (5), /* compare. */
1634 COSTS_N_INSNS (4), /* widen. */
1635 COSTS_N_INSNS (4), /* narrow. */
1636 COSTS_N_INSNS (4), /* toint. */
1637 COSTS_N_INSNS (4), /* fromint. */
1638 COSTS_N_INSNS (4) /* roundint. */
1640 /* FP DFmode */
1642 COSTS_N_INSNS (31), /* div. */
1643 COSTS_N_INSNS (4), /* mult. */
1644 COSTS_N_INSNS (8), /* mult_addsub. */
1645 COSTS_N_INSNS (8), /* fma. */
1646 COSTS_N_INSNS (4), /* addsub. */
1647 COSTS_N_INSNS (2), /* fpconst. */
1648 COSTS_N_INSNS (2), /* neg. */
1649 COSTS_N_INSNS (2), /* compare. */
1650 COSTS_N_INSNS (4), /* widen. */
1651 COSTS_N_INSNS (4), /* narrow. */
1652 COSTS_N_INSNS (4), /* toint. */
1653 COSTS_N_INSNS (4), /* fromint. */
1654 COSTS_N_INSNS (4) /* roundint. */
1657 /* Vector */
1659 COSTS_N_INSNS (1) /* alu. */
1663 const struct cpu_cost_table v7m_extra_costs =
1665 /* ALU */
1667 0, /* arith. */
1668 0, /* logical. */
1669 0, /* shift. */
1670 0, /* shift_reg. */
1671 0, /* arith_shift. */
1672 COSTS_N_INSNS (1), /* arith_shift_reg. */
1673 0, /* log_shift. */
1674 COSTS_N_INSNS (1), /* log_shift_reg. */
1675 0, /* extend. */
1676 COSTS_N_INSNS (1), /* extend_arith. */
1677 0, /* bfi. */
1678 0, /* bfx. */
1679 0, /* clz. */
1680 0, /* rev. */
1681 COSTS_N_INSNS (1), /* non_exec. */
1682 false /* non_exec_costs_exec. */
1685 /* MULT SImode */
1687 COSTS_N_INSNS (1), /* simple. */
1688 COSTS_N_INSNS (1), /* flag_setting. */
1689 COSTS_N_INSNS (2), /* extend. */
1690 COSTS_N_INSNS (1), /* add. */
1691 COSTS_N_INSNS (3), /* extend_add. */
1692 COSTS_N_INSNS (8) /* idiv. */
1694 /* MULT DImode */
1696 0, /* simple (N/A). */
1697 0, /* flag_setting (N/A). */
1698 COSTS_N_INSNS (2), /* extend. */
1699 0, /* add (N/A). */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 0 /* idiv (N/A). */
1704 /* LD/ST */
1706 COSTS_N_INSNS (2), /* load. */
1707 0, /* load_sign_extend. */
1708 COSTS_N_INSNS (3), /* ldrd. */
1709 COSTS_N_INSNS (2), /* ldm_1st. */
1710 1, /* ldm_regs_per_insn_1st. */
1711 1, /* ldm_regs_per_insn_subsequent. */
1712 COSTS_N_INSNS (2), /* loadf. */
1713 COSTS_N_INSNS (3), /* loadd. */
1714 COSTS_N_INSNS (1), /* load_unaligned. */
1715 COSTS_N_INSNS (2), /* store. */
1716 COSTS_N_INSNS (3), /* strd. */
1717 COSTS_N_INSNS (2), /* stm_1st. */
1718 1, /* stm_regs_per_insn_1st. */
1719 1, /* stm_regs_per_insn_subsequent. */
1720 COSTS_N_INSNS (2), /* storef. */
1721 COSTS_N_INSNS (3), /* stored. */
1722 COSTS_N_INSNS (1), /* store_unaligned. */
1723 COSTS_N_INSNS (1), /* loadv. */
1724 COSTS_N_INSNS (1) /* storev. */
1727 /* FP SFmode */
1729 COSTS_N_INSNS (7), /* div. */
1730 COSTS_N_INSNS (2), /* mult. */
1731 COSTS_N_INSNS (5), /* mult_addsub. */
1732 COSTS_N_INSNS (3), /* fma. */
1733 COSTS_N_INSNS (1), /* addsub. */
1734 0, /* fpconst. */
1735 0, /* neg. */
1736 0, /* compare. */
1737 0, /* widen. */
1738 0, /* narrow. */
1739 0, /* toint. */
1740 0, /* fromint. */
1741 0 /* roundint. */
1743 /* FP DFmode */
1745 COSTS_N_INSNS (15), /* div. */
1746 COSTS_N_INSNS (5), /* mult. */
1747 COSTS_N_INSNS (7), /* mult_addsub. */
1748 COSTS_N_INSNS (7), /* fma. */
1749 COSTS_N_INSNS (3), /* addsub. */
1750 0, /* fpconst. */
1751 0, /* neg. */
1752 0, /* compare. */
1753 0, /* widen. */
1754 0, /* narrow. */
1755 0, /* toint. */
1756 0, /* fromint. */
1757 0 /* roundint. */
1760 /* Vector */
1762 COSTS_N_INSNS (1) /* alu. */
1766 const struct tune_params arm_slowmul_tune =
1768 &generic_extra_costs, /* Insn extra costs. */
1769 NULL, /* Sched adj cost. */
1770 arm_default_branch_cost,
1771 &arm_default_vec_cost,
1772 3, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 tune_params::PREF_CONST_POOL_TRUE,
1778 tune_params::PREF_LDRD_FALSE,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER,
1782 tune_params::PREF_NEON_64_FALSE,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE,
1784 tune_params::FUSE_NOTHING,
1785 tune_params::SCHED_AUTOPREF_OFF
1788 const struct tune_params arm_fastmul_tune =
1790 &generic_extra_costs, /* Insn extra costs. */
1791 NULL, /* Sched adj cost. */
1792 arm_default_branch_cost,
1793 &arm_default_vec_cost,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL,
1799 tune_params::PREF_CONST_POOL_TRUE,
1800 tune_params::PREF_LDRD_FALSE,
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER,
1804 tune_params::PREF_NEON_64_FALSE,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE,
1806 tune_params::FUSE_NOTHING,
1807 tune_params::SCHED_AUTOPREF_OFF
1810 /* StrongARM has early execution of branches, so a sequence that is worth
1811 skipping is shorter. Set max_insns_skipped to a lower value. */
1813 const struct tune_params arm_strongarm_tune =
1815 &generic_extra_costs, /* Insn extra costs. */
1816 NULL, /* Sched adj cost. */
1817 arm_default_branch_cost,
1818 &arm_default_vec_cost,
1819 1, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 tune_params::PREF_CONST_POOL_TRUE,
1825 tune_params::PREF_LDRD_FALSE,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER,
1829 tune_params::PREF_NEON_64_FALSE,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE,
1831 tune_params::FUSE_NOTHING,
1832 tune_params::SCHED_AUTOPREF_OFF
1835 const struct tune_params arm_xscale_tune =
1837 &generic_extra_costs, /* Insn extra costs. */
1838 xscale_sched_adjust_cost,
1839 arm_default_branch_cost,
1840 &arm_default_vec_cost,
1841 2, /* Constant limit. */
1842 3, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 tune_params::PREF_CONST_POOL_TRUE,
1847 tune_params::PREF_LDRD_FALSE,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER,
1851 tune_params::PREF_NEON_64_FALSE,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE,
1853 tune_params::FUSE_NOTHING,
1854 tune_params::SCHED_AUTOPREF_OFF
1857 const struct tune_params arm_9e_tune =
1859 &generic_extra_costs, /* Insn extra costs. */
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 1, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_TRUE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_marvell_pj4_tune =
1881 &generic_extra_costs, /* Insn extra costs. */
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_TRUE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_v6t2_tune =
1903 &generic_extra_costs, /* Insn extra costs. */
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 1, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1924 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1925 const struct tune_params arm_cortex_tune =
1927 &generic_extra_costs,
1928 NULL, /* Sched adj cost. */
1929 arm_default_branch_cost,
1930 &arm_default_vec_cost,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL,
1936 tune_params::PREF_CONST_POOL_FALSE,
1937 tune_params::PREF_LDRD_FALSE,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER,
1941 tune_params::PREF_NEON_64_FALSE,
1942 tune_params::PREF_NEON_STRINGOPS_FALSE,
1943 tune_params::FUSE_NOTHING,
1944 tune_params::SCHED_AUTOPREF_OFF
1947 const struct tune_params arm_cortex_a8_tune =
1949 &cortexa8_extra_costs,
1950 NULL, /* Sched adj cost. */
1951 arm_default_branch_cost,
1952 &arm_default_vec_cost,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL,
1958 tune_params::PREF_CONST_POOL_FALSE,
1959 tune_params::PREF_LDRD_FALSE,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER,
1963 tune_params::PREF_NEON_64_FALSE,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE,
1965 tune_params::FUSE_NOTHING,
1966 tune_params::SCHED_AUTOPREF_OFF
1969 const struct tune_params arm_cortex_a7_tune =
1971 &cortexa7_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 arm_default_branch_cost,
1974 &arm_default_vec_cost,
1975 1, /* Constant limit. */
1976 5, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 2, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 tune_params::PREF_CONST_POOL_FALSE,
1981 tune_params::PREF_LDRD_FALSE,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_NEITHER,
1985 tune_params::PREF_NEON_64_FALSE,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE,
1987 tune_params::FUSE_NOTHING,
1988 tune_params::SCHED_AUTOPREF_OFF
1991 const struct tune_params arm_cortex_a15_tune =
1993 &cortexa15_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 3, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_TRUE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE,
2009 tune_params::FUSE_NOTHING,
2010 tune_params::SCHED_AUTOPREF_FULL
2013 const struct tune_params arm_cortex_a35_tune =
2015 &cortexa53_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 arm_default_branch_cost,
2018 &arm_default_vec_cost,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 1, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL,
2024 tune_params::PREF_CONST_POOL_FALSE,
2025 tune_params::PREF_LDRD_FALSE,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER,
2029 tune_params::PREF_NEON_64_FALSE,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2032 tune_params::SCHED_AUTOPREF_OFF
2035 const struct tune_params arm_cortex_a53_tune =
2037 &cortexa53_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_64_FALSE,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2054 tune_params::SCHED_AUTOPREF_OFF
2057 const struct tune_params arm_cortex_a57_tune =
2059 &cortexa57_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_TRUE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2076 tune_params::SCHED_AUTOPREF_FULL
2079 const struct tune_params arm_exynosm1_tune =
2081 &exynosm1_extra_costs,
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 3, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_TRUE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_OFF
2101 const struct tune_params arm_xgene1_tune =
2103 &xgene1_extra_costs,
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 32, /* Memset max inline. */
2110 4, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_64_FALSE,
2118 tune_params::PREF_NEON_STRINGOPS_FALSE,
2119 tune_params::FUSE_NOTHING,
2120 tune_params::SCHED_AUTOPREF_OFF
2123 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2124 less appealing. Set max_insns_skipped to a low value. */
2126 const struct tune_params arm_cortex_a5_tune =
2128 &cortexa5_extra_costs,
2129 NULL, /* Sched adj cost. */
2130 arm_cortex_a5_branch_cost,
2131 &arm_default_vec_cost,
2132 1, /* Constant limit. */
2133 1, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_NOT_BENEFICIAL,
2137 tune_params::PREF_CONST_POOL_FALSE,
2138 tune_params::PREF_LDRD_FALSE,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER,
2142 tune_params::PREF_NEON_64_FALSE,
2143 tune_params::PREF_NEON_STRINGOPS_TRUE,
2144 tune_params::FUSE_NOTHING,
2145 tune_params::SCHED_AUTOPREF_OFF
2148 const struct tune_params arm_cortex_a9_tune =
2150 &cortexa9_extra_costs,
2151 cortex_a9_sched_adjust_cost,
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 5, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_BENEFICIAL(4,32,32),
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_FALSE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_NEITHER,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2170 const struct tune_params arm_cortex_a12_tune =
2172 &cortexa12_extra_costs,
2173 NULL, /* Sched adj cost. */
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_TRUE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2189 tune_params::SCHED_AUTOPREF_OFF
2192 const struct tune_params arm_cortex_a73_tune =
2194 &cortexa57_extra_costs,
2195 NULL, /* Sched adj cost. */
2196 arm_default_branch_cost,
2197 &arm_default_vec_cost, /* Vectorizer costs. */
2198 1, /* Constant limit. */
2199 2, /* Max cond insns. */
2200 8, /* Memset max inline. */
2201 2, /* Issue rate. */
2202 ARM_PREFETCH_NOT_BENEFICIAL,
2203 tune_params::PREF_CONST_POOL_FALSE,
2204 tune_params::PREF_LDRD_TRUE,
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2207 tune_params::DISPARAGE_FLAGS_ALL,
2208 tune_params::PREF_NEON_64_FALSE,
2209 tune_params::PREF_NEON_STRINGOPS_TRUE,
2210 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2211 tune_params::SCHED_AUTOPREF_FULL
2214 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2215 cycle to execute each. An LDR from the constant pool also takes two cycles
2216 to execute, but mildly increases pipelining opportunity (consecutive
2217 loads/stores can be pipelined together, saving one cycle), and may also
2218 improve icache utilisation. Hence we prefer the constant pool for such
2219 processors. */
2221 const struct tune_params arm_v7m_tune =
2223 &v7m_extra_costs,
2224 NULL, /* Sched adj cost. */
2225 arm_cortex_m_branch_cost,
2226 &arm_default_vec_cost,
2227 1, /* Constant limit. */
2228 2, /* Max cond insns. */
2229 8, /* Memset max inline. */
2230 1, /* Issue rate. */
2231 ARM_PREFETCH_NOT_BENEFICIAL,
2232 tune_params::PREF_CONST_POOL_TRUE,
2233 tune_params::PREF_LDRD_FALSE,
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2235 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2236 tune_params::DISPARAGE_FLAGS_NEITHER,
2237 tune_params::PREF_NEON_64_FALSE,
2238 tune_params::PREF_NEON_STRINGOPS_FALSE,
2239 tune_params::FUSE_NOTHING,
2240 tune_params::SCHED_AUTOPREF_OFF
2243 /* Cortex-M7 tuning. */
2245 const struct tune_params arm_cortex_m7_tune =
2247 &v7m_extra_costs,
2248 NULL, /* Sched adj cost. */
2249 arm_cortex_m7_branch_cost,
2250 &arm_default_vec_cost,
2251 0, /* Constant limit. */
2252 1, /* Max cond insns. */
2253 8, /* Memset max inline. */
2254 2, /* Issue rate. */
2255 ARM_PREFETCH_NOT_BENEFICIAL,
2256 tune_params::PREF_CONST_POOL_TRUE,
2257 tune_params::PREF_LDRD_FALSE,
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2260 tune_params::DISPARAGE_FLAGS_NEITHER,
2261 tune_params::PREF_NEON_64_FALSE,
2262 tune_params::PREF_NEON_STRINGOPS_FALSE,
2263 tune_params::FUSE_NOTHING,
2264 tune_params::SCHED_AUTOPREF_OFF
2267 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2268 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2269 cortex-m23. */
2270 const struct tune_params arm_v6m_tune =
2272 &generic_extra_costs, /* Insn extra costs. */
2273 NULL, /* Sched adj cost. */
2274 arm_default_branch_cost,
2275 &arm_default_vec_cost, /* Vectorizer costs. */
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 1, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL,
2281 tune_params::PREF_CONST_POOL_FALSE,
2282 tune_params::PREF_LDRD_FALSE,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER,
2286 tune_params::PREF_NEON_64_FALSE,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2292 const struct tune_params arm_fa726te_tune =
2294 &generic_extra_costs, /* Insn extra costs. */
2295 fa726te_sched_adjust_cost,
2296 arm_default_branch_cost,
2297 &arm_default_vec_cost,
2298 1, /* Constant limit. */
2299 5, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2314 /* Auto-generated CPU, FPU and architecture tables. */
2315 #include "arm-cpu-data.h"
2317 /* The name of the preprocessor macro to define for this architecture. PROFILE
2318    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2319 is thus chosen to be big enough to hold the longest architecture name. */
2321 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
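/* For example, with -march=armv7-a the sprintf in arm_option_override rewrites
   this buffer to "__ARM_ARCH_7A__". */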
2323 /* Supported TLS relocations. */
2325 enum tls_reloc {
2326 TLS_GD32,
2327 TLS_LDM32,
2328 TLS_LDO32,
2329 TLS_IE32,
2330 TLS_LE32,
2331 TLS_DESCSEQ /* GNU scheme */
2334 /* The maximum number of insns to be used when loading a constant. */
2335 inline static int
2336 arm_constant_limit (bool size_p)
2338 return size_p ? 1 : current_tune->constant_limit;
2341 /* Emit an insn that's a simple single-set. Both the operands must be known
2342 to be valid. */
2343 inline static rtx_insn *
2344 emit_set_insn (rtx x, rtx y)
2346 return emit_insn (gen_rtx_SET (x, y));
2349 /* Return the number of bits set in VALUE. */
2350 static unsigned
2351 bit_count (unsigned long value)
2353 unsigned long count = 0;
2355 while (value)
2357 count++;
2358 value &= value - 1; /* Clear the least-significant set bit. */
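/* Each iteration clears exactly one set bit, so the loop runs once per set
   bit; e.g. bit_count (0xF0) returns 4. */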
2361 return count;
2364 /* Return the number of bits set in BMAP. */
2365 static unsigned
2366 bitmap_popcount (const sbitmap bmap)
2368 unsigned int count = 0;
2369 unsigned int n = 0;
2370 sbitmap_iterator sbi;
2372 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2373 count++;
2374 return count;
2377 typedef struct
2379 machine_mode mode;
2380 const char *name;
2381 } arm_fixed_mode_set;
2383 /* A small helper for setting the fixed-point libfuncs. */
2385 static void
2386 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2387 const char *funcname, const char *modename,
2388 int num_suffix)
2390 char buffer[50];
2392 if (num_suffix == 0)
2393 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2394 else
2395 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
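/* For example, "add", "qq" and num_suffix 3 produce the libcall name
   "__gnu_addqq3". */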
2397 set_optab_libfunc (optable, mode, buffer);
2400 static void
2401 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2402 machine_mode from, const char *funcname,
2403 const char *toname, const char *fromname)
2405 char buffer[50];
2406 const char *maybe_suffix_2 = "";
2408 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2409 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2410 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2411 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2412 maybe_suffix_2 = "2";
2414 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2415 maybe_suffix_2);
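/* For example, a fract conversion from SQmode to HQmode (same signedness,
   both fract modes) yields "__gnu_fractsqhq2", while SQmode to SImode gets no
   "2" suffix and yields "__gnu_fractsqsi". */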
2417 set_conv_libfunc (optable, to, from, buffer);
2420 /* Set up library functions unique to ARM. */
2422 static void
2423 arm_init_libfuncs (void)
2425 /* For Linux, we have access to kernel support for atomic operations. */
2426 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2427 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2429 /* There are no special library functions unless we are using the
2430 ARM BPABI. */
2431 if (!TARGET_BPABI)
2432 return;
2434 /* The functions below are described in Section 4 of the "Run-Time
2435 ABI for the ARM architecture", Version 1.0. */
2437 /* Double-precision floating-point arithmetic. Table 2. */
2438 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2439 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2440 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2441 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2442 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2444 /* Double-precision comparisons. Table 3. */
2445 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2446 set_optab_libfunc (ne_optab, DFmode, NULL);
2447 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2448 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2449 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2450 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2451 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2453 /* Single-precision floating-point arithmetic. Table 4. */
2454 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2455 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2456 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2457 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2458 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2460 /* Single-precision comparisons. Table 5. */
2461 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2462 set_optab_libfunc (ne_optab, SFmode, NULL);
2463 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2464 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2465 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2466 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2467 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2469 /* Floating-point to integer conversions. Table 6. */
2470 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2471 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2472 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2473 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2474 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2475 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2476 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2477 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2479 /* Conversions between floating types. Table 7. */
2480 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2481 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2483 /* Integer to floating-point conversions. Table 8. */
2484 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2486 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2487 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2488 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2490 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2491 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2493 /* Long long. Table 9. */
2494 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2495 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2496 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2497 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2498 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2499 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2500 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2501 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2503 /* Integer (32/32->32) division. \S 4.3.1. */
2504 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2505 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2507 /* The divmod functions are designed so that they can be used for
2508 plain division, even though they return both the quotient and the
2509 remainder. The quotient is returned in the usual location (i.e.,
2510 r0 for SImode, {r0, r1} for DImode), just as would be expected
2511 for an ordinary division routine. Because the AAPCS calling
2512 conventions specify that all of { r0, r1, r2, r3 } are
2513    call-clobbered registers, there is no need to tell the compiler
2514 explicitly that those registers are clobbered by these
2515 routines. */
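/* (The remainder is returned in r1 for SImode and in {r2, r3} for DImode,
   and is simply ignored when only the quotient is wanted.) */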
2516 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2517 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2519 /* For SImode division the ABI provides div-without-mod routines,
2520 which are faster. */
2521 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2522 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2524 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2525 divmod libcalls instead. */
2526 set_optab_libfunc (smod_optab, DImode, NULL);
2527 set_optab_libfunc (umod_optab, DImode, NULL);
2528 set_optab_libfunc (smod_optab, SImode, NULL);
2529 set_optab_libfunc (umod_optab, SImode, NULL);
2531 /* Half-precision float operations. The compiler handles all operations
2532    with NULL libfuncs by converting to SFmode. */
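/* For example, with -mfp16-format=ieee and no FP16 hardware, an HFmode
   addition is widened with __gnu_h2f_ieee, performed in SFmode, and narrowed
   back with __gnu_f2h_ieee. */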
2533 switch (arm_fp16_format)
2535 case ARM_FP16_FORMAT_IEEE:
2536 case ARM_FP16_FORMAT_ALTERNATIVE:
2538 /* Conversions. */
2539 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_f2h_ieee"
2542 : "__gnu_f2h_alternative"));
2543 set_conv_libfunc (sext_optab, SFmode, HFmode,
2544 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2545 ? "__gnu_h2f_ieee"
2546 : "__gnu_h2f_alternative"));
2548 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2549 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2550 ? "__gnu_d2h_ieee"
2551 : "__gnu_d2h_alternative"));
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, HFmode, NULL);
2555 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2556 set_optab_libfunc (smul_optab, HFmode, NULL);
2557 set_optab_libfunc (neg_optab, HFmode, NULL);
2558 set_optab_libfunc (sub_optab, HFmode, NULL);
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, HFmode, NULL);
2562 set_optab_libfunc (ne_optab, HFmode, NULL);
2563 set_optab_libfunc (lt_optab, HFmode, NULL);
2564 set_optab_libfunc (le_optab, HFmode, NULL);
2565 set_optab_libfunc (ge_optab, HFmode, NULL);
2566 set_optab_libfunc (gt_optab, HFmode, NULL);
2567 set_optab_libfunc (unord_optab, HFmode, NULL);
2568 break;
2570 default:
2571 break;
2574 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2576 const arm_fixed_mode_set fixed_arith_modes[] =
2578 { E_QQmode, "qq" },
2579 { E_UQQmode, "uqq" },
2580 { E_HQmode, "hq" },
2581 { E_UHQmode, "uhq" },
2582 { E_SQmode, "sq" },
2583 { E_USQmode, "usq" },
2584 { E_DQmode, "dq" },
2585 { E_UDQmode, "udq" },
2586 { E_TQmode, "tq" },
2587 { E_UTQmode, "utq" },
2588 { E_HAmode, "ha" },
2589 { E_UHAmode, "uha" },
2590 { E_SAmode, "sa" },
2591 { E_USAmode, "usa" },
2592 { E_DAmode, "da" },
2593 { E_UDAmode, "uda" },
2594 { E_TAmode, "ta" },
2595 { E_UTAmode, "uta" }
2597 const arm_fixed_mode_set fixed_conv_modes[] =
2599 { E_QQmode, "qq" },
2600 { E_UQQmode, "uqq" },
2601 { E_HQmode, "hq" },
2602 { E_UHQmode, "uhq" },
2603 { E_SQmode, "sq" },
2604 { E_USQmode, "usq" },
2605 { E_DQmode, "dq" },
2606 { E_UDQmode, "udq" },
2607 { E_TQmode, "tq" },
2608 { E_UTQmode, "utq" },
2609 { E_HAmode, "ha" },
2610 { E_UHAmode, "uha" },
2611 { E_SAmode, "sa" },
2612 { E_USAmode, "usa" },
2613 { E_DAmode, "da" },
2614 { E_UDAmode, "uda" },
2615 { E_TAmode, "ta" },
2616 { E_UTAmode, "uta" },
2617 { E_QImode, "qi" },
2618 { E_HImode, "hi" },
2619 { E_SImode, "si" },
2620 { E_DImode, "di" },
2621 { E_TImode, "ti" },
2622 { E_SFmode, "sf" },
2623 { E_DFmode, "df" }
2625 unsigned int i, j;
2627 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2629 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2630 "add", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2632 "ssadd", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2634 "usadd", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2636 "sub", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2638 "sssub", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2640 "ussub", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2642 "mul", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2644 "ssmul", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2646 "usmul", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2648 "div", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2650 "udiv", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2652 "ssdiv", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2654 "usdiv", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2656 "neg", fixed_arith_modes[i].name, 2);
2657 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2658 "ssneg", fixed_arith_modes[i].name, 2);
2659 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2660 "usneg", fixed_arith_modes[i].name, 2);
2661 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2662 "ashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2664 "ashr", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2666 "lshr", fixed_arith_modes[i].name, 3);
2667 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2668 "ssashl", fixed_arith_modes[i].name, 3);
2669 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2670 "usashl", fixed_arith_modes[i].name, 3);
2671 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2672 "cmp", fixed_arith_modes[i].name, 2);
2675 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2676 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2678 if (i == j
2679 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2680 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2681 continue;
2683 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "fract",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2687 arm_set_fixed_conv_libfunc (satfract_optab,
2688 fixed_conv_modes[i].mode,
2689 fixed_conv_modes[j].mode, "satfract",
2690 fixed_conv_modes[i].name,
2691 fixed_conv_modes[j].name);
2692 arm_set_fixed_conv_libfunc (fractuns_optab,
2693 fixed_conv_modes[i].mode,
2694 fixed_conv_modes[j].mode, "fractuns",
2695 fixed_conv_modes[i].name,
2696 fixed_conv_modes[j].name);
2697 arm_set_fixed_conv_libfunc (satfractuns_optab,
2698 fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "satfractuns",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2705 if (TARGET_AAPCS_BASED)
2706 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2709 /* On AAPCS systems, this is the "struct __va_list". */
2710 static GTY(()) tree va_list_type;
2712 /* Return the type to use as __builtin_va_list. */
2713 static tree
2714 arm_build_builtin_va_list (void)
2716 tree va_list_name;
2717 tree ap_field;
2719 if (!TARGET_AAPCS_BASED)
2720 return std_build_builtin_va_list ();
2722 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2723 defined as:
2725 struct __va_list
2727 void *__ap;
2730 The C Library ABI further reinforces this definition in \S
2731 4.1.
2733 We must follow this definition exactly. The structure tag
2734 name is visible in C++ mangled names, and thus forms a part
2735 of the ABI. The field name may be used by people who
2736 #include <stdarg.h>. */
2737 /* Create the type. */
2738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2739 /* Give it the required name. */
2740 va_list_name = build_decl (BUILTINS_LOCATION,
2741 TYPE_DECL,
2742 get_identifier ("__va_list"),
2743 va_list_type);
2744 DECL_ARTIFICIAL (va_list_name) = 1;
2745 TYPE_NAME (va_list_type) = va_list_name;
2746 TYPE_STUB_DECL (va_list_type) = va_list_name;
2747 /* Create the __ap field. */
2748 ap_field = build_decl (BUILTINS_LOCATION,
2749 FIELD_DECL,
2750 get_identifier ("__ap"),
2751 ptr_type_node);
2752 DECL_ARTIFICIAL (ap_field) = 1;
2753 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2754 TYPE_FIELDS (va_list_type) = ap_field;
2755 /* Compute its layout. */
2756 layout_type (va_list_type);
2758 return va_list_type;
2761 /* Return an expression of type "void *" pointing to the next
2762 available argument in a variable-argument list. VALIST is the
2763 user-level va_list object, of type __builtin_va_list. */
2764 static tree
2765 arm_extract_valist_ptr (tree valist)
2767 if (TREE_TYPE (valist) == error_mark_node)
2768 return error_mark_node;
2770 /* On an AAPCS target, the pointer is stored within "struct
2771 va_list". */
2772 if (TARGET_AAPCS_BASED)
2774 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2775 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2776 valist, ap_field, NULL_TREE);
2779 return valist;
2782 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2783 static void
2784 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2786 valist = arm_extract_valist_ptr (valist);
2787 std_expand_builtin_va_start (valist, nextarg);
2790 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2791 static tree
2792 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2793 gimple_seq *post_p)
2795 valist = arm_extract_valist_ptr (valist);
2796 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2799 /* Check any incompatible options that the user has specified. */
2800 static void
2801 arm_option_check_internal (struct gcc_options *opts)
2803 int flags = opts->x_target_flags;
2805 /* iWMMXt and NEON are incompatible. */
2806 if (TARGET_IWMMXT
2807 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2808 error ("iWMMXt and NEON are incompatible");
2810 /* Make sure that the processor choice does not conflict with any of the
2811 other command line choices. */
2812 if (TARGET_ARM_P (flags)
2813 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2814 error ("target CPU does not support ARM mode");
2816 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2817 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2818 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2820 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2821 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2823 /* If this target is normally configured to use APCS frames, warn if they
2824 are turned off and debugging is turned on. */
2825 if (TARGET_ARM_P (flags)
2826 && write_symbols != NO_DEBUG
2827 && !TARGET_APCS_FRAME
2828 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2829 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2831 /* iWMMXt unsupported under Thumb mode. */
2832 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2833 error ("iWMMXt unsupported under Thumb mode");
2835 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2836 error ("can not use -mtp=cp15 with 16-bit Thumb");
2838 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2840 error ("RTP PIC is incompatible with Thumb");
2841 flag_pic = 0;
2844 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2845 with MOVT. */
2846 if ((target_pure_code || target_slow_flash_data)
2847 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2849 const char *flag = (target_pure_code ? "-mpure-code" :
2850 "-mslow-flash-data");
2851 error ("%s only supports non-pic code on M-profile targets with the "
2852 "MOVT instruction", flag);
2857 /* Recompute the global settings depending on target attribute options. */
2859 static void
2860 arm_option_params_internal (void)
2862 /* If we are not using the default (ARM mode) section anchor offset
2863 ranges, then set the correct ranges now. */
2864 if (TARGET_THUMB1)
2866 /* Thumb-1 LDR instructions cannot have negative offsets.
2867 Permissible positive offset ranges are 5-bit (for byte loads),
2868 6-bit (for halfword loads), or 7-bit (for word loads).
2869 Empirical results suggest a 7-bit anchor range gives the best
2870 overall code size. */
2871 targetm.min_anchor_offset = 0;
2872 targetm.max_anchor_offset = 127;
2874 else if (TARGET_THUMB2)
2876 /* The minimum is set such that the total size of the block
2877 for a particular anchor is 248 + 1 + 4095 bytes, which is
2878 divisible by eight, ensuring natural spacing of anchors. */
2879 targetm.min_anchor_offset = -248;
2880 targetm.max_anchor_offset = 4095;
2882 else
2884 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2885 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2888 /* Increase the number of conditional instructions with -Os. */
2889 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2891 /* For THUMB2, we limit the conditional sequence to one IT block. */
2892 if (TARGET_THUMB2)
2893 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
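/* (An IT block can predicate at most four following instructions, and only
   one when IT usage is restricted.) */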
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
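/* That is, 2-byte function alignment for Thumb code when optimizing for
   size, 4-byte alignment otherwise. */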
2911 /* Implement targetm.override_options_after_change. */
2913 static void
2914 arm_override_options_after_change (void)
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2920 arm_override_options_after_change_1 (&global_options);
2923 /* Implement TARGET_OPTION_SAVE. */
2924 static void
2925 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2927 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2928 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2929 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2932 /* Implement TARGET_OPTION_RESTORE. */
2933 static void
2934 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2936 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2937 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2938 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2939 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2940 false);
2943 /* Reset options between modes that the user has specified. */
2944 static void
2945 arm_option_override_internal (struct gcc_options *opts,
2946 struct gcc_options *opts_set)
2948 arm_override_options_after_change_1 (opts);
2950 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2952 /* The default is to enable interworking, so this warning message would
2953 be confusing to users who have just compiled with, eg, -march=armv3. */
2954 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2955 opts->x_target_flags &= ~MASK_INTERWORK;
2958 if (TARGET_THUMB_P (opts->x_target_flags)
2959 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2961 warning (0, "target CPU does not support THUMB instructions");
2962 opts->x_target_flags &= ~MASK_THUMB;
2965 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2967 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2968 opts->x_target_flags &= ~MASK_APCS_FRAME;
2971 /* Callee super interworking implies thumb interworking. Adding
2972 this to the flags here simplifies the logic elsewhere. */
2973 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2974 opts->x_target_flags |= MASK_INTERWORK;
2976    /* Need to remember initial values so combinations of options like
2977 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2978 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2980 if (! opts_set->x_arm_restrict_it)
2981 opts->x_arm_restrict_it = arm_arch8;
2983 /* ARM execution state and M profile don't have [restrict] IT. */
2984 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2985 opts->x_arm_restrict_it = 0;
2987 /* Enable -munaligned-access by default for
2988 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2989 i.e. Thumb2 and ARM state only.
2990 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2991    - ARMv8 architecture-based processors.
2993 Disable -munaligned-access by default for
2994 - all pre-ARMv6 architecture-based processors
2995 - ARMv6-M architecture-based processors
2996 - ARMv8-M Baseline processors. */
2998 if (! opts_set->x_unaligned_access)
3000 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3001 && arm_arch6 && (arm_arch_notm || arm_arch7));
3003 else if (opts->x_unaligned_access == 1
3004 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3006 warning (0, "target CPU does not support unaligned accesses");
3007 opts->x_unaligned_access = 0;
3010 /* Don't warn since it's on by default in -O2. */
3011 if (TARGET_THUMB1_P (opts->x_target_flags))
3012 opts->x_flag_schedule_insns = 0;
3013 else
3014 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3016 /* Disable shrink-wrap when optimizing function for size, since it tends to
3017 generate additional returns. */
3018 if (optimize_function_for_size_p (cfun)
3019 && TARGET_THUMB2_P (opts->x_target_flags))
3020 opts->x_flag_shrink_wrap = false;
3021 else
3022 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3024 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3025 - epilogue_insns - does not accurately model the corresponding insns
3026 emitted in the asm file. In particular, see the comment in thumb_exit
3027 'Find out how many of the (return) argument registers we can corrupt'.
3028 As a consequence, the epilogue may clobber registers without fipa-ra
3029 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3030 TODO: Accurately model clobbers for epilogue_insns and reenable
3031 fipa-ra. */
3032 if (TARGET_THUMB1_P (opts->x_target_flags))
3033 opts->x_flag_ipa_ra = 0;
3034 else
3035 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3037 /* Thumb2 inline assembly code should always use unified syntax.
3038 This will apply to ARM and Thumb1 eventually. */
3039 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3041 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3042 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3043 #endif
3046 static sbitmap isa_all_fpubits;
3047 static sbitmap isa_quirkbits;
3049 /* Configure a build target TARGET from the user-specified options OPTS and
3050 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3051 architecture have been specified, but the two are not identical. */
3052 void
3053 arm_configure_build_target (struct arm_build_target *target,
3054 struct cl_target_option *opts,
3055 struct gcc_options *opts_set,
3056 bool warn_compatible)
3058 const cpu_option *arm_selected_tune = NULL;
3059 const arch_option *arm_selected_arch = NULL;
3060 const cpu_option *arm_selected_cpu = NULL;
3061 const arm_fpu_desc *arm_selected_fpu = NULL;
3062 const char *tune_opts = NULL;
3063 const char *arch_opts = NULL;
3064 const char *cpu_opts = NULL;
3066 bitmap_clear (target->isa);
3067 target->core_name = NULL;
3068 target->arch_name = NULL;
3070 if (opts_set->x_arm_arch_string)
3072 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3073 "-march",
3074 opts->x_arm_arch_string);
3075 arch_opts = strchr (opts->x_arm_arch_string, '+');
3078 if (opts_set->x_arm_cpu_string)
3080 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3081 opts->x_arm_cpu_string);
3082 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3083 arm_selected_tune = arm_selected_cpu;
3084 /* If taking the tuning from -mcpu, we don't need to rescan the
3085 options for tuning. */
3088 if (opts_set->x_arm_tune_string)
3090 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3091 opts->x_arm_tune_string);
3092 tune_opts = strchr (opts->x_arm_tune_string, '+');
3095 if (arm_selected_arch)
3097 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3098 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3099 arch_opts);
3101 if (arm_selected_cpu)
3103 auto_sbitmap cpu_isa (isa_num_bits);
3104 auto_sbitmap isa_delta (isa_num_bits);
3106 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3107 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3108 cpu_opts);
3109 bitmap_xor (isa_delta, cpu_isa, target->isa);
3110 /* Ignore any bits that are quirk bits. */
3111 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3112 /* Ignore (for now) any bits that might be set by -mfpu. */
3113 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3115 if (!bitmap_empty_p (isa_delta))
3117 if (warn_compatible)
3118 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3119 arm_selected_cpu->common.name,
3120 arm_selected_arch->common.name);
3121 /* -march wins for code generation.
3122 -mcpu wins for default tuning. */
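/* For example, "-mcpu=cortex-a7 -march=armv8-a" generates ARMv8-A code but
   keeps the Cortex-A7 tuning. */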
3123 if (!arm_selected_tune)
3124 arm_selected_tune = arm_selected_cpu;
3126 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3127 target->arch_name = arm_selected_arch->common.name;
3129 else
3131 /* Architecture and CPU are essentially the same.
3132 Prefer the CPU setting. */
3133 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3134 target->core_name = arm_selected_cpu->common.name;
3135 /* Copy the CPU's capabilities, so that we inherit the
3136 appropriate extensions and quirks. */
3137 bitmap_copy (target->isa, cpu_isa);
3140 else
3142 /* Pick a CPU based on the architecture. */
3143 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3144 target->arch_name = arm_selected_arch->common.name;
3145 /* Note: target->core_name is left unset in this path. */
3148 else if (arm_selected_cpu)
3150 target->core_name = arm_selected_cpu->common.name;
3151 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3152 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3153 cpu_opts);
3154 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3156 /* If the user did not specify a processor or architecture, choose
3157 one for them. */
3158 else
3160 const cpu_option *sel;
3161 auto_sbitmap sought_isa (isa_num_bits);
3162 bitmap_clear (sought_isa);
3163 auto_sbitmap default_isa (isa_num_bits);
3165 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3166 TARGET_CPU_DEFAULT);
3167 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3168 gcc_assert (arm_selected_cpu->common.name);
3170 /* RWE: All of the selection logic below (to the end of this
3171 'if' clause) looks somewhat suspect. It appears to be mostly
3172 there to support forcing thumb support when the default CPU
3173 does not have thumb (somewhat dubious in terms of what the
3174 user might be expecting). I think it should be removed once
3175 support for the pre-thumb era cores is removed. */
3176 sel = arm_selected_cpu;
3177 arm_initialize_isa (default_isa, sel->common.isa_bits);
3178 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3179 cpu_opts);
3181 /* Now check to see if the user has specified any command line
3182 switches that require certain abilities from the cpu. */
3184 if (TARGET_INTERWORK || TARGET_THUMB)
3186 bitmap_set_bit (sought_isa, isa_bit_thumb);
3187 bitmap_set_bit (sought_isa, isa_bit_mode32);
3189 /* There are no ARM processors that support both APCS-26 and
3190 interworking. Therefore we forcibly remove MODE26 from
3191    the isa features here (if it was set), so that the
3192 search below will always be able to find a compatible
3193 processor. */
3194 bitmap_clear_bit (default_isa, isa_bit_mode26);
3197 /* If there are such requirements and the default CPU does not
3198 satisfy them, we need to run over the complete list of
3199 cores looking for one that is satisfactory. */
3200 if (!bitmap_empty_p (sought_isa)
3201 && !bitmap_subset_p (sought_isa, default_isa))
3203 auto_sbitmap candidate_isa (isa_num_bits);
3204 /* We're only interested in a CPU with at least the
3205 capabilities of the default CPU and the required
3206 additional features. */
3207 bitmap_ior (default_isa, default_isa, sought_isa);
3209 /* Try to locate a CPU type that supports all of the abilities
3210 of the default CPU, plus the extra abilities requested by
3211 the user. */
3212 for (sel = all_cores; sel->common.name != NULL; sel++)
3214 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3215 /* An exact match? */
3216 if (bitmap_equal_p (default_isa, candidate_isa))
3217 break;
3220 if (sel->common.name == NULL)
3222 unsigned current_bit_count = isa_num_bits;
3223 const cpu_option *best_fit = NULL;
3225 /* Ideally we would like to issue an error message here
3226 saying that it was not possible to find a CPU compatible
3227 with the default CPU, but which also supports the command
3228 line options specified by the programmer, and so they
3229 ought to use the -mcpu=<name> command line option to
3230 override the default CPU type.
3232 If we cannot find a CPU that has exactly the
3233 characteristics of the default CPU and the given
3234 command line options we scan the array again looking
3235 for a best match. The best match must have at least
3236 the capabilities of the perfect match. */
3237 for (sel = all_cores; sel->common.name != NULL; sel++)
3239 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3241 if (bitmap_subset_p (default_isa, candidate_isa))
3243 unsigned count;
3245 bitmap_and_compl (candidate_isa, candidate_isa,
3246 default_isa);
3247 count = bitmap_popcount (candidate_isa);
3249 if (count < current_bit_count)
3251 best_fit = sel;
3252 current_bit_count = count;
3256 gcc_assert (best_fit);
3257 sel = best_fit;
3260 arm_selected_cpu = sel;
3263 /* Now we know the CPU, we can finally initialize the target
3264 structure. */
3265 target->core_name = arm_selected_cpu->common.name;
3266 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3272 gcc_assert (arm_selected_cpu);
3273 gcc_assert (arm_selected_arch);
3275 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3277 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3278 auto_sbitmap fpu_bits (isa_num_bits);
3280 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3281 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3282 bitmap_ior (target->isa, target->isa, fpu_bits);
3285 if (!arm_selected_tune)
3286 arm_selected_tune = arm_selected_cpu;
3287 else /* Validate the features passed to -mtune. */
3288 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3290 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3292 /* Finish initializing the target structure. */
3293 target->arch_pp_name = arm_selected_arch->arch;
3294 target->base_arch = arm_selected_arch->base_arch;
3295 target->profile = arm_selected_arch->profile;
3297 target->tune_flags = tune_data->tune_flags;
3298 target->tune = tune_data->tune;
3299 target->tune_core = tune_data->scheduler;
3302 /* Fix up any incompatible options that the user has specified. */
3303 static void
3304 arm_option_override (void)
3306 static const enum isa_feature fpu_bitlist[]
3307 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3308 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3309 cl_target_option opts;
3311 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3314 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3315 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3317 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3319 if (!global_options_set.x_arm_fpu_index)
3321 bool ok;
3322 int fpu_index;
3324 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3325 CL_TARGET);
3326 gcc_assert (ok);
3327 arm_fpu_index = (enum fpu_type) fpu_index;
3330 cl_target_option_save (&opts, &global_options);
3331 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3332 true);
3334 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3335 SUBTARGET_OVERRIDE_OPTIONS;
3336 #endif
3338 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3339 arm_base_arch = arm_active_target.base_arch;
3341 arm_tune = arm_active_target.tune_core;
3342 tune_flags = arm_active_target.tune_flags;
3343 current_tune = arm_active_target.tune;
3345 /* TBD: Dwarf info for apcs frame is not handled yet. */
3346 if (TARGET_APCS_FRAME)
3347 flag_shrink_wrap = false;
3349 /* BPABI targets use linker tricks to allow interworking on cores
3350 without thumb support. */
3351 if (TARGET_INTERWORK
3352 && !TARGET_BPABI
3353 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3355 warning (0, "target CPU does not support interworking");
3356 target_flags &= ~MASK_INTERWORK;
3359 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3361 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3362 target_flags |= MASK_APCS_FRAME;
3365 if (TARGET_POKE_FUNCTION_NAME)
3366 target_flags |= MASK_APCS_FRAME;
3368 if (TARGET_APCS_REENT && flag_pic)
3369 error ("-fpic and -mapcs-reent are incompatible");
3371 if (TARGET_APCS_REENT)
3372 warning (0, "APCS reentrant code not supported. Ignored");
3374 /* Initialize boolean versions of the architectural flags, for use
3375 in the arm.md file. */
3376 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3377 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3378 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3379 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3380 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3381 arm_arch5te = arm_arch5e
3382 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3383 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3384 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3385 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3386 arm_arch6m = arm_arch6 && !arm_arch_notm;
3387 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3388 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3389 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3390 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3391 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3392 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3393 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3394 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3395 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3396 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3397 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3398 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3399 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3400 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3401 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3402 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
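/* The ARMv8.2-A FP16 instructions operate only on IEEE-format
   half-precision values, so they cannot be combined with the
   alternative (non-IEEE) __fp16 format.  */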
3403 if (arm_fp16_inst)
3405 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3406 error ("selected fp16 options are incompatible");
3407 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3411 /* Set up some tuning parameters. */
3412 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3413 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3414 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3415 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3416 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3417 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3419 /* And finally, set up some quirks. */
3420 arm_arch_no_volatile_ce
3421 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3422 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3423 isa_bit_quirk_armv6kz);
3425 /* V5 code we generate is completely interworking capable, so we turn off
3426 TARGET_INTERWORK here to avoid many tests later on. */
3428 /* XXX However, we must pass the right pre-processor defines to CPP,
3429 or GLD can get confused. This is a hack. */
3430 if (TARGET_INTERWORK)
3431 arm_cpp_interwork = 1;
3433 if (arm_arch5)
3434 target_flags &= ~MASK_INTERWORK;
3436 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3437 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3439 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3440 error ("iwmmxt abi requires an iwmmxt capable cpu");
3442 /* If soft-float is specified then don't use FPU. */
3443 if (TARGET_SOFT_FLOAT)
3444 arm_fpu_attr = FPU_NONE;
3445 else
3446 arm_fpu_attr = FPU_VFP;
3448 if (TARGET_AAPCS_BASED)
3450 if (TARGET_CALLER_INTERWORKING)
3451 error ("AAPCS does not support -mcaller-super-interworking");
3452 else
3453 if (TARGET_CALLEE_INTERWORKING)
3454 error ("AAPCS does not support -mcallee-super-interworking");
3457 /* __fp16 support currently assumes the core has ldrh. */
3458 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3459 sorry ("__fp16 and no ldrh");
3461 if (TARGET_AAPCS_BASED)
3463 if (arm_abi == ARM_ABI_IWMMXT)
3464 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3465 else if (TARGET_HARD_FLOAT_ABI)
3467 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3468 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3469 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3471 else
3472 arm_pcs_default = ARM_PCS_AAPCS;
3474 else
3476 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3477 sorry ("-mfloat-abi=hard and VFP");
3479 if (arm_abi == ARM_ABI_APCS)
3480 arm_pcs_default = ARM_PCS_APCS;
3481 else
3482 arm_pcs_default = ARM_PCS_ATPCS;
3485 /* For arm2/3 there is no need to do any scheduling if we are doing
3486 software floating-point. */
3487 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3488 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3490 /* Use the cp15 method if it is available. */
3491 if (target_thread_pointer == TP_AUTO)
3493 if (arm_arch6k && !TARGET_THUMB1)
3494 target_thread_pointer = TP_CP15;
3495 else
3496 target_thread_pointer = TP_SOFT;
3499 /* Override the default structure alignment for AAPCS ABI. */
3500 if (!global_options_set.x_arm_structure_size_boundary)
3502 if (TARGET_AAPCS_BASED)
3503 arm_structure_size_boundary = 8;
3505 else
3507 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3509 if (arm_structure_size_boundary != 8
3510 && arm_structure_size_boundary != 32
3511 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3513 if (ARM_DOUBLEWORD_ALIGN)
3514 warning (0,
3515 "structure size boundary can only be set to 8, 32 or 64");
3516 else
3517 warning (0, "structure size boundary can only be set to 8 or 32");
3518 arm_structure_size_boundary
3519 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3523 if (TARGET_VXWORKS_RTP)
3525 if (!global_options_set.x_arm_pic_data_is_text_relative)
3526 arm_pic_data_is_text_relative = 0;
3528 else if (flag_pic
3529 && !arm_pic_data_is_text_relative
3530 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3531 /* When text & data segments don't have a fixed displacement, the
3532 intended use is with a single, read only, pic base register.
3533 Unless the user explicitly requested not to do that, set
3534 it. */
3535 target_flags |= MASK_SINGLE_PIC_BASE;
3537 /* If stack checking is disabled, we can use r10 as the PIC register,
3538 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3539 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3541 if (TARGET_VXWORKS_RTP)
3542 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3543 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3546 if (flag_pic && TARGET_VXWORKS_RTP)
3547 arm_pic_register = 9;
3549 if (arm_pic_register_string != NULL)
3551 int pic_register = decode_reg_name (arm_pic_register_string);
3553 if (!flag_pic)
3554 warning (0, "-mpic-register= is useless without -fpic");
3556 /* Prevent the user from choosing an obviously stupid PIC register. */
3557 else if (pic_register < 0 || call_used_regs[pic_register]
3558 || pic_register == HARD_FRAME_POINTER_REGNUM
3559 || pic_register == STACK_POINTER_REGNUM
3560 || pic_register >= PC_REGNUM
3561 || (TARGET_VXWORKS_RTP
3562 && (unsigned int) pic_register != arm_pic_register))
3563 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3564 else
3565 arm_pic_register = pic_register;
3568 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3569 if (fix_cm3_ldrd == 2)
3571 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3572 fix_cm3_ldrd = 1;
3573 else
3574 fix_cm3_ldrd = 0;
3577 /* Hot/Cold partitioning is not currently supported, since we can't
3578 handle literal pool placement in that case. */
3579 if (flag_reorder_blocks_and_partition)
3581 inform (input_location,
3582 "-freorder-blocks-and-partition not supported on this architecture");
3583 flag_reorder_blocks_and_partition = 0;
3584 flag_reorder_blocks = 1;
3587 if (flag_pic)
3588 /* Hoisting PIC address calculations more aggressively provides a small,
3589 but measurable, size reduction for PIC code. Therefore, we decrease
3590 the bar for unrestricted expression hoisting to the cost of PIC address
3591 calculation, which is 2 instructions. */
3592 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3593 global_options.x_param_values,
3594 global_options_set.x_param_values);
3596 /* ARM EABI defaults to strict volatile bitfields. */
3597 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3598 && abi_version_at_least(2))
3599 flag_strict_volatile_bitfields = 1;
3601 /* Enable software prefetching at -O3 for CPUs that have prefetch and
3602 for which we have deemed it beneficial (signified by setting
3603 prefetch.num_slots to 1 or more). */
3604 if (flag_prefetch_loop_arrays < 0
3605 && HAVE_prefetch
3606 && optimize >= 3
3607 && current_tune->prefetch.num_slots > 0)
3608 flag_prefetch_loop_arrays = 1;
3610 /* Set up parameters to be used in prefetching algorithm. Do not
3611 override the defaults unless we are tuning for a core we have
3612 researched values for. */
3613 if (current_tune->prefetch.num_slots > 0)
3614 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3615 current_tune->prefetch.num_slots,
3616 global_options.x_param_values,
3617 global_options_set.x_param_values);
3618 if (current_tune->prefetch.l1_cache_line_size >= 0)
3619 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3620 current_tune->prefetch.l1_cache_line_size,
3621 global_options.x_param_values,
3622 global_options_set.x_param_values);
3623 if (current_tune->prefetch.l1_cache_size >= 0)
3624 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3625 current_tune->prefetch.l1_cache_size,
3626 global_options.x_param_values,
3627 global_options_set.x_param_values);
3629 /* Use Neon to perform 64-bit operations rather than core
3630 registers. */
3631 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3632 if (use_neon_for_64bits == 1)
3633 prefer_neon_for_64bits = true;
3635 /* Use the alternative scheduling-pressure algorithm by default. */
3636 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3637 global_options.x_param_values,
3638 global_options_set.x_param_values);
3640 /* Look through the ready list and all of the queue for instructions
3641 relevant to the L2 auto-prefetcher. */
3642 int param_sched_autopref_queue_depth;
3644 switch (current_tune->sched_autopref)
3646 case tune_params::SCHED_AUTOPREF_OFF:
3647 param_sched_autopref_queue_depth = -1;
3648 break;
3650 case tune_params::SCHED_AUTOPREF_RANK:
3651 param_sched_autopref_queue_depth = 0;
3652 break;
3654 case tune_params::SCHED_AUTOPREF_FULL:
3655 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3656 break;
3658 default:
3659 gcc_unreachable ();
3662 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3663 param_sched_autopref_queue_depth,
3664 global_options.x_param_values,
3665 global_options_set.x_param_values);
3667 /* Currently, for slow flash data, we just disable literal pools. We also
3668 disable them for pure-code. */
3669 if (target_slow_flash_data || target_pure_code)
3670 arm_disable_literal_pool = true;
3672 if (use_cmse && !arm_arch_cmse)
3673 error ("target CPU does not support ARMv8-M Security Extensions");
3675 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3676 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3677 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3678 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3680 /* Disable scheduling fusion by default if the processor is not ARMv7
3681 or does not prefer ldrd/strd. */
3682 if (flag_schedule_fusion == 2
3683 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3684 flag_schedule_fusion = 0;
3686 /* Need to remember initial options before they are overridden. */
3687 init_optimize = build_optimization_node (&global_options);
3689 arm_option_override_internal (&global_options, &global_options_set);
3690 arm_option_check_internal (&global_options);
3691 arm_option_params_internal ();
3693 /* Create the default target_options structure. */
3694 target_option_default_node = target_option_current_node
3695 = build_target_option_node (&global_options);
3697 /* Register global variables with the garbage collector. */
3698 arm_add_gc_roots ();
3700 /* Init initial mode for testing. */
3701 thumb_flipper = TARGET_THUMB;
3704 static void
3705 arm_add_gc_roots (void)
3707 gcc_obstack_init(&minipool_obstack);
3708 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3711 /* A table of known ARM exception types.
3712 For use with the interrupt function attribute. */
3714 typedef struct
3716 const char *const arg;
3717 const unsigned long return_value;
3719 isr_attribute_arg;
3721 static const isr_attribute_arg isr_attribute_args [] =
3723 { "IRQ", ARM_FT_ISR },
3724 { "irq", ARM_FT_ISR },
3725 { "FIQ", ARM_FT_FIQ },
3726 { "fiq", ARM_FT_FIQ },
3727 { "ABORT", ARM_FT_ISR },
3728 { "abort", ARM_FT_ISR },
3729 { "ABORT", ARM_FT_ISR },
3730 { "abort", ARM_FT_ISR },
3731 { "UNDEF", ARM_FT_EXCEPTION },
3732 { "undef", ARM_FT_EXCEPTION },
3733 { "SWI", ARM_FT_EXCEPTION },
3734 { "swi", ARM_FT_EXCEPTION },
3735 { NULL, ARM_FT_NORMAL }
3738 /* Returns the (interrupt) function type of the current
3739 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3741 static unsigned long
3742 arm_isr_value (tree argument)
3744 const isr_attribute_arg * ptr;
3745 const char * arg;
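/* On M-profile cores exception handlers use the normal calling
   convention, so no special register saving is needed; only the
   possible 8-byte stack realignment on entry still matters.  */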
3747 if (!arm_arch_notm)
3748 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3750 /* No argument - default to IRQ. */
3751 if (argument == NULL_TREE)
3752 return ARM_FT_ISR;
3754 /* Get the value of the argument. */
3755 if (TREE_VALUE (argument) == NULL_TREE
3756 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3757 return ARM_FT_UNKNOWN;
3759 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3761 /* Check it against the list of known arguments. */
3762 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3763 if (streq (arg, ptr->arg))
3764 return ptr->return_value;
3766 /* An unrecognized interrupt type. */
3767 return ARM_FT_UNKNOWN;
3770 /* Computes the type of the current function. */
3772 static unsigned long
3773 arm_compute_func_type (void)
3775 unsigned long type = ARM_FT_UNKNOWN;
3776 tree a;
3777 tree attr;
3779 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3781 /* Decide if the current function is volatile. Such functions
3782 never return, and many memory cycles can be saved by not storing
3783 register values that will never be needed again. This optimization
3784 was added to speed up context switching in a kernel application. */
3785 if (optimize > 0
3786 && (TREE_NOTHROW (current_function_decl)
3787 || !(flag_unwind_tables
3788 || (flag_exceptions
3789 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3790 && TREE_THIS_VOLATILE (current_function_decl))
3791 type |= ARM_FT_VOLATILE;
3793 if (cfun->static_chain_decl != NULL)
3794 type |= ARM_FT_NESTED;
3796 attr = DECL_ATTRIBUTES (current_function_decl);
3798 a = lookup_attribute ("naked", attr);
3799 if (a != NULL_TREE)
3800 type |= ARM_FT_NAKED;
3802 a = lookup_attribute ("isr", attr);
3803 if (a == NULL_TREE)
3804 a = lookup_attribute ("interrupt", attr);
3806 if (a == NULL_TREE)
3807 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3808 else
3809 type |= arm_isr_value (TREE_VALUE (a));
3811 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3812 type |= ARM_FT_CMSE_ENTRY;
3814 return type;
3817 /* Returns the type of the current function. */
3819 unsigned long
3820 arm_current_func_type (void)
3822 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3823 cfun->machine->func_type = arm_compute_func_type ();
3825 return cfun->machine->func_type;
3828 bool
3829 arm_allocate_stack_slots_for_args (void)
3831 /* Naked functions should not allocate stack slots for arguments. */
3832 return !IS_NAKED (arm_current_func_type ());
3835 static bool
3836 arm_warn_func_return (tree decl)
3838 /* Naked functions are implemented entirely in assembly, including the
3839 return sequence, so suppress warnings about this. */
3840 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3844 /* Output assembler code for a block containing the constant parts
3845 of a trampoline, leaving space for the variable parts.
3847 On the ARM, (if r8 is the static chain regnum, and remembering that
3848 referencing pc adds an offset of 8) the trampoline looks like:
3849 ldr r8, [pc, #0]
3850 ldr pc, [pc]
3851 .word static chain value
3852 .word function's address
3853 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3855 static void
3856 arm_asm_trampoline_template (FILE *f)
3858 fprintf (f, "\t.syntax unified\n");
3860 if (TARGET_ARM)
3862 fprintf (f, "\t.arm\n");
3863 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3864 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3866 else if (TARGET_THUMB2)
3868 fprintf (f, "\t.thumb\n");
3869 /* The Thumb-2 trampoline is similar to the arm implementation.
3870 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3871 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3872 STATIC_CHAIN_REGNUM, PC_REGNUM);
3873 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3875 else
3877 ASM_OUTPUT_ALIGN (f, 2);
3878 fprintf (f, "\t.code\t16\n");
3879 fprintf (f, ".Ltrampoline_start:\n");
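/* Thumb-1 cannot load the PC directly, so the emitted sequence saves
   r0/r1, loads the static chain and the target address with
   pc-relative loads, overwrites the saved r1 slot with the target,
   and then pops it straight into the PC.  */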
3880 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3881 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3882 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3883 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3884 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3885 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3887 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3888 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3891 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3893 static void
3894 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3896 rtx fnaddr, mem, a_tramp;
3898 emit_block_move (m_tramp, assemble_trampoline_template (),
3899 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
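/* The static chain and target address slots follow the code part of
   the template: 8 bytes of instructions for the 32-bit templates,
   12 bytes for the Thumb-1 one, hence the differing offsets below.  */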
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3902 emit_move_insn (mem, chain_value);
3904 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3905 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3906 emit_move_insn (mem, fnaddr);
3908 a_tramp = XEXP (m_tramp, 0);
3909 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3910 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3911 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3914 /* Thumb trampolines should be entered in thumb mode, so set
3915 the bottom bit of the address. */
3917 static rtx
3918 arm_trampoline_adjust_address (rtx addr)
3920 if (TARGET_THUMB)
3921 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3922 NULL, 0, OPTAB_LIB_WIDEN);
3923 return addr;
3926 /* Return 1 if it is possible to return using a single instruction.
3927 If SIBLING is non-null, this is a test for a return before a sibling
3928 call. SIBLING is the call insn, so we can examine its register usage. */
3930 int
3931 use_return_insn (int iscond, rtx sibling)
3933 int regno;
3934 unsigned int func_type;
3935 unsigned long saved_int_regs;
3936 unsigned HOST_WIDE_INT stack_adjust;
3937 arm_stack_offsets *offsets;
3939 /* Never use a return instruction before reload has run. */
3940 if (!reload_completed)
3941 return 0;
3943 func_type = arm_current_func_type ();
3945 /* Naked, volatile and stack alignment functions need special
3946 consideration. */
3947 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3948 return 0;
3950 /* So do interrupt functions that use the frame pointer and Thumb
3951 interrupt functions. */
3952 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3953 return 0;
3955 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3956 && !optimize_function_for_size_p (cfun))
3957 return 0;
3959 offsets = arm_get_frame_offsets ();
3960 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3962 /* As do variadic functions. */
3963 if (crtl->args.pretend_args_size
3964 || cfun->machine->uses_anonymous_args
3965 /* Or if the function calls __builtin_eh_return () */
3966 || crtl->calls_eh_return
3967 /* Or if the function calls alloca */
3968 || cfun->calls_alloca
3969 /* Or if there is a stack adjustment. However, if the stack pointer
3970 is saved on the stack, we can use a pre-incrementing stack load. */
3971 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3972 && stack_adjust == 4))
3973 /* Or if the static chain register was saved above the frame, under the
3974 assumption that the stack pointer isn't saved on the stack. */
3975 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3976 && arm_compute_static_chain_stack_bytes() != 0))
3977 return 0;
3979 saved_int_regs = offsets->saved_regs_mask;
3981 /* Unfortunately, the insn
3983 ldmib sp, {..., sp, ...}
3985 triggers a bug on most SA-110 based devices, such that the stack
3986 pointer won't be correctly restored if the instruction takes a
3987 page fault. We work around this problem by popping r3 along with
3988 the other registers, since that is never slower than executing
3989 another instruction.
3991 We test for !arm_arch5 here, because code for any architecture
3992 less than this could potentially be run on one of the buggy
3993 chips. */
3994 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3996 /* Validate that r3 is a call-clobbered register (always true in
3997 the default abi) ... */
3998 if (!call_used_regs[3])
3999 return 0;
4001 /* ... that it isn't being used for a return value ... */
4002 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4003 return 0;
4005 /* ... or for a tail-call argument ... */
4006 if (sibling)
4008 gcc_assert (CALL_P (sibling));
4010 if (find_regno_fusage (sibling, USE, 3))
4011 return 0;
4014 /* ... and that there are no call-saved registers in r0-r2
4015 (always true in the default ABI). */
4016 if (saved_int_regs & 0x7)
4017 return 0;
4020 /* Can't be done if interworking with Thumb, and any registers have been
4021 stacked. */
4022 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4023 return 0;
4025 /* On StrongARM, conditional returns are expensive if they aren't
4026 taken and multiple registers have been stacked. */
4027 if (iscond && arm_tune_strongarm)
4029 /* A conditional return when just the LR is stored is a simple
4030 conditional-load instruction, so that's not expensive. */
4031 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4032 return 0;
4034 if (flag_pic
4035 && arm_pic_register != INVALID_REGNUM
4036 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4037 return 0;
4040 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4041 several instructions if anything needs to be popped. */
4042 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4043 return 0;
4045 /* If there are saved registers but the LR isn't saved, then we need
4046 two instructions for the return. */
4047 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4048 return 0;
4050 /* Can't be done if any of the VFP regs are pushed,
4051 since this also requires an insn. */
4052 if (TARGET_HARD_FLOAT)
4053 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4054 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4055 return 0;
4057 if (TARGET_REALLY_IWMMXT)
4058 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4059 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4060 return 0;
4062 return 1;
4065 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4066 shrink-wrapping if possible. This is the case if we need to emit a
4067 prologue, which we can test by looking at the offsets. */
4068 bool
4069 use_simple_return_p (void)
4071 arm_stack_offsets *offsets;
4073 /* Note this function can be called before or after reload. */
4074 if (!reload_completed)
4075 arm_compute_frame_layout ();
4077 offsets = arm_get_frame_offsets ();
4078 return offsets->outgoing_args != 0;
4081 /* Return TRUE if int I is a valid immediate ARM constant. */
4083 int
4084 const_ok_for_arm (HOST_WIDE_INT i)
4086 int lowbit;
4088 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4089 be all zero, or all one. */
4090 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4091 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4092 != ((~(unsigned HOST_WIDE_INT) 0)
4093 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4094 return FALSE;
4096 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4098 /* Fast return for 0 and small values. We must do this for zero, since
4099 the code below can't handle that one case. */
4100 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4101 return TRUE;
4103 /* Get the number of trailing zeros. */
4104 lowbit = ffs((int) i) - 1;
4106 /* Only even shifts are allowed in ARM mode so round down to the
4107 nearest even number. */
4108 if (TARGET_ARM)
4109 lowbit &= ~1;
4111 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4112 return TRUE;
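/* An ARM data-processing immediate is an 8-bit value rotated right by
   an even amount.  The test above handles rotations that do not wrap
   past bit 31; the masks below catch the wrap-around cases, e.g.
   0xF000000F is 0xFF rotated right by 4.  */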
4114 if (TARGET_ARM)
4116 /* Allow rotated constants in ARM mode. */
4117 if (lowbit <= 4
4118 && ((i & ~0xc000003f) == 0
4119 || (i & ~0xf000000f) == 0
4120 || (i & ~0xfc000003) == 0))
4121 return TRUE;
4123 else if (TARGET_THUMB2)
4125 HOST_WIDE_INT v;
4127 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4128 v = i & 0xff;
4129 v |= v << 16;
4130 if (i == v || i == (v | (v << 8)))
4131 return TRUE;
4133 /* Allow repeated pattern 0xXY00XY00. */
4134 v = i & 0xff00;
4135 v |= v << 16;
4136 if (i == v)
4137 return TRUE;
4139 else if (TARGET_HAVE_MOVT)
4141 /* Thumb-1 Targets with MOVT. */
4142 if (i > 0xffff)
4143 return FALSE;
4144 else
4145 return TRUE;
4148 return FALSE;
4151 /* Return true if I is a valid constant for the operation CODE. */
4152 int
4153 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4155 if (const_ok_for_arm (i))
4156 return 1;
4158 switch (code)
4160 case SET:
4161 /* See if we can use movw. */
4162 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4163 return 1;
4164 else
4165 /* Otherwise, try mvn. */
4166 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4168 case PLUS:
4169 /* See if we can use addw or subw. */
4170 if (TARGET_THUMB2
4171 && ((i & 0xfffff000) == 0
4172 || ((-i) & 0xfffff000) == 0))
4173 return 1;
4174 /* Fall through. */
4175 case COMPARE:
4176 case EQ:
4177 case NE:
4178 case GT:
4179 case LE:
4180 case LT:
4181 case GE:
4182 case GEU:
4183 case LTU:
4184 case GTU:
4185 case LEU:
4186 case UNORDERED:
4187 case ORDERED:
4188 case UNEQ:
4189 case UNGE:
4190 case UNLT:
4191 case UNGT:
4192 case UNLE:
4193 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4195 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4196 case XOR:
4197 return 0;
4199 case IOR:
4200 if (TARGET_THUMB2)
4201 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4202 return 0;
4204 case AND:
4205 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4207 default:
4208 gcc_unreachable ();
4212 /* Return true if I is a valid DImode constant for the operation CODE. */
4213 int
4214 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4216 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4217 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4218 rtx hi = GEN_INT (hi_val);
4219 rtx lo = GEN_INT (lo_val);
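/* The DImode constant is handled as two independent 32-bit halves.
   For the logical ops a half of all ones is also acceptable, since
   that half of the operation reduces to a move or an mvn.  */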
4221 if (TARGET_THUMB1)
4222 return 0;
4224 switch (code)
4226 case AND:
4227 case IOR:
4228 case XOR:
4229 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4230 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4231 case PLUS:
4232 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4234 default:
4235 return 0;
4239 /* Emit a sequence of insns to handle a large constant.
4240 CODE is the code of the operation required, it can be any of SET, PLUS,
4241 IOR, AND, XOR, MINUS;
4242 MODE is the mode in which the operation is being performed;
4243 VAL is the integer to operate on;
4244 SOURCE is the other operand (a register, or a null-pointer for SET);
4245 SUBTARGETS means it is safe to create scratch registers if that will
4246 either produce a simpler sequence, or we will want to cse the values.
4247 Return value is the number of insns emitted. */
4249 /* ??? Tweak this for thumb2. */
4250 int
4251 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4252 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4254 rtx cond;
4256 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4257 cond = COND_EXEC_TEST (PATTERN (insn));
4258 else
4259 cond = NULL_RTX;
4261 if (subtargets || code == SET
4262 || (REG_P (target) && REG_P (source)
4263 && REGNO (target) != REGNO (source)))
4265 /* After arm_reorg has been called, we can't fix up expensive
4266 constants by pushing them into memory so we must synthesize
4267 them in-line, regardless of the cost. This is only likely to
4268 be more costly on chips that have load delay slots and we are
4269 compiling without running the scheduler (so no splitting
4270 occurred before the final instruction emission).
4272 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4274 if (!cfun->machine->after_arm_reorg
4275 && !cond
4276 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4277 1, 0)
4278 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4279 + (code != SET))))
4281 if (code == SET)
4283 /* Currently SET is the only monadic value for CODE; all
4284 the rest are dyadic. */
4285 if (TARGET_USE_MOVT)
4286 arm_emit_movpair (target, GEN_INT (val));
4287 else
4288 emit_set_insn (target, GEN_INT (val));
4290 return 1;
4292 else
4294 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4296 if (TARGET_USE_MOVT)
4297 arm_emit_movpair (temp, GEN_INT (val));
4298 else
4299 emit_set_insn (temp, GEN_INT (val));
4301 /* For MINUS, SOURCE is subtracted from the constant value, since we never
4302 have subtraction of a constant. */
4303 if (code == MINUS)
4304 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4305 else
4306 emit_set_insn (target,
4307 gen_rtx_fmt_ee (code, mode, source, temp));
4308 return 2;
4313 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4317 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4318 ARM/THUMB2 immediates and add up to VAL.
4319 The function return value gives the number of insns required. */
4320 static int
4321 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4322 struct four_ints *return_sequence)
4324 int best_consecutive_zeros = 0;
4325 int i;
4326 int best_start = 0;
4327 int insns1, insns2;
4328 struct four_ints tmp_sequence;
4330 /* If we aren't targeting ARM, the best place to start is always at
4331 the bottom; otherwise look more closely. */
4332 if (TARGET_ARM)
4334 for (i = 0; i < 32; i += 2)
4336 int consecutive_zeros = 0;
4338 if (!(val & (3 << i)))
4340 while ((i < 32) && !(val & (3 << i)))
4342 consecutive_zeros += 2;
4343 i += 2;
4345 if (consecutive_zeros > best_consecutive_zeros)
4347 best_consecutive_zeros = consecutive_zeros;
4348 best_start = i - consecutive_zeros;
4350 i -= 2;
4355 /* So long as it won't require any more insns to do so, it's
4356 desirable to emit a small constant (in bits 0...9) in the last
4357 insn. This way there is more chance that it can be combined with
4358 a later addressing insn to form a pre-indexed load or store
4359 operation. Consider:
4361 *((volatile int *)0xe0000100) = 1;
4362 *((volatile int *)0xe0000110) = 2;
4364 We want this to wind up as:
4366 mov rA, #0xe0000000
4367 mov rB, #1
4368 str rB, [rA, #0x100]
4369 mov rB, #2
4370 str rB, [rA, #0x110]
4372 rather than having to synthesize both large constants from scratch.
4374 Therefore, we calculate how many insns would be required to emit
4375 the constant starting from `best_start', and also starting from
4376 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4377 yield a shorter sequence, we may as well use zero. */
4378 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4379 if (best_start != 0
4380 && ((HOST_WIDE_INT_1U << best_start) < val))
4382 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4383 if (insns2 <= insns1)
4385 *return_sequence = tmp_sequence;
4386 insns1 = insns2;
4390 return insns1;
4393 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4394 static int
4395 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4396 struct four_ints *return_sequence, int i)
4398 int remainder = val & 0xffffffff;
4399 int insns = 0;
4401 /* Try and find a way of doing the job in either two or three
4402 instructions.
4404 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4405 location. We start at position I. This may be the MSB, or
4406 optimal_immediate_sequence may have positioned it at the largest block
4407 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4408 wrapping around to the top of the word when we drop off the bottom.
4409 In the worst case this code should produce no more than four insns.
4411 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4412 constants, shifted to any arbitrary location. We should always start
4413 at the MSB. */
4416 int end;
4417 unsigned int b1, b2, b3, b4;
4418 unsigned HOST_WIDE_INT result;
4419 int loc;
4421 gcc_assert (insns < 4);
4423 if (i <= 0)
4424 i += 32;
4426 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4427 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4429 loc = i;
4430 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4431 /* We can use addw/subw for the last 12 bits. */
4432 result = remainder;
4433 else
4435 /* Use an 8-bit shifted/rotated immediate. */
4436 end = i - 8;
4437 if (end < 0)
4438 end += 32;
4439 result = remainder & ((0x0ff << end)
4440 | ((i < end) ? (0xff >> (32 - end))
4441 : 0));
4442 i -= 8;
4445 else
4447 /* ARM allows rotates by a multiple of two. Thumb-2 allows
4448 arbitrary shifts. */
4449 i -= TARGET_ARM ? 2 : 1;
4450 continue;
4453 /* Next, see if we can do a better job with a thumb2 replicated
4454 constant.
4456 We do it this way around to catch the cases like 0x01F001E0 where
4457 two 8-bit immediates would work, but a replicated constant would
4458 make it worse.
4460 TODO: 16-bit constants that don't clear all the bits, but still win.
4461 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4462 if (TARGET_THUMB2)
4464 b1 = (remainder & 0xff000000) >> 24;
4465 b2 = (remainder & 0x00ff0000) >> 16;
4466 b3 = (remainder & 0x0000ff00) >> 8;
4467 b4 = remainder & 0xff;
4469 if (loc > 24)
4471 /* The 8-bit immediate already found clears b1 (and maybe b2),
4472 but must leave b3 and b4 alone. */
4474 /* First try to find a 32-bit replicated constant that clears
4475 almost everything. We can assume that we can't do it in one,
4476 or else we wouldn't be here. */
4477 unsigned int tmp = b1 & b2 & b3 & b4;
4478 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4479 + (tmp << 24);
4480 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4481 + (tmp == b3) + (tmp == b4);
4482 if (tmp
4483 && (matching_bytes >= 3
4484 || (matching_bytes == 2
4485 && const_ok_for_op (remainder & ~tmp2, code))))
4487 /* At least 3 of the bytes match, and the fourth has at
4488 least as many bits set, or two of the bytes match
4489 and it will only require one more insn to finish. */
4490 result = tmp2;
4491 i = tmp != b1 ? 32
4492 : tmp != b2 ? 24
4493 : tmp != b3 ? 16
4494 : 8;
4497 /* Second, try to find a 16-bit replicated constant that can
4498 leave three of the bytes clear. If b2 or b4 is already
4499 zero, then we can. If the 8-bit from above would not
4500 clear b2 anyway, then we still win. */
4501 else if (b1 == b3 && (!b2 || !b4
4502 || (remainder & 0x00ff0000 & ~result)))
4504 result = remainder & 0xff00ff00;
4505 i = 24;
4508 else if (loc > 16)
4510 /* The 8-bit immediate already found clears b2 (and maybe b3)
4511 and we don't get here unless b1 is already clear, but it will
4512 leave b4 unchanged. */
4514 /* If we can clear b2 and b4 at once, then we win, since the
4515 8-bits couldn't possibly reach that far. */
4516 if (b2 == b4)
4518 result = remainder & 0x00ff00ff;
4519 i = 16;
4524 return_sequence->i[insns++] = result;
4525 remainder &= ~result;
4527 if (code == SET || code == MINUS)
4528 code = PLUS;
4530 while (remainder);
4532 return insns;
4535 /* Emit an instruction with the indicated PATTERN. If COND is
4536 non-NULL, conditionalize the execution of the instruction on COND
4537 being true. */
4539 static void
4540 emit_constant_insn (rtx cond, rtx pattern)
4542 if (cond)
4543 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4544 emit_insn (pattern);
4547 /* As above, but extra parameter GENERATE which, if clear, suppresses
4548 RTL generation. */
4550 static int
4551 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4552 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4553 int subtargets, int generate)
4555 int can_invert = 0;
4556 int can_negate = 0;
4557 int final_invert = 0;
4558 int i;
4559 int set_sign_bit_copies = 0;
4560 int clear_sign_bit_copies = 0;
4561 int clear_zero_bit_copies = 0;
4562 int set_zero_bit_copies = 0;
4563 int insns = 0, neg_insns, inv_insns;
4564 unsigned HOST_WIDE_INT temp1, temp2;
4565 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4566 struct four_ints *immediates;
4567 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4569 /* Find out which operations are safe for a given CODE. Also do a quick
4570 check for degenerate cases; these can occur when DImode operations
4571 are split. */
4572 switch (code)
4574 case SET:
4575 can_invert = 1;
4576 break;
4578 case PLUS:
4579 can_negate = 1;
4580 break;
4582 case IOR:
4583 if (remainder == 0xffffffff)
4585 if (generate)
4586 emit_constant_insn (cond,
4587 gen_rtx_SET (target,
4588 GEN_INT (ARM_SIGN_EXTEND (val))));
4589 return 1;
4592 if (remainder == 0)
4594 if (reload_completed && rtx_equal_p (target, source))
4595 return 0;
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, source));
4599 return 1;
4601 break;
4603 case AND:
4604 if (remainder == 0)
4606 if (generate)
4607 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4608 return 1;
4610 if (remainder == 0xffffffff)
4612 if (reload_completed && rtx_equal_p (target, source))
4613 return 0;
4614 if (generate)
4615 emit_constant_insn (cond, gen_rtx_SET (target, source));
4616 return 1;
4618 can_invert = 1;
4619 break;
4621 case XOR:
4622 if (remainder == 0)
4624 if (reload_completed && rtx_equal_p (target, source))
4625 return 0;
4626 if (generate)
4627 emit_constant_insn (cond, gen_rtx_SET (target, source));
4628 return 1;
4631 if (remainder == 0xffffffff)
4633 if (generate)
4634 emit_constant_insn (cond,
4635 gen_rtx_SET (target,
4636 gen_rtx_NOT (mode, source)));
4637 return 1;
4639 final_invert = 1;
4640 break;
4642 case MINUS:
4643 /* We treat MINUS as (val - source), since (source - val) is always
4644 passed as (source + (-val)). */
4645 if (remainder == 0)
4647 if (generate)
4648 emit_constant_insn (cond,
4649 gen_rtx_SET (target,
4650 gen_rtx_NEG (mode, source)));
4651 return 1;
4653 if (const_ok_for_arm (val))
4655 if (generate)
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (target,
4658 gen_rtx_MINUS (mode, GEN_INT (val),
4659 source)));
4660 return 1;
4663 break;
4665 default:
4666 gcc_unreachable ();
4669 /* If we can do it in one insn get out quickly. */
4670 if (const_ok_for_op (val, code))
4672 if (generate)
4673 emit_constant_insn (cond,
4674 gen_rtx_SET (target,
4675 (source
4676 ? gen_rtx_fmt_ee (code, mode, source,
4677 GEN_INT (val))
4678 : GEN_INT (val))));
4679 return 1;
4682 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4683 insn. */
4684 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4685 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4687 if (generate)
4689 if (mode == SImode && i == 16)
4690 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4691 smaller insn. */
4692 emit_constant_insn (cond,
4693 gen_zero_extendhisi2
4694 (target, gen_lowpart (HImode, source)));
4695 else
4696 /* Extz only supports SImode, but we can coerce the operands
4697 into that mode. */
4698 emit_constant_insn (cond,
4699 gen_extzv_t2 (gen_lowpart (SImode, target),
4700 gen_lowpart (SImode, source),
4701 GEN_INT (i), const0_rtx));
4704 return 1;
4707 /* Calculate a few attributes that may be useful for specific
4708 optimizations. */
4709 /* Count number of leading zeros. */
4710 for (i = 31; i >= 0; i--)
4712 if ((remainder & (1 << i)) == 0)
4713 clear_sign_bit_copies++;
4714 else
4715 break;
4718 /* Count number of leading 1's. */
4719 for (i = 31; i >= 0; i--)
4721 if ((remainder & (1 << i)) != 0)
4722 set_sign_bit_copies++;
4723 else
4724 break;
4727 /* Count number of trailing zero's. */
4728 for (i = 0; i <= 31; i++)
4730 if ((remainder & (1 << i)) == 0)
4731 clear_zero_bit_copies++;
4732 else
4733 break;
4736 /* Count number of trailing 1's. */
4737 for (i = 0; i <= 31; i++)
4739 if ((remainder & (1 << i)) != 0)
4740 set_zero_bit_copies++;
4741 else
4742 break;
4745 switch (code)
4747 case SET:
4748 /* See if we can do this by sign_extending a constant that is known
4749 to be negative. This is a good way of doing it, since the shift
4750 may well merge into a subsequent insn. */
4751 if (set_sign_bit_copies > 1)
4753 if (const_ok_for_arm
4754 (temp1 = ARM_SIGN_EXTEND (remainder
4755 << (set_sign_bit_copies - 1))))
4757 if (generate)
4759 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4760 emit_constant_insn (cond,
4761 gen_rtx_SET (new_src, GEN_INT (temp1)));
4762 emit_constant_insn (cond,
4763 gen_ashrsi3 (target, new_src,
4764 GEN_INT (set_sign_bit_copies - 1)));
4766 return 2;
4768 /* For an inverted constant, we will need to set the low bits;
4769 these will be shifted out of harm's way. */
4770 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4771 if (const_ok_for_arm (~temp1))
4773 if (generate)
4775 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4776 emit_constant_insn (cond,
4777 gen_rtx_SET (new_src, GEN_INT (temp1)));
4778 emit_constant_insn (cond,
4779 gen_ashrsi3 (target, new_src,
4780 GEN_INT (set_sign_bit_copies - 1)));
4782 return 2;
4786 /* See if we can calculate the value as the difference between two
4787 valid immediates. */
4788 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4790 int topshift = clear_sign_bit_copies & ~1;
4792 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4793 & (0xff000000 >> topshift));
4795 /* If temp1 is zero, then that means the 9 most significant
4796 bits of remainder were 1 and we've caused it to overflow.
4797 When topshift is 0 we don't need to do anything since we
4798 can borrow from 'bit 32'. */
4799 if (temp1 == 0 && topshift != 0)
4800 temp1 = 0x80000000 >> (topshift - 1);
4802 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4804 if (const_ok_for_arm (temp2))
4806 if (generate)
4808 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4809 emit_constant_insn (cond,
4810 gen_rtx_SET (new_src, GEN_INT (temp1)));
4811 emit_constant_insn (cond,
4812 gen_addsi3 (target, new_src,
4813 GEN_INT (-temp2)));
4816 return 2;
4820 /* See if we can generate this by setting the bottom (or the top)
4821 16 bits, and then shifting these into the other half of the
4822 word. We only look for the simplest cases, to do more would cost
4823 too much. Be careful, however, not to generate this when the
4824 alternative would take fewer insns. */
4825 if (val & 0xffff0000)
4827 temp1 = remainder & 0xffff0000;
4828 temp2 = remainder & 0x0000ffff;
4830 /* Overlaps outside this range are best done using other methods. */
4831 for (i = 9; i < 24; i++)
4833 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4834 && !const_ok_for_arm (temp2))
4836 rtx new_src = (subtargets
4837 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4838 : target);
4839 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4840 source, subtargets, generate);
4841 source = new_src;
4842 if (generate)
4843 emit_constant_insn
4844 (cond,
4845 gen_rtx_SET
4846 (target,
4847 gen_rtx_IOR (mode,
4848 gen_rtx_ASHIFT (mode, source,
4849 GEN_INT (i)),
4850 source)));
4851 return insns + 1;
4855 /* Don't duplicate cases already considered. */
4856 for (i = 17; i < 24; i++)
4858 if (((temp1 | (temp1 >> i)) == remainder)
4859 && !const_ok_for_arm (temp1))
4861 rtx new_src = (subtargets
4862 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4863 : target);
4864 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4865 source, subtargets, generate);
4866 source = new_src;
4867 if (generate)
4868 emit_constant_insn
4869 (cond,
4870 gen_rtx_SET (target,
4871 gen_rtx_IOR
4872 (mode,
4873 gen_rtx_LSHIFTRT (mode, source,
4874 GEN_INT (i)),
4875 source)));
4876 return insns + 1;
4880 break;
4882 case IOR:
4883 case XOR:
4884 /* If we have IOR or XOR, and the constant can be loaded in a
4885 single instruction, and we can find a temporary to put it in,
4886 then this can be done in two instructions instead of 3-4. */
4887 if (subtargets
4888 /* TARGET can't be NULL if SUBTARGETS is 0 */
4889 || (reload_completed && !reg_mentioned_p (target, source)))
4891 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4893 if (generate)
4895 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4897 emit_constant_insn (cond,
4898 gen_rtx_SET (sub, GEN_INT (val)));
4899 emit_constant_insn (cond,
4900 gen_rtx_SET (target,
4901 gen_rtx_fmt_ee (code, mode,
4902 source, sub)));
4904 return 2;
4908 if (code == XOR)
4909 break;
4911 /* Convert.
4912 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4913 and the remainder 0s for e.g. 0xfff00000)
4914 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4916 This can be done in 2 instructions by using shifts with mov or mvn.
4917 e.g. for
4918 x = x | 0xfff00000;
4919 we generate.
4920 mvn r0, r0, asl #12
4921 mvn r0, r0, lsr #12 */
4922 if (set_sign_bit_copies > 8
4923 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4925 if (generate)
4927 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4928 rtx shift = GEN_INT (set_sign_bit_copies);
4930 emit_constant_insn
4931 (cond,
4932 gen_rtx_SET (sub,
4933 gen_rtx_NOT (mode,
4934 gen_rtx_ASHIFT (mode,
4935 source,
4936 shift))));
4937 emit_constant_insn
4938 (cond,
4939 gen_rtx_SET (target,
4940 gen_rtx_NOT (mode,
4941 gen_rtx_LSHIFTRT (mode, sub,
4942 shift))));
4944 return 2;
4947 /* Convert
4948 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4950 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4952 For eg. r0 = r0 | 0xfff
4953 mvn r0, r0, lsr #12
4954 mvn r0, r0, asl #12
4957 if (set_zero_bit_copies > 8
4958 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4960 if (generate)
4962 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4963 rtx shift = GEN_INT (set_zero_bit_copies);
4965 emit_constant_insn
4966 (cond,
4967 gen_rtx_SET (sub,
4968 gen_rtx_NOT (mode,
4969 gen_rtx_LSHIFTRT (mode,
4970 source,
4971 shift))));
4972 emit_constant_insn
4973 (cond,
4974 gen_rtx_SET (target,
4975 gen_rtx_NOT (mode,
4976 gen_rtx_ASHIFT (mode, sub,
4977 shift))));
4979 return 2;
4982 /* This will never be reached for Thumb2 because orn is a valid
4983 instruction. This is for Thumb1 and the ARM 32 bit cases.
4985 x = y | constant (such that ~constant is a valid constant)
4986 Transform this to
4987 x = ~(~y & ~constant).
4989 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4991 if (generate)
4993 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4994 emit_constant_insn (cond,
4995 gen_rtx_SET (sub,
4996 gen_rtx_NOT (mode, source)));
4997 source = sub;
4998 if (subtargets)
4999 sub = gen_reg_rtx (mode);
5000 emit_constant_insn (cond,
5001 gen_rtx_SET (sub,
5002 gen_rtx_AND (mode, source,
5003 GEN_INT (temp1))));
5004 emit_constant_insn (cond,
5005 gen_rtx_SET (target,
5006 gen_rtx_NOT (mode, sub)));
5008 return 3;
5010 break;
5012 case AND:
5013 /* See if two shifts will do two or more insns' worth of work. */
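/* For example, AND with 0x0000ffff when UXTH is not available becomes
   a left shift by 16 followed by a logical right shift by 16: two
   insns that clear the whole top half at once.  */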
5014 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5016 HOST_WIDE_INT shift_mask = ((0xffffffff
5017 << (32 - clear_sign_bit_copies))
5018 & 0xffffffff);
5020 if ((remainder | shift_mask) != 0xffffffff)
5022 HOST_WIDE_INT new_val
5023 = ARM_SIGN_EXTEND (remainder | shift_mask);
5025 if (generate)
5027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5028 insns = arm_gen_constant (AND, SImode, cond, new_val,
5029 new_src, source, subtargets, 1);
5030 source = new_src;
5032 else
5034 rtx targ = subtargets ? NULL_RTX : target;
5035 insns = arm_gen_constant (AND, mode, cond, new_val,
5036 targ, source, subtargets, 0);
5040 if (generate)
5042 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5043 rtx shift = GEN_INT (clear_sign_bit_copies);
5045 emit_insn (gen_ashlsi3 (new_src, source, shift));
5046 emit_insn (gen_lshrsi3 (target, new_src, shift));
5049 return insns + 2;
5052 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5054 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5056 if ((remainder | shift_mask) != 0xffffffff)
5058 HOST_WIDE_INT new_val
5059 = ARM_SIGN_EXTEND (remainder | shift_mask);
5060 if (generate)
5062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5064 insns = arm_gen_constant (AND, mode, cond, new_val,
5065 new_src, source, subtargets, 1);
5066 source = new_src;
5068 else
5070 rtx targ = subtargets ? NULL_RTX : target;
5072 insns = arm_gen_constant (AND, mode, cond, new_val,
5073 targ, source, subtargets, 0);
5077 if (generate)
5079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5080 rtx shift = GEN_INT (clear_zero_bit_copies);
5082 emit_insn (gen_lshrsi3 (new_src, source, shift));
5083 emit_insn (gen_ashlsi3 (target, new_src, shift));
5086 return insns + 2;
5089 break;
5091 default:
5092 break;
5095 /* Calculate what the instruction sequences would be if we generated it
5096 normally, negated, or inverted. */
5097 if (code == AND)
5098 /* AND cannot be split into multiple insns, so invert and use BIC. */
5099 insns = 99;
5100 else
5101 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5103 if (can_negate)
5104 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5105 &neg_immediates);
5106 else
5107 neg_insns = 99;
5109 if (can_invert || final_invert)
5110 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5111 &inv_immediates);
5112 else
5113 inv_insns = 99;
5115 immediates = &pos_immediates;
5117 /* Is the negated immediate sequence more efficient? */
5118 if (neg_insns < insns && neg_insns <= inv_insns)
5120 insns = neg_insns;
5121 immediates = &neg_immediates;
5123 else
5124 can_negate = 0;
5126 /* Is the inverted immediate sequence more efficient?
5127 We must allow for an extra NOT instruction for XOR operations, although
5128 there is some chance that the final 'mvn' will get optimized later. */
5129 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5131 insns = inv_insns;
5132 immediates = &inv_immediates;
5134 else
5136 can_invert = 0;
5137 final_invert = 0;
5140 /* Now output the chosen sequence as instructions. */
5141 if (generate)
5143 for (i = 0; i < insns; i++)
5145 rtx new_src, temp1_rtx;
5147 temp1 = immediates->i[i];
5149 if (code == SET || code == MINUS)
5150 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5151 else if ((final_invert || i < (insns - 1)) && subtargets)
5152 new_src = gen_reg_rtx (mode);
5153 else
5154 new_src = target;
5156 if (can_invert)
5157 temp1 = ~temp1;
5158 else if (can_negate)
5159 temp1 = -temp1;
5161 temp1 = trunc_int_for_mode (temp1, mode);
5162 temp1_rtx = GEN_INT (temp1);
5164 if (code == SET)
5166 else if (code == MINUS)
5167 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5168 else
5169 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5171 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5172 source = new_src;
5174 if (code == SET)
5176 can_negate = can_invert;
5177 can_invert = 0;
5178 code = PLUS;
5180 else if (code == MINUS)
5181 code = PLUS;
5185 if (final_invert)
5187 if (generate)
5188 emit_constant_insn (cond, gen_rtx_SET (target,
5189 gen_rtx_NOT (mode, source)));
5190 insns++;
5193 return insns;
5196 /* Canonicalize a comparison so that we are more likely to recognize it.
5197 This can be done for a few constant compares, where we can make the
5198 immediate value easier to load. */
5200 static void
5201 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5202 bool op0_preserve_value)
5204 machine_mode mode;
5205 unsigned HOST_WIDE_INT i, maxval;
5207 mode = GET_MODE (*op0);
5208 if (mode == VOIDmode)
5209 mode = GET_MODE (*op1);
5211 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5213 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5214 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5215 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5216 for GTU/LEU in Thumb mode. */
5217 if (mode == DImode)
5220 if (*code == GT || *code == LE
5221 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5223 /* Missing comparison. First try to use an available
5224 comparison. */
5225 if (CONST_INT_P (*op1))
5227 i = INTVAL (*op1);
5228 switch (*code)
5230 case GT:
5231 case LE:
5232 if (i != maxval
5233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5235 *op1 = GEN_INT (i + 1);
5236 *code = *code == GT ? GE : LT;
5237 return;
5239 break;
5240 case GTU:
5241 case LEU:
5242 if (i != ~((unsigned HOST_WIDE_INT) 0)
5243 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5245 *op1 = GEN_INT (i + 1);
5246 *code = *code == GTU ? GEU : LTU;
5247 return;
5249 break;
5250 default:
5251 gcc_unreachable ();
5255 /* If that did not work, reverse the condition. */
5256 if (!op0_preserve_value)
5258 std::swap (*op0, *op1);
5259 *code = (int)swap_condition ((enum rtx_code)*code);
5262 return;
5265 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5266 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5267 to facilitate possible combining with a cmp into 'ands'. */
5268 if (mode == SImode
5269 && GET_CODE (*op0) == ZERO_EXTEND
5270 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5271 && GET_MODE (XEXP (*op0, 0)) == QImode
5272 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5273 && subreg_lowpart_p (XEXP (*op0, 0))
5274 && *op1 == const0_rtx)
5275 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5276 GEN_INT (255));
5278 /* Comparisons smaller than DImode. Only adjust comparisons against
5279 an out-of-range constant. */
5280 if (!CONST_INT_P (*op1)
5281 || const_ok_for_arm (INTVAL (*op1))
5282 || const_ok_for_arm (- INTVAL (*op1)))
5283 return;
5285 i = INTVAL (*op1);
5287 switch (*code)
5289 case EQ:
5290 case NE:
5291 return;
5293 case GT:
5294 case LE:
5295 if (i != maxval
5296 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5298 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5299 *code = *code == GT ? GE : LT;
5300 return;
5302 break;
5304 case GE:
5305 case LT:
5306 if (i != ~maxval
5307 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5309 *op1 = GEN_INT (i - 1);
5310 *code = *code == GE ? GT : LE;
5311 return;
5313 break;
5315 case GTU:
5316 case LEU:
5317 if (i != ~((unsigned HOST_WIDE_INT) 0)
5318 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5320 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5321 *code = *code == GTU ? GEU : LTU;
5322 return;
5324 break;
5326 case GEU:
5327 case LTU:
5328 if (i != 0
5329 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5331 *op1 = GEN_INT (i - 1);
5332 *code = *code == GEU ? GTU : LEU;
5333 return;
5335 break;
5337 default:
5338 gcc_unreachable ();
5343 /* Define how to find the value returned by a function. */
5345 static rtx
5346 arm_function_value(const_tree type, const_tree func,
5347 bool outgoing ATTRIBUTE_UNUSED)
5349 machine_mode mode;
5350 int unsignedp ATTRIBUTE_UNUSED;
5351 rtx r ATTRIBUTE_UNUSED;
5353 mode = TYPE_MODE (type);
5355 if (TARGET_AAPCS_BASED)
5356 return aapcs_allocate_return_reg (mode, type, func);
5358 /* Promote integer types. */
5359 if (INTEGRAL_TYPE_P (type))
5360 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5362 /* Promotes small structs returned in a register to full-word size
5363 for big-endian AAPCS. */
5364 if (arm_return_in_msb (type))
5366 HOST_WIDE_INT size = int_size_in_bytes (type);
5367 if (size % UNITS_PER_WORD != 0)
5369 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5370 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5374 return arm_libcall_value_1 (mode);
5377 /* libcall hashtable helpers. */
5379 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5381 static inline hashval_t hash (const rtx_def *);
5382 static inline bool equal (const rtx_def *, const rtx_def *);
5383 static inline void remove (rtx_def *);
5386 inline bool
5387 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5389 return rtx_equal_p (p1, p2);
5392 inline hashval_t
5393 libcall_hasher::hash (const rtx_def *p1)
5395 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5398 typedef hash_table<libcall_hasher> libcall_table_type;
5400 static void
5401 add_libcall (libcall_table_type *htab, rtx libcall)
5403 *htab->find_slot (libcall, INSERT) = libcall;
5406 static bool
5407 arm_libcall_uses_aapcs_base (const_rtx libcall)
5409 static bool init_done = false;
5410 static libcall_table_type *libcall_htab = NULL;
5412 if (!init_done)
5414 init_done = true;
5416 libcall_htab = new libcall_table_type (31);
5417 add_libcall (libcall_htab,
5418 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5435 add_libcall (libcall_htab,
5436 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5437 add_libcall (libcall_htab,
5438 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5439 add_libcall (libcall_htab,
5440 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5441 add_libcall (libcall_htab,
5442 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5443 add_libcall (libcall_htab,
5444 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5445 add_libcall (libcall_htab,
5446 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5447 add_libcall (libcall_htab,
5448 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5449 add_libcall (libcall_htab,
5450 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5452 /* Values from double-precision helper functions are returned in core
5453 registers if the selected core only supports single-precision
5454 arithmetic, even if we are using the hard-float ABI. The same is
5455 true for single-precision helpers, but we will never be using the
5456 hard-float ABI on a CPU which doesn't support single-precision
5457 operations in hardware. */
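      /* Illustrative note (not part of the original source): when targeting a
	 single-precision-only FPU such as fpv4-sp-d16 with -mfloat-abi=hard,
	 a double addition expands to a call to the __aeabi_dadd helper and
	 its result comes back in r0/r1 rather than a VFP register pair,
	 which is why such helpers are listed in this table.  */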
5458 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5466 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5467 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5468 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5469 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5470 SFmode));
5471 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5472 DFmode));
5473 add_libcall (libcall_htab,
5474 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5477 return libcall && libcall_htab->find (libcall) != NULL;
5480 static rtx
5481 arm_libcall_value_1 (machine_mode mode)
5483 if (TARGET_AAPCS_BASED)
5484 return aapcs_libcall_value (mode);
5485 else if (TARGET_IWMMXT_ABI
5486 && arm_vector_mode_supported_p (mode))
5487 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5488 else
5489 return gen_rtx_REG (mode, ARG_REGISTER (1));
5492 /* Define how to find the value returned by a library function
5493 assuming the value has mode MODE. */
5495 static rtx
5496 arm_libcall_value (machine_mode mode, const_rtx libcall)
5498 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5499 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5501 /* The following libcalls return their result in integer registers,
5502 even though they return a floating point value. */
5503 if (arm_libcall_uses_aapcs_base (libcall))
5504 return gen_rtx_REG (mode, ARG_REGISTER(1));
5508 return arm_libcall_value_1 (mode);
5511 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5513 static bool
5514 arm_function_value_regno_p (const unsigned int regno)
5516 if (regno == ARG_REGISTER (1)
5517 || (TARGET_32BIT
5518 && TARGET_AAPCS_BASED
5519 && TARGET_HARD_FLOAT
5520 && regno == FIRST_VFP_REGNUM)
5521 || (TARGET_IWMMXT_ABI
5522 && regno == FIRST_IWMMXT_REGNUM))
5523 return true;
5525 return false;
5528 /* Determine the amount of memory needed to store the possible return
5529 registers of an untyped call. */
5530 int
5531 arm_apply_result_size (void)
5533 int size = 16;
5535 if (TARGET_32BIT)
5537 if (TARGET_HARD_FLOAT_ABI)
5538 size += 32;
5539 if (TARGET_IWMMXT_ABI)
5540 size += 8;
5543 return size;
5546 /* Decide whether TYPE should be returned in memory (true)
5547 or in a register (false). FNTYPE is the type of the function making
5548 the call. */
5549 static bool
5550 arm_return_in_memory (const_tree type, const_tree fntype)
5552 HOST_WIDE_INT size;
5554 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5556 if (TARGET_AAPCS_BASED)
5558 /* Simple, non-aggregate types (i.e. not including vectors and
5559 complex) are always returned in a register (or registers).
5560 We don't care about which register here, so we can short-cut
5561 some of the detail. */
5562 if (!AGGREGATE_TYPE_P (type)
5563 && TREE_CODE (type) != VECTOR_TYPE
5564 && TREE_CODE (type) != COMPLEX_TYPE)
5565 return false;
5567 /* Any return value that is no larger than one word can be
5568 returned in r0. */
5569 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5570 return false;
5572 /* Check any available co-processors to see if they accept the
5573 type as a register candidate (VFP, for example, can return
5574 some aggregates in consecutive registers). These aren't
5575 available if the call is variadic. */
5576 if (aapcs_select_return_coproc (type, fntype) >= 0)
5577 return false;
5579 /* Vector values should be returned using ARM registers, not
5580 memory (unless they're over 16 bytes, which will break since
5581 we only have four call-clobbered registers to play with). */
5582 if (TREE_CODE (type) == VECTOR_TYPE)
5583 return (size < 0 || size > (4 * UNITS_PER_WORD));
5585 /* The rest go in memory. */
5586 return true;
5589 if (TREE_CODE (type) == VECTOR_TYPE)
5590 return (size < 0 || size > (4 * UNITS_PER_WORD));
5592 if (!AGGREGATE_TYPE_P (type) &&
5593 (TREE_CODE (type) != VECTOR_TYPE))
5594 /* All simple types are returned in registers. */
5595 return false;
5597 if (arm_abi != ARM_ABI_APCS)
5599 /* ATPCS and later return aggregate types in memory only if they are
5600 larger than a word (or are variable size). */
5601 return (size < 0 || size > UNITS_PER_WORD);
5604 /* For the arm-wince targets we choose to be compatible with Microsoft's
5605 ARM and Thumb compilers, which always return aggregates in memory. */
5606 #ifndef ARM_WINCE
5607 /* All structures/unions bigger than one word are returned in memory.
5608 Also catch the case where int_size_in_bytes returns -1. In this case
5609 the aggregate is either huge or of variable size, and in either case
5610 we will want to return it via memory and not in a register. */
5611 if (size < 0 || size > UNITS_PER_WORD)
5612 return true;
5614 if (TREE_CODE (type) == RECORD_TYPE)
5616 tree field;
5618 /* For a struct the APCS says that we only return in a register
5619 if the type is 'integer like' and every addressable element
5620 has an offset of zero. For practical purposes this means
5621 that the structure can have at most one non bit-field element
5622 and that this element must be the first one in the structure. */
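      /* Illustrative examples (not part of the original source) of how the
	 APCS rule plays out on hypothetical types:

	   struct ok  { int x; };    /- integer-like, fits in a word: r0 -/
	   struct bad { float f; };  /- first field is a float: memory -/
       */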
5624 /* Find the first field, ignoring non FIELD_DECL things which will
5625 have been created by C++. */
5626 for (field = TYPE_FIELDS (type);
5627 field && TREE_CODE (field) != FIELD_DECL;
5628 field = DECL_CHAIN (field))
5629 continue;
5631 if (field == NULL)
5632 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5634 /* Check that the first field is valid for returning in a register. */
5636 /* ... Floats are not allowed */
5637 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5638 return true;
5640 /* ... Aggregates that are not themselves valid for returning in
5641 a register are not allowed. */
5642 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5643 return true;
5645 /* Now check the remaining fields, if any. Only bitfields are allowed,
5646 since they are not addressable. */
5647 for (field = DECL_CHAIN (field);
5648 field;
5649 field = DECL_CHAIN (field))
5651 if (TREE_CODE (field) != FIELD_DECL)
5652 continue;
5654 if (!DECL_BIT_FIELD_TYPE (field))
5655 return true;
5658 return false;
5661 if (TREE_CODE (type) == UNION_TYPE)
5663 tree field;
5665 /* Unions can be returned in registers if every element is
5666 integral, or can be returned in an integer register. */
5667 for (field = TYPE_FIELDS (type);
5668 field;
5669 field = DECL_CHAIN (field))
5671 if (TREE_CODE (field) != FIELD_DECL)
5672 continue;
5674 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5675 return true;
5677 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5678 return true;
5681 return false;
5683 #endif /* not ARM_WINCE */
5685 /* Return all other types in memory. */
5686 return true;
5689 const struct pcs_attribute_arg
5691 const char *arg;
5692 enum arm_pcs value;
5693 } pcs_attribute_args[] =
5695 {"aapcs", ARM_PCS_AAPCS},
5696 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5697 #if 0
5698 /* We could recognize these, but changes would be needed elsewhere
5699 * to implement them. */
5700 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5701 {"atpcs", ARM_PCS_ATPCS},
5702 {"apcs", ARM_PCS_APCS},
5703 #endif
5704 {NULL, ARM_PCS_UNKNOWN}
5707 static enum arm_pcs
5708 arm_pcs_from_attribute (tree attr)
5710 const struct pcs_attribute_arg *ptr;
5711 const char *arg;
5713 /* Get the value of the argument. */
5714 if (TREE_VALUE (attr) == NULL_TREE
5715 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5716 return ARM_PCS_UNKNOWN;
5718 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5720 /* Check it against the list of known arguments. */
5721 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5722 if (streq (arg, ptr->arg))
5723 return ptr->value;
5725 /* An unrecognized PCS variant name. */
5726 return ARM_PCS_UNKNOWN;
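/* Illustrative use of the attribute handled above (not part of the original
   source); only the strings listed in pcs_attribute_args are accepted.  The
   declaration below is hypothetical:

     double dadd (double, double) __attribute__ ((pcs ("aapcs-vfp")));
 */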
5729 /* Get the PCS variant to use for this call. TYPE is the function's type
5730 specification, DECL is the specific declaration. DECL may be null if
5731 the call could be indirect or if this is a library call. */
5732 static enum arm_pcs
5733 arm_get_pcs_model (const_tree type, const_tree decl)
5735 bool user_convention = false;
5736 enum arm_pcs user_pcs = arm_pcs_default;
5737 tree attr;
5739 gcc_assert (type);
5741 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5742 if (attr)
5744 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5745 user_convention = true;
5748 if (TARGET_AAPCS_BASED)
5750 /* Detect varargs functions. These always use the base rules
5751 (no argument is ever a candidate for a co-processor
5752 register). */
5753 bool base_rules = stdarg_p (type);
5755 if (user_convention)
5757 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5758 sorry ("non-AAPCS derived PCS variant");
5759 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5760 error ("variadic functions must use the base AAPCS variant");
5763 if (base_rules)
5764 return ARM_PCS_AAPCS;
5765 else if (user_convention)
5766 return user_pcs;
5767 else if (decl && flag_unit_at_a_time)
5769 /* Local functions never leak outside this compilation unit,
5770 so we are free to use whatever conventions are
5771 appropriate. */
5772 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5773 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5774 if (i && i->local)
5775 return ARM_PCS_AAPCS_LOCAL;
5778 else if (user_convention && user_pcs != arm_pcs_default)
5779 sorry ("PCS variant");
5781 /* For everything else we use the target's default. */
5782 return arm_pcs_default;
5786 static void
5787 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5788 const_tree fntype ATTRIBUTE_UNUSED,
5789 rtx libcall ATTRIBUTE_UNUSED,
5790 const_tree fndecl ATTRIBUTE_UNUSED)
5792 /* Record the unallocated VFP registers. */
5793 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5794 pcum->aapcs_vfp_reg_alloc = 0;
5797 /* Walk down the type tree of TYPE counting consecutive base elements.
5798 If *MODEP is VOIDmode, then set it to the first valid floating point
5799 type. If a non-floating point type is found, or if a floating point
5800 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5801 otherwise return the count in the sub-tree. */
5802 static int
5803 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5805 machine_mode mode;
5806 HOST_WIDE_INT size;
5808 switch (TREE_CODE (type))
5810 case REAL_TYPE:
5811 mode = TYPE_MODE (type);
5812 if (mode != DFmode && mode != SFmode && mode != HFmode)
5813 return -1;
5815 if (*modep == VOIDmode)
5816 *modep = mode;
5818 if (*modep == mode)
5819 return 1;
5821 break;
5823 case COMPLEX_TYPE:
5824 mode = TYPE_MODE (TREE_TYPE (type));
5825 if (mode != DFmode && mode != SFmode)
5826 return -1;
5828 if (*modep == VOIDmode)
5829 *modep = mode;
5831 if (*modep == mode)
5832 return 2;
5834 break;
5836 case VECTOR_TYPE:
5837 /* Use V2SImode and V4SImode as representatives of all 64-bit
5838 and 128-bit vector types, whether or not those modes are
5839 supported with the present options. */
5840 size = int_size_in_bytes (type);
5841 switch (size)
5843 case 8:
5844 mode = V2SImode;
5845 break;
5846 case 16:
5847 mode = V4SImode;
5848 break;
5849 default:
5850 return -1;
5853 if (*modep == VOIDmode)
5854 *modep = mode;
5856 /* Vector modes are considered to be opaque: two vectors are
5857 equivalent for the purposes of being homogeneous aggregates
5858 if they are the same size. */
5859 if (*modep == mode)
5860 return 1;
5862 break;
5864 case ARRAY_TYPE:
5866 int count;
5867 tree index = TYPE_DOMAIN (type);
5869 /* Can't handle incomplete types nor sizes that are not
5870 fixed. */
5871 if (!COMPLETE_TYPE_P (type)
5872 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5873 return -1;
5875 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5876 if (count == -1
5877 || !index
5878 || !TYPE_MAX_VALUE (index)
5879 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5880 || !TYPE_MIN_VALUE (index)
5881 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5882 || count < 0)
5883 return -1;
5885 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5886 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5888 /* There must be no padding. */
5889 if (wi::to_wide (TYPE_SIZE (type))
5890 != count * GET_MODE_BITSIZE (*modep))
5891 return -1;
5893 return count;
5896 case RECORD_TYPE:
5898 int count = 0;
5899 int sub_count;
5900 tree field;
5902 /* Can't handle incomplete types nor sizes that are not
5903 fixed. */
5904 if (!COMPLETE_TYPE_P (type)
5905 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5906 return -1;
5908 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5910 if (TREE_CODE (field) != FIELD_DECL)
5911 continue;
5913 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5914 if (sub_count < 0)
5915 return -1;
5916 count += sub_count;
5919 /* There must be no padding. */
5920 if (wi::to_wide (TYPE_SIZE (type))
5921 != count * GET_MODE_BITSIZE (*modep))
5922 return -1;
5924 return count;
5927 case UNION_TYPE:
5928 case QUAL_UNION_TYPE:
5930 /* These aren't very interesting except in a degenerate case. */
5931 int count = 0;
5932 int sub_count;
5933 tree field;
5935 /* Can't handle incomplete types nor sizes that are not
5936 fixed. */
5937 if (!COMPLETE_TYPE_P (type)
5938 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5939 return -1;
5941 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5943 if (TREE_CODE (field) != FIELD_DECL)
5944 continue;
5946 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5947 if (sub_count < 0)
5948 return -1;
5949 count = count > sub_count ? count : sub_count;
5952 /* There must be no padding. */
5953 if (wi::to_wide (TYPE_SIZE (type))
5954 != count * GET_MODE_BITSIZE (*modep))
5955 return -1;
5957 return count;
5960 default:
5961 break;
5964 return -1;
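/* Illustrative example (not part of the original source): for the
   hypothetical type

     struct quad { float x, y, z, w; };

   the walk above returns 4 with *MODEP set to SFmode, i.e. the struct is a
   homogeneous aggregate of four floats and, under the VFP variant of the
   AAPCS, is a candidate for s0-s3.  */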
5967 /* Return true if PCS_VARIANT should use VFP registers. */
5968 static bool
5969 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5971 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5973 static bool seen_thumb1_vfp = false;
5975 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5977 sorry ("Thumb-1 hard-float VFP ABI");
5978 /* sorry() is not immediately fatal, so only display this once. */
5979 seen_thumb1_vfp = true;
5982 return true;
5985 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5986 return false;
5988 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5989 (TARGET_VFP_DOUBLE || !is_double));
5992 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5993 suitable for passing or returning in VFP registers for the PCS
5994 variant selected. If it is, then *BASE_MODE is updated to contain
5995 a machine mode describing each element of the argument's type and
5996 *COUNT to hold the number of such elements. */
5997 static bool
5998 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5999 machine_mode mode, const_tree type,
6000 machine_mode *base_mode, int *count)
6002 machine_mode new_mode = VOIDmode;
6004 /* If we have the type information, prefer that to working things
6005 out from the mode. */
6006 if (type)
6008 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6010 if (ag_count > 0 && ag_count <= 4)
6011 *count = ag_count;
6012 else
6013 return false;
6015 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6016 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6017 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6019 *count = 1;
6020 new_mode = mode;
6022 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6024 *count = 2;
6025 new_mode = (mode == DCmode ? DFmode : SFmode);
6027 else
6028 return false;
6031 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6032 return false;
6034 *base_mode = new_mode;
6035 return true;
6038 static bool
6039 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6040 machine_mode mode, const_tree type)
6042 int count ATTRIBUTE_UNUSED;
6043 machine_mode ag_mode ATTRIBUTE_UNUSED;
6045 if (!use_vfp_abi (pcs_variant, false))
6046 return false;
6047 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6048 &ag_mode, &count);
6051 static bool
6052 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6053 const_tree type)
6055 if (!use_vfp_abi (pcum->pcs_variant, false))
6056 return false;
6058 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6059 &pcum->aapcs_vfp_rmode,
6060 &pcum->aapcs_vfp_rcount);
6063 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6064 for the behaviour of this function. */
6066 static bool
6067 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6068 const_tree type ATTRIBUTE_UNUSED)
6070 int rmode_size
6071 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6072 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6073 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6074 int regno;
6076 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6077 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6079 pcum->aapcs_vfp_reg_alloc = mask << regno;
6080 if (mode == BLKmode
6081 || (mode == TImode && ! TARGET_NEON)
6082 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6084 int i;
6085 int rcount = pcum->aapcs_vfp_rcount;
6086 int rshift = shift;
6087 machine_mode rmode = pcum->aapcs_vfp_rmode;
6088 rtx par;
6089 if (!TARGET_NEON)
6091 /* Avoid using unsupported vector modes. */
6092 if (rmode == V2SImode)
6093 rmode = DImode;
6094 else if (rmode == V4SImode)
6096 rmode = DImode;
6097 rcount *= 2;
6098 rshift /= 2;
6101 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6102 for (i = 0; i < rcount; i++)
6104 rtx tmp = gen_rtx_REG (rmode,
6105 FIRST_VFP_REGNUM + regno + i * rshift);
6106 tmp = gen_rtx_EXPR_LIST
6107 (VOIDmode, tmp,
6108 GEN_INT (i * GET_MODE_SIZE (rmode)));
6109 XVECEXP (par, 0, i) = tmp;
6112 pcum->aapcs_reg = par;
6114 else
6115 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6116 return true;
6118 return false;
6121 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6122 comment there for the behaviour of this function. */
6124 static rtx
6125 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6126 machine_mode mode,
6127 const_tree type ATTRIBUTE_UNUSED)
6129 if (!use_vfp_abi (pcs_variant, false))
6130 return NULL;
6132 if (mode == BLKmode
6133 || (GET_MODE_CLASS (mode) == MODE_INT
6134 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6135 && !TARGET_NEON))
6137 int count;
6138 machine_mode ag_mode;
6139 int i;
6140 rtx par;
6141 int shift;
6143 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6144 &ag_mode, &count);
6146 if (!TARGET_NEON)
6148 if (ag_mode == V2SImode)
6149 ag_mode = DImode;
6150 else if (ag_mode == V4SImode)
6152 ag_mode = DImode;
6153 count *= 2;
6156 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6157 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6158 for (i = 0; i < count; i++)
6160 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6161 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6162 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6163 XVECEXP (par, 0, i) = tmp;
6166 return par;
6169 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6172 static void
6173 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6174 machine_mode mode ATTRIBUTE_UNUSED,
6175 const_tree type ATTRIBUTE_UNUSED)
6177 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6178 pcum->aapcs_vfp_reg_alloc = 0;
6179 return;
6182 #define AAPCS_CP(X) \
6184 aapcs_ ## X ## _cum_init, \
6185 aapcs_ ## X ## _is_call_candidate, \
6186 aapcs_ ## X ## _allocate, \
6187 aapcs_ ## X ## _is_return_candidate, \
6188 aapcs_ ## X ## _allocate_return_reg, \
6189 aapcs_ ## X ## _advance \
6192 /* Table of co-processors that can be used to pass arguments in
6193 registers. Ideally no argument should be a candidate for more than
6194 one co-processor table entry, but the table is processed in order
6195 and stops after the first match. If that entry then fails to put
6196 the argument into a co-processor register, the argument will go on
6197 the stack. */
6198 static struct
6200 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6201 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6203 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6204 BLKmode) is a candidate for this co-processor's registers; this
6205 function should ignore any position-dependent state in
6206 CUMULATIVE_ARGS and only use call-type dependent information. */
6207 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6209 /* Return true if the argument does get a co-processor register; it
6210 should set aapcs_reg to an RTX of the register allocated as is
6211 required for a return from FUNCTION_ARG. */
6212 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6214 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6215 be returned in this co-processor's registers. */
6216 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6218 /* Allocate and return an RTX element to hold the return type of a call. This
6219 routine must not fail and will only be called if is_return_candidate
6220 returned true with the same parameters. */
6221 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6223 /* Finish processing this argument and prepare to start processing
6224 the next one. */
6225 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6226 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6228 AAPCS_CP(vfp)
6231 #undef AAPCS_CP
6233 static int
6234 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6235 const_tree type)
6237 int i;
6239 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6240 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6241 return i;
6243 return -1;
6246 static int
6247 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6249 /* We aren't passed a decl, so we can't check that a call is local.
6250 However, it isn't clear that that would be a win anyway, since it
6251 might limit some tail-calling opportunities. */
6252 enum arm_pcs pcs_variant;
6254 if (fntype)
6256 const_tree fndecl = NULL_TREE;
6258 if (TREE_CODE (fntype) == FUNCTION_DECL)
6260 fndecl = fntype;
6261 fntype = TREE_TYPE (fntype);
6264 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6266 else
6267 pcs_variant = arm_pcs_default;
6269 if (pcs_variant != ARM_PCS_AAPCS)
6271 int i;
6273 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6274 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6275 TYPE_MODE (type),
6276 type))
6277 return i;
6279 return -1;
6282 static rtx
6283 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6284 const_tree fntype)
6286 /* We aren't passed a decl, so we can't check that a call is local.
6287 However, it isn't clear that that would be a win anyway, since it
6288 might limit some tail-calling opportunities. */
6289 enum arm_pcs pcs_variant;
6290 int unsignedp ATTRIBUTE_UNUSED;
6292 if (fntype)
6294 const_tree fndecl = NULL_TREE;
6296 if (TREE_CODE (fntype) == FUNCTION_DECL)
6298 fndecl = fntype;
6299 fntype = TREE_TYPE (fntype);
6302 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6304 else
6305 pcs_variant = arm_pcs_default;
6307 /* Promote integer types. */
6308 if (type && INTEGRAL_TYPE_P (type))
6309 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6311 if (pcs_variant != ARM_PCS_AAPCS)
6313 int i;
6315 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6316 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6317 type))
6318 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6319 mode, type);
6322 /* Promotes small structs returned in a register to full-word size
6323 for big-endian AAPCS. */
6324 if (type && arm_return_in_msb (type))
6326 HOST_WIDE_INT size = int_size_in_bytes (type);
6327 if (size % UNITS_PER_WORD != 0)
6329 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6330 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6334 return gen_rtx_REG (mode, R0_REGNUM);
6337 static rtx
6338 aapcs_libcall_value (machine_mode mode)
6340 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6341 && GET_MODE_SIZE (mode) <= 4)
6342 mode = SImode;
6344 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6347 /* Lay out a function argument using the AAPCS rules. The rule
6348 numbers referred to here are those in the AAPCS. */
6349 static void
6350 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6351 const_tree type, bool named)
6353 int nregs, nregs2;
6354 int ncrn;
6356 /* We only need to do this once per argument. */
6357 if (pcum->aapcs_arg_processed)
6358 return;
6360 pcum->aapcs_arg_processed = true;
6362 /* Special case: if named is false then we are handling an incoming
6363 anonymous argument which is on the stack. */
6364 if (!named)
6365 return;
6367 /* Is this a potential co-processor register candidate? */
6368 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6370 int slot = aapcs_select_call_coproc (pcum, mode, type);
6371 pcum->aapcs_cprc_slot = slot;
6373 /* We don't have to apply any of the rules from part B of the
6374 preparation phase, these are handled elsewhere in the
6375 compiler. */
6377 if (slot >= 0)
6379 /* A Co-processor register candidate goes either in its own
6380 class of registers or on the stack. */
6381 if (!pcum->aapcs_cprc_failed[slot])
6383 /* C1.cp - Try to allocate the argument to co-processor
6384 registers. */
6385 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6386 return;
6388 /* C2.cp - Put the argument on the stack and note that we
6389 can't assign any more candidates in this slot. We also
6390 need to note that we have allocated stack space, so that
6391 we won't later try to split a non-cprc candidate between
6392 core registers and the stack. */
6393 pcum->aapcs_cprc_failed[slot] = true;
6394 pcum->can_split = false;
6397 /* We didn't get a register, so this argument goes on the
6398 stack. */
6399 gcc_assert (pcum->can_split == false);
6400 return;
6404 /* C3 - For double-word aligned arguments, round the NCRN up to the
6405 next even number. */
6406 ncrn = pcum->aapcs_ncrn;
6407 if (ncrn & 1)
6409 int res = arm_needs_doubleword_align (mode, type);
6410 /* Only warn during RTL expansion of call stmts, otherwise we would
6411 warn e.g. during gimplification even on functions that will be
6412 always inlined, and we'd warn multiple times. Don't warn when
6413 called in expand_function_start either, as we warn instead in
6414 arm_function_arg_boundary in that case. */
6415 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6416 inform (input_location, "parameter passing for argument of type "
6417 "%qT changed in GCC 7.1", type);
6418 else if (res > 0)
6419 ncrn++;
6422 nregs = ARM_NUM_REGS2(mode, type);
6424 /* Sigh, this test should really assert that nregs > 0, but a GCC
6425 extension allows empty structs and then gives them empty size; it
6426 then allows such a structure to be passed by value. For some of
6427 the code below we have to pretend that such an argument has
6428 non-zero size so that we 'locate' it correctly either in
6429 registers or on the stack. */
6430 gcc_assert (nregs >= 0);
6432 nregs2 = nregs ? nregs : 1;
6434 /* C4 - Argument fits entirely in core registers. */
6435 if (ncrn + nregs2 <= NUM_ARG_REGS)
6437 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6438 pcum->aapcs_next_ncrn = ncrn + nregs;
6439 return;
6442 /* C5 - Some core registers left and there are no arguments already
6443 on the stack: split this argument between the remaining core
6444 registers and the stack. */
6445 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6447 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6448 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6449 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6450 return;
6453 /* C6 - NCRN is set to 4. */
6454 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6456 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6457 return;
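/* Illustrative example of the rules above (not part of the original source),
   assuming the base (soft-float) variant of the AAPCS and the hypothetical
   declaration

     void f (int a, double b, int c);

   A goes in r0 (C4); B needs doubleword alignment, so the NCRN is rounded up
   from 1 to 2 (C3) and B occupies r2/r3 (C4); C no longer fits in core
   registers and goes on the stack (C6, C7/C8).  */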
6460 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6461 for a call to a function whose data type is FNTYPE.
6462 For a library call, FNTYPE is NULL. */
6463 void
6464 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6465 rtx libname,
6466 tree fndecl ATTRIBUTE_UNUSED)
6468 /* Long call handling. */
6469 if (fntype)
6470 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6471 else
6472 pcum->pcs_variant = arm_pcs_default;
6474 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6476 if (arm_libcall_uses_aapcs_base (libname))
6477 pcum->pcs_variant = ARM_PCS_AAPCS;
6479 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6480 pcum->aapcs_reg = NULL_RTX;
6481 pcum->aapcs_partial = 0;
6482 pcum->aapcs_arg_processed = false;
6483 pcum->aapcs_cprc_slot = -1;
6484 pcum->can_split = true;
6486 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6488 int i;
6490 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6492 pcum->aapcs_cprc_failed[i] = false;
6493 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6496 return;
6499 /* Legacy ABIs */
6501 /* On the ARM, the offset starts at 0. */
6502 pcum->nregs = 0;
6503 pcum->iwmmxt_nregs = 0;
6504 pcum->can_split = true;
6506 /* Varargs vectors are treated the same as long long.
6507 named_count avoids having to change the way arm handles 'named' */
6508 pcum->named_count = 0;
6509 pcum->nargs = 0;
6511 if (TARGET_REALLY_IWMMXT && fntype)
6513 tree fn_arg;
6515 for (fn_arg = TYPE_ARG_TYPES (fntype);
6516 fn_arg;
6517 fn_arg = TREE_CHAIN (fn_arg))
6518 pcum->named_count += 1;
6520 if (! pcum->named_count)
6521 pcum->named_count = INT_MAX;
6525 /* Return 1 if double word alignment is required for argument passing.
6526 Return -1 if double word alignment used to be required for argument
6527 passing before PR77728 ABI fix, but is not required anymore.
6528 Return 0 if double word alignment is not required and wasn't required
6529 before either. */
6530 static int
6531 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6533 if (!type)
6534 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6536 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6537 if (!AGGREGATE_TYPE_P (type))
6538 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6540 /* Array types: Use member alignment of element type. */
6541 if (TREE_CODE (type) == ARRAY_TYPE)
6542 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6544 int ret = 0;
6545 /* Record/aggregate types: Use greatest member alignment of any member. */
6546 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6547 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6549 if (TREE_CODE (field) == FIELD_DECL)
6550 return 1;
6551 else
6552 /* Before PR77728 fix, we were incorrectly considering also
6553 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6554 Make sure we can warn about that with -Wpsabi. */
6555 ret = -1;
6558 return ret;
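/* Illustrative example (not part of the original source): an argument of type
   long long or double has 64-bit alignment, which exceeds PARM_BOUNDARY (32
   on ARM), so the function above returns 1; callers then round the candidate
   register number up to an even value so the argument starts in r0 or r2.  */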
6562 /* Determine where to put an argument to a function.
6563 Value is zero to push the argument on the stack,
6564 or a hard register in which to store the argument.
6566 MODE is the argument's machine mode.
6567 TYPE is the data type of the argument (as a tree).
6568 This is null for libcalls where that information may
6569 not be available.
6570 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6571 the preceding args and about the function being called.
6572 NAMED is nonzero if this argument is a named parameter
6573 (otherwise it is an extra parameter matching an ellipsis).
6575 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6576 other arguments are passed on the stack. If (NAMED == 0) (which happens
6577 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6578 defined), say it is passed on the stack (function_prologue will
6579 indeed make it be passed on the stack if necessary). */
6581 static rtx
6582 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6583 const_tree type, bool named)
6585 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6586 int nregs;
6588 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6589 a call insn (op3 of a call_value insn). */
6590 if (mode == VOIDmode)
6591 return const0_rtx;
6593 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6595 aapcs_layout_arg (pcum, mode, type, named);
6596 return pcum->aapcs_reg;
6599 /* Varargs vectors are treated the same as long long.
6600 named_count avoids having to change the way arm handles 'named' */
6601 if (TARGET_IWMMXT_ABI
6602 && arm_vector_mode_supported_p (mode)
6603 && pcum->named_count > pcum->nargs + 1)
6605 if (pcum->iwmmxt_nregs <= 9)
6606 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6607 else
6609 pcum->can_split = false;
6610 return NULL_RTX;
6614 /* Put doubleword aligned quantities in even register pairs. */
6615 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6617 int res = arm_needs_doubleword_align (mode, type);
6618 if (res < 0 && warn_psabi)
6619 inform (input_location, "parameter passing for argument of type "
6620 "%qT changed in GCC 7.1", type);
6621 else if (res > 0)
6622 pcum->nregs++;
6625 /* Only allow splitting an arg between regs and memory if all preceding
6626 args were allocated to regs. For args passed by reference we only count
6627 the reference pointer. */
6628 if (pcum->can_split)
6629 nregs = 1;
6630 else
6631 nregs = ARM_NUM_REGS2 (mode, type);
6633 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6634 return NULL_RTX;
6636 return gen_rtx_REG (mode, pcum->nregs);
6639 static unsigned int
6640 arm_function_arg_boundary (machine_mode mode, const_tree type)
6642 if (!ARM_DOUBLEWORD_ALIGN)
6643 return PARM_BOUNDARY;
6645 int res = arm_needs_doubleword_align (mode, type);
6646 if (res < 0 && warn_psabi)
6647 inform (input_location, "parameter passing for argument of type %qT "
6648 "changed in GCC 7.1", type);
6650 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6653 static int
6654 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6655 tree type, bool named)
6657 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6658 int nregs = pcum->nregs;
6660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6662 aapcs_layout_arg (pcum, mode, type, named);
6663 return pcum->aapcs_partial;
6666 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6667 return 0;
6669 if (NUM_ARG_REGS > nregs
6670 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6671 && pcum->can_split)
6672 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6674 return 0;
6677 /* Update the data in PCUM to advance over an argument
6678 of mode MODE and data type TYPE.
6679 (TYPE is null for libcalls where that information may not be available.) */
6681 static void
6682 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6683 const_tree type, bool named)
6685 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6687 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6689 aapcs_layout_arg (pcum, mode, type, named);
6691 if (pcum->aapcs_cprc_slot >= 0)
6693 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6694 type);
6695 pcum->aapcs_cprc_slot = -1;
6698 /* Generic stuff. */
6699 pcum->aapcs_arg_processed = false;
6700 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6701 pcum->aapcs_reg = NULL_RTX;
6702 pcum->aapcs_partial = 0;
6704 else
6706 pcum->nargs += 1;
6707 if (arm_vector_mode_supported_p (mode)
6708 && pcum->named_count > pcum->nargs
6709 && TARGET_IWMMXT_ABI)
6710 pcum->iwmmxt_nregs += 1;
6711 else
6712 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6716 /* Variable sized types are passed by reference. This is a GCC
6717 extension to the ARM ABI. */
6719 static bool
6720 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6721 machine_mode mode ATTRIBUTE_UNUSED,
6722 const_tree type, bool named ATTRIBUTE_UNUSED)
6724 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6727 /* Encode the current state of the #pragma [no_]long_calls. */
6728 typedef enum
6730 OFF, /* No #pragma [no_]long_calls is in effect. */
6731 LONG, /* #pragma long_calls is in effect. */
6732 SHORT /* #pragma no_long_calls is in effect. */
6733 } arm_pragma_enum;
6735 static arm_pragma_enum arm_pragma_long_calls = OFF;
6737 void
6738 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6740 arm_pragma_long_calls = LONG;
6743 void
6744 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6746 arm_pragma_long_calls = SHORT;
6749 void
6750 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6752 arm_pragma_long_calls = OFF;
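/* Illustrative use of these pragmas (not part of the original source); the
   function name is hypothetical:

     #pragma long_calls
     void far_away (void);     /- calls to this use a full 32-bit address -/
     #pragma long_calls_off
 */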
6755 /* Handle an attribute requiring a FUNCTION_DECL;
6756 arguments as in struct attribute_spec.handler. */
6757 static tree
6758 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6759 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6761 if (TREE_CODE (*node) != FUNCTION_DECL)
6763 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6764 name);
6765 *no_add_attrs = true;
6768 return NULL_TREE;
6771 /* Handle an "interrupt" or "isr" attribute;
6772 arguments as in struct attribute_spec.handler. */
6773 static tree
6774 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6775 bool *no_add_attrs)
6777 if (DECL_P (*node))
6779 if (TREE_CODE (*node) != FUNCTION_DECL)
6781 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6782 name);
6783 *no_add_attrs = true;
6785 /* FIXME: the argument if any is checked for type attributes;
6786 should it be checked for decl ones? */
6788 else
6790 if (TREE_CODE (*node) == FUNCTION_TYPE
6791 || TREE_CODE (*node) == METHOD_TYPE)
6793 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6795 warning (OPT_Wattributes, "%qE attribute ignored",
6796 name);
6797 *no_add_attrs = true;
6800 else if (TREE_CODE (*node) == POINTER_TYPE
6801 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6802 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6803 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6805 *node = build_variant_type_copy (*node);
6806 TREE_TYPE (*node) = build_type_attribute_variant
6807 (TREE_TYPE (*node),
6808 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6809 *no_add_attrs = true;
6811 else
6813 /* Possibly pass this attribute on from the type to a decl. */
6814 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6815 | (int) ATTR_FLAG_FUNCTION_NEXT
6816 | (int) ATTR_FLAG_ARRAY_NEXT))
6818 *no_add_attrs = true;
6819 return tree_cons (name, args, NULL_TREE);
6821 else
6823 warning (OPT_Wattributes, "%qE attribute ignored",
6824 name);
6829 return NULL_TREE;
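/* Illustrative use of the attribute handled above (not part of the original
   source); the handler name is hypothetical:

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
 */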
6832 /* Handle a "pcs" attribute; arguments as in struct
6833 attribute_spec.handler. */
6834 static tree
6835 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6836 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6838 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6840 warning (OPT_Wattributes, "%qE attribute ignored", name);
6841 *no_add_attrs = true;
6843 return NULL_TREE;
6846 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6847 /* Handle the "notshared" attribute. This attribute is another way of
6848 requesting hidden visibility. ARM's compiler supports
6849 "__declspec(notshared)"; we support the same thing via an
6850 attribute. */
6852 static tree
6853 arm_handle_notshared_attribute (tree *node,
6854 tree name ATTRIBUTE_UNUSED,
6855 tree args ATTRIBUTE_UNUSED,
6856 int flags ATTRIBUTE_UNUSED,
6857 bool *no_add_attrs)
6859 tree decl = TYPE_NAME (*node);
6861 if (decl)
6863 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6864 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6865 *no_add_attrs = false;
6867 return NULL_TREE;
6869 #endif
6871 /* This function returns true if a function with declaration FNDECL and type
6872 FNTYPE uses the stack to pass arguments or return variables and false
6873 otherwise. This is used for functions with the attributes
6874 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6875 diagnostic messages if the stack is used. NAME is the name of the attribute
6876 used. */
6878 static bool
6879 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6881 function_args_iterator args_iter;
6882 CUMULATIVE_ARGS args_so_far_v;
6883 cumulative_args_t args_so_far;
6884 bool first_param = true;
6885 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6887 /* Error out if any argument is passed on the stack. */
6888 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6889 args_so_far = pack_cumulative_args (&args_so_far_v);
6890 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6892 rtx arg_rtx;
6893 machine_mode arg_mode = TYPE_MODE (arg_type);
6895 prev_arg_type = arg_type;
6896 if (VOID_TYPE_P (arg_type))
6897 continue;
6899 if (!first_param)
6900 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6901 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6902 if (!arg_rtx
6903 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6905 error ("%qE attribute not available to functions with arguments "
6906 "passed on the stack", name);
6907 return true;
6909 first_param = false;
6912 /* Error out for variadic functions since we cannot control how many
6913 arguments will be passed and thus the stack could be used. stdarg_p () is not
6914 used for this check, to avoid walking the argument list twice. */
6915 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6917 error ("%qE attribute not available to functions with variable number "
6918 "of arguments", name);
6919 return true;
6922 /* Error out if return value is passed on the stack. */
6923 ret_type = TREE_TYPE (fntype);
6924 if (arm_return_in_memory (ret_type, fntype))
6926 error ("%qE attribute not available to functions that return value on "
6927 "the stack", name);
6928 return true;
6930 return false;
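/* Illustrative example (not part of the original source) of a hypothetical
   signature that the check above rejects, because a 24-byte struct passed by
   value cannot fit entirely in r0-r3 and would spill onto the stack:

     struct big { int v[6]; };
     void __attribute__ ((cmse_nonsecure_entry)) f (struct big b);  /- error -/
 */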
6933 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6934 function will check whether the attribute is allowed here and will add the
6935 attribute to the function declaration tree or otherwise issue a warning. */
6937 static tree
6938 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6939 tree /* args */,
6940 int /* flags */,
6941 bool *no_add_attrs)
6943 tree fndecl;
6945 if (!use_cmse)
6947 *no_add_attrs = true;
6948 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6949 name);
6950 return NULL_TREE;
6953 /* Ignore attribute for function types. */
6954 if (TREE_CODE (*node) != FUNCTION_DECL)
6956 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6957 name);
6958 *no_add_attrs = true;
6959 return NULL_TREE;
6962 fndecl = *node;
6964 /* Warn for static linkage functions. */
6965 if (!TREE_PUBLIC (fndecl))
6967 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6968 "with static linkage", name);
6969 *no_add_attrs = true;
6970 return NULL_TREE;
6973 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6974 TREE_TYPE (fndecl));
6975 return NULL_TREE;
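/* Illustrative use of the attribute handled above (not part of the original
   source); it requires compiling with -mcmse on an ARMv8-M target, and the
   function name is hypothetical:

     int __attribute__ ((cmse_nonsecure_entry)) get_status (void);
 */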
6979 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6980 function will check whether the attribute is allowed here and will add the
6981 attribute to the function type tree or otherwise issue a diagnostic. The
6982 reason we check this at declaration time is to only allow the use of the
6983 attribute with declarations of function pointers and not function
6984 declarations. This function checks NODE is of the expected type and issues
6985 diagnostics otherwise using NAME. If it is not of the expected type
6986 *NO_ADD_ATTRS will be set to true. */
6988 static tree
6989 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6990 tree /* args */,
6991 int /* flags */,
6992 bool *no_add_attrs)
6994 tree decl = NULL_TREE, fntype = NULL_TREE;
6995 tree type;
6997 if (!use_cmse)
6999 *no_add_attrs = true;
7000 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7001 name);
7002 return NULL_TREE;
7005 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7007 decl = *node;
7008 fntype = TREE_TYPE (decl);
7011 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7012 fntype = TREE_TYPE (fntype);
7014 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7016 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7017 "function pointer", name);
7018 *no_add_attrs = true;
7019 return NULL_TREE;
7022 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7024 if (*no_add_attrs)
7025 return NULL_TREE;
7027 /* Prevent trees being shared among function types with and without
7028 cmse_nonsecure_call attribute. */
7029 type = TREE_TYPE (decl);
7031 type = build_distinct_type_copy (type);
7032 TREE_TYPE (decl) = type;
7033 fntype = type;
7035 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7037 type = fntype;
7038 fntype = TREE_TYPE (fntype);
7039 fntype = build_distinct_type_copy (fntype);
7040 TREE_TYPE (type) = fntype;
7043 /* Construct a type attribute and add it to the function type. */
7044 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7045 TYPE_ATTRIBUTES (fntype));
7046 TYPE_ATTRIBUTES (fntype) = attrs;
7047 return NULL_TREE;
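/* Illustrative use of the attribute handled above (not part of the original
   source); it applies to the base type of a function pointer, again only
   with -mcmse, and the pointer name is hypothetical:

     int __attribute__ ((cmse_nonsecure_call)) (*ns_fetch) (int);
 */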
7050 /* Return 0 if the attributes for two types are incompatible, 1 if they
7051 are compatible, and 2 if they are nearly compatible (which causes a
7052 warning to be generated). */
7053 static int
7054 arm_comp_type_attributes (const_tree type1, const_tree type2)
7056 int l1, l2, s1, s2;
7058 /* Check for mismatch of non-default calling convention. */
7059 if (TREE_CODE (type1) != FUNCTION_TYPE)
7060 return 1;
7062 /* Check for mismatched call attributes. */
7063 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7064 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7065 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7066 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7068 /* Only bother to check if an attribute is defined. */
7069 if (l1 | l2 | s1 | s2)
7071 /* If one type has an attribute, the other must have the same attribute. */
7072 if ((l1 != l2) || (s1 != s2))
7073 return 0;
7075 /* Disallow mixed attributes. */
7076 if ((l1 & s2) || (l2 & s1))
7077 return 0;
7080 /* Check for mismatched ISR attribute. */
7081 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7082 if (! l1)
7083 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7084 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7085 if (! l2)
7086 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7087 if (l1 != l2)
7088 return 0;
7090 l1 = lookup_attribute ("cmse_nonsecure_call",
7091 TYPE_ATTRIBUTES (type1)) != NULL;
7092 l2 = lookup_attribute ("cmse_nonsecure_call",
7093 TYPE_ATTRIBUTES (type2)) != NULL;
7095 if (l1 != l2)
7096 return 0;
7098 return 1;
7101 /* Assigns default attributes to newly defined type. This is used to
7102 set short_call/long_call attributes for function types of
7103 functions defined inside corresponding #pragma scopes. */
7104 static void
7105 arm_set_default_type_attributes (tree type)
7107 /* Add __attribute__ ((long_call)) to all functions when inside
7108 #pragma long_calls, or __attribute__ ((short_call)) when inside
7109 #pragma no_long_calls. */
7110 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7112 tree type_attr_list, attr_name;
7113 type_attr_list = TYPE_ATTRIBUTES (type);
7115 if (arm_pragma_long_calls == LONG)
7116 attr_name = get_identifier ("long_call");
7117 else if (arm_pragma_long_calls == SHORT)
7118 attr_name = get_identifier ("short_call");
7119 else
7120 return;
7122 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7123 TYPE_ATTRIBUTES (type) = type_attr_list;
7127 /* Return true if DECL is known to be linked into section SECTION. */
7129 static bool
7130 arm_function_in_section_p (tree decl, section *section)
7132 /* We can only be certain about the prevailing symbol definition. */
7133 if (!decl_binds_to_current_def_p (decl))
7134 return false;
7136 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7137 if (!DECL_SECTION_NAME (decl))
7139 /* Make sure that we will not create a unique section for DECL. */
7140 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7141 return false;
7144 return function_section (decl) == section;
7147 /* Return nonzero if a 32-bit "long_call" should be generated for
7148 a call from the current function to DECL. We generate a long_call
7149 if the function:
7151 a. has an __attribute__ ((long_call))
7152 or b. is within the scope of a #pragma long_calls
7153 or c. the -mlong-calls command line switch has been specified
7155 However we do not generate a long call if the function:
7157 d. has an __attribute__ ((short_call))
7158 or e. is inside the scope of a #pragma no_long_calls
7159 or f. is defined in the same section as the current function. */
7161 bool
7162 arm_is_long_call_p (tree decl)
7164 tree attrs;
7166 if (!decl)
7167 return TARGET_LONG_CALLS;
7169 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7170 if (lookup_attribute ("short_call", attrs))
7171 return false;
7173 /* For "f", be conservative, and only cater for cases in which the
7174 whole of the current function is placed in the same section. */
7175 if (!flag_reorder_blocks_and_partition
7176 && TREE_CODE (decl) == FUNCTION_DECL
7177 && arm_function_in_section_p (decl, current_function_section ()))
7178 return false;
7180 if (lookup_attribute ("long_call", attrs))
7181 return true;
7183 return TARGET_LONG_CALLS;
7186 /* Return nonzero if it is ok to make a tail-call to DECL. */
7187 static bool
7188 arm_function_ok_for_sibcall (tree decl, tree exp)
7190 unsigned long func_type;
7192 if (cfun->machine->sibcall_blocked)
7193 return false;
7195 /* Never tailcall something if we are generating code for Thumb-1. */
7196 if (TARGET_THUMB1)
7197 return false;
7199 /* The PIC register is live on entry to VxWorks PLT entries, so we
7200 must make the call before restoring the PIC register. */
7201 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7202 return false;
7204 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7205 may be used both as target of the call and base register for restoring
7206 the VFP registers. */
7207 if (TARGET_APCS_FRAME && TARGET_ARM
7208 && TARGET_HARD_FLOAT
7209 && decl && arm_is_long_call_p (decl))
7210 return false;
7212 /* If we are interworking and the function is not declared static
7213 then we can't tail-call it unless we know that it exists in this
7214 compilation unit (since it might be a Thumb routine). */
7215 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7216 && !TREE_ASM_WRITTEN (decl))
7217 return false;
7219 func_type = arm_current_func_type ();
7220 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7221 if (IS_INTERRUPT (func_type))
7222 return false;
7224 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7225 generated for entry functions themselves. */
7226 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7227 return false;
7229 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7230 this would complicate matters for later code generation. */
7231 if (TREE_CODE (exp) == CALL_EXPR)
7233 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7234 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7235 return false;
7238 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7240 /* Check that the return value locations are the same. For
7241 example that we aren't returning a value from the sibling in
7242 a VFP register but then need to transfer it to a core
7243 register. */
7244 rtx a, b;
7245 tree decl_or_type = decl;
7247 /* If it is an indirect function pointer, get the function type. */
7248 if (!decl)
7249 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7251 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7252 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7253 cfun->decl, false);
7254 if (!rtx_equal_p (a, b))
7255 return false;
7258 /* Never tailcall if function may be called with a misaligned SP. */
7259 if (IS_STACKALIGN (func_type))
7260 return false;
7262 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7263 references should become a NOP. Don't convert such calls into
7264 sibling calls. */
7265 if (TARGET_AAPCS_BASED
7266 && arm_abi == ARM_ABI_AAPCS
7267 && decl
7268 && DECL_WEAK (decl))
7269 return false;
7271 /* We cannot do a tailcall for an indirect call by descriptor if all the
7272 argument registers are used because the only register left to load the
7273 address is IP and it will already contain the static chain. */
7274 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7276 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7277 CUMULATIVE_ARGS cum;
7278 cumulative_args_t cum_v;
7280 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7281 cum_v = pack_cumulative_args (&cum);
7283 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7285 tree type = TREE_VALUE (t);
7286 if (!VOID_TYPE_P (type))
7287 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7290 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7291 return false;
7294 /* Everything else is ok. */
7295 return true;
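/* A hedged sketch of the CMSE cases rejected above (assumes -mcmse and the
   ARMv8-M security extensions; the function and pointer names are made up,
   the attribute spellings are the ones tested above):

     int __attribute__ ((cmse_nonsecure_entry))
     entry (int x)
     {
       return helper (x);      <- never emitted as a sibcall, the entry
                                  function must return through BXNS
     }

     int __attribute__ ((cmse_nonsecure_call)) (*ns_fn) (int);
     ...
     return ns_fn (x);         <- likewise never a sibcall, to keep the
                                  non-secure call sequence simple.  */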
7299 /* Addressing mode support functions. */
7301 /* Return nonzero if X is a legitimate immediate operand when compiling
7302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7303 int
7304 legitimate_pic_operand_p (rtx x)
7306 if (GET_CODE (x) == SYMBOL_REF
7307 || (GET_CODE (x) == CONST
7308 && GET_CODE (XEXP (x, 0)) == PLUS
7309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7310 return 0;
7312 return 1;
7315 /* Record that the current function needs a PIC register. Initialize
7316 cfun->machine->pic_reg if we have not already done so. */
7318 static void
7319 require_pic_register (void)
7321 /* A lot of the logic here is made obscure by the fact that this
7322 routine gets called as part of the rtx cost estimation process.
7323 We don't want those calls to affect any assumptions about the real
7324 function; and further, we can't call entry_of_function() until we
7325 start the real expansion process. */
7326 if (!crtl->uses_pic_offset_table)
7328 gcc_assert (can_create_pseudo_p ());
7329 if (arm_pic_register != INVALID_REGNUM
7330 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7332 if (!cfun->machine->pic_reg)
7333 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7335 /* Play games to avoid marking the function as needing pic
7336 if we are being called as part of the cost-estimation
7337 process. */
7338 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7339 crtl->uses_pic_offset_table = 1;
7341 else
7343 rtx_insn *seq, *insn;
7345 if (!cfun->machine->pic_reg)
7346 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7348 /* Play games to avoid marking the function as needing pic
7349 if we are being called as part of the cost-estimation
7350 process. */
7351 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7353 crtl->uses_pic_offset_table = 1;
7354 start_sequence ();
7356 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7357 && arm_pic_register > LAST_LO_REGNUM)
7358 emit_move_insn (cfun->machine->pic_reg,
7359 gen_rtx_REG (Pmode, arm_pic_register));
7360 else
7361 arm_load_pic_register (0UL);
7363 seq = get_insns ();
7364 end_sequence ();
7366 for (insn = seq; insn; insn = NEXT_INSN (insn))
7367 if (INSN_P (insn))
7368 INSN_LOCATION (insn) = prologue_location;
7370 /* We can be called during expansion of PHI nodes, where
7371 we can't yet emit instructions directly in the final
7372 insn stream. Queue the insns on the entry edge, they will
7373 be committed after everything else is expanded. */
7374 insert_insn_on_edge (seq,
7375 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7382 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7384 if (GET_CODE (orig) == SYMBOL_REF
7385 || GET_CODE (orig) == LABEL_REF)
7387 if (reg == 0)
7389 gcc_assert (can_create_pseudo_p ());
7390 reg = gen_reg_rtx (Pmode);
7393 /* VxWorks does not impose a fixed gap between segments; the run-time
7394 gap can be different from the object-file gap. We therefore can't
7395 use GOTOFF unless we are absolutely sure that the symbol is in the
7396 same segment as the GOT. Unfortunately, the flexibility of linker
7397 scripts means that we can't be sure of that in general, so assume
7398 that GOTOFF is never valid on VxWorks. */
7399 /* References to weak symbols cannot be resolved locally: they
7400 may be overridden by a non-weak definition at link time. */
7401 rtx_insn *insn;
7402 if ((GET_CODE (orig) == LABEL_REF
7403 || (GET_CODE (orig) == SYMBOL_REF
7404 && SYMBOL_REF_LOCAL_P (orig)
7405 && (SYMBOL_REF_DECL (orig)
7406 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7407 && NEED_GOT_RELOC
7408 && arm_pic_data_is_text_relative)
7409 insn = arm_pic_static_addr (orig, reg);
7410 else
7412 rtx pat;
7413 rtx mem;
7415 /* If this function doesn't have a pic register, create one now. */
7416 require_pic_register ();
7418 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7420 /* Make the MEM as close to a constant as possible. */
7421 mem = SET_SRC (pat);
7422 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7423 MEM_READONLY_P (mem) = 1;
7424 MEM_NOTRAP_P (mem) = 1;
7426 insn = emit_insn (pat);
7429 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7430 by loop. */
7431 set_unique_reg_note (insn, REG_EQUAL, orig);
7433 return reg;
7435 else if (GET_CODE (orig) == CONST)
7437 rtx base, offset;
7439 if (GET_CODE (XEXP (orig, 0)) == PLUS
7440 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7441 return orig;
7443 /* Handle the case where we have: const (UNSPEC_TLS). */
7444 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7445 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7446 return orig;
7448 /* Handle the case where we have:
7449 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7450 CONST_INT. */
7451 if (GET_CODE (XEXP (orig, 0)) == PLUS
7452 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7453 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7455 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7456 return orig;
7459 if (reg == 0)
7461 gcc_assert (can_create_pseudo_p ());
7462 reg = gen_reg_rtx (Pmode);
7465 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7467 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7468 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7469 base == reg ? 0 : reg);
7471 if (CONST_INT_P (offset))
7473 /* The base register doesn't really matter, we only want to
7474 test the index for the appropriate mode. */
7475 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7477 gcc_assert (can_create_pseudo_p ());
7478 offset = force_reg (Pmode, offset);
7481 if (CONST_INT_P (offset))
7482 return plus_constant (Pmode, base, INTVAL (offset));
7485 if (GET_MODE_SIZE (mode) > 4
7486 && (GET_MODE_CLASS (mode) == MODE_INT
7487 || TARGET_SOFT_FLOAT))
7489 emit_insn (gen_addsi3 (reg, base, offset));
7490 return reg;
7493 return gen_rtx_PLUS (Pmode, base, offset);
7496 return orig;
7500 /* Find a spare register to use during the prolog of a function. */
7502 static int
7503 thumb_find_work_register (unsigned long pushed_regs_mask)
7505 int reg;
7507 /* Check the argument registers first as these are call-used. The
7508 register allocation order means that sometimes r3 might be used
7509 but earlier argument registers might not, so check them all. */
7510 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7511 if (!df_regs_ever_live_p (reg))
7512 return reg;
7514 /* Before going on to check the call-saved registers we can try a couple
7515 more ways of deducing that r3 is available. The first is when we are
7516 pushing anonymous arguments onto the stack and we have less than 4
7517 registers worth of fixed arguments(*). In this case r3 will be part of
7518 the variable argument list and so we can be sure that it will be
7519 pushed right at the start of the function. Hence it will be available
7520 for the rest of the prologue.
7521 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
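/* For instance (sketch): with int f (int a, int b, ...) the named arguments
   take r0-r1, the first anonymous words arrive in r2-r3 and are pushed as
   pretend arguments by the prologue, so r3 is free as a work register for
   the rest of the prologue.  */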
7522 if (cfun->machine->uses_anonymous_args
7523 && crtl->args.pretend_args_size > 0)
7524 return LAST_ARG_REGNUM;
7526 /* The other case is when we have fixed arguments but less than 4 registers
7527 worth. In this case r3 might be used in the body of the function, but
7528 it is not being used to convey an argument into the function. In theory
7529 we could just check crtl->args.size to see how many bytes are
7530 being passed in argument registers, but it seems that it is unreliable.
7531 Sometimes it will have the value 0 when in fact arguments are being
7532 passed. (See testcase execute/20021111-1.c for an example). So we also
7533 check the args_info.nregs field as well. The problem with this field is
7534 that it makes no allowances for arguments that are passed to the
7535 function but which are not used. Hence we could miss an opportunity
7536 when a function has an unused argument in r3. But it is better to be
7537 safe than to be sorry. */
7538 if (! cfun->machine->uses_anonymous_args
7539 && crtl->args.size >= 0
7540 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7541 && (TARGET_AAPCS_BASED
7542 ? crtl->args.info.aapcs_ncrn < 4
7543 : crtl->args.info.nregs < 4))
7544 return LAST_ARG_REGNUM;
7546 /* Otherwise look for a call-saved register that is going to be pushed. */
7547 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7548 if (pushed_regs_mask & (1 << reg))
7549 return reg;
7551 if (TARGET_THUMB2)
7553 /* Thumb-2 can use high regs. */
7554 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7555 if (pushed_regs_mask & (1 << reg))
7556 return reg;
7558 /* Something went wrong - thumb_compute_save_reg_mask()
7559 should have arranged for a suitable register to be pushed. */
7560 gcc_unreachable ();
7563 static GTY(()) int pic_labelno;
7565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7566 low register. */
7568 void
7569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7571 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7573 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7574 return;
7576 gcc_assert (flag_pic);
7578 pic_reg = cfun->machine->pic_reg;
7579 if (TARGET_VXWORKS_RTP)
7581 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7582 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7583 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7585 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7587 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7588 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7590 else
7592 /* We use an UNSPEC rather than a LABEL_REF because this label
7593 never appears in the code stream. */
7595 labelno = GEN_INT (pic_labelno++);
7596 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7597 l1 = gen_rtx_CONST (VOIDmode, l1);
7599 /* On the ARM the PC register contains 'dot + 8' at the time of the
7600 addition, on the Thumb it is 'dot + 4'. */
7601 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7602 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7603 UNSPEC_GOTSYM_OFF);
7604 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7606 if (TARGET_32BIT)
7608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7610 else /* TARGET_THUMB1 */
7612 if (arm_pic_register != INVALID_REGNUM
7613 && REGNO (pic_reg) > LAST_LO_REGNUM)
7615 /* We will have pushed the pic register, so we should always be
7616 able to find a work register. */
7617 pic_tmp = gen_rtx_REG (SImode,
7618 thumb_find_work_register (saved_regs));
7619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7620 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7621 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7623 else if (arm_pic_register != INVALID_REGNUM
7624 && arm_pic_register > LAST_LO_REGNUM
7625 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7627 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7628 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7629 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7631 else
7632 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7636 /* Need to emit this whether or not we obey regdecls,
7637 since setjmp/longjmp can cause life info to screw up. */
7638 emit_use (pic_reg);
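/* Sketch of the classic non-MOVT sequence emitted above for ARM state
   (labels and register names are illustrative):

	ldr	rPIC, <literal = _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)>
   .LPIC0:
	add	rPIC, pc, rPIC

   Reading pc in the add yields .LPIC0 + 8 (dot + 4 in Thumb state), which
   is why that constant is folded into the offset when pic_rtx is built.  */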
7641 /* Generate code to load the address of a static var when flag_pic is set. */
7642 static rtx_insn *
7643 arm_pic_static_addr (rtx orig, rtx reg)
7645 rtx l1, labelno, offset_rtx;
7647 gcc_assert (flag_pic);
7649 /* We use an UNSPEC rather than a LABEL_REF because this label
7650 never appears in the code stream. */
7651 labelno = GEN_INT (pic_labelno++);
7652 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7653 l1 = gen_rtx_CONST (VOIDmode, l1);
7655 /* On the ARM the PC register contains 'dot + 8' at the time of the
7656 addition, on the Thumb it is 'dot + 4'. */
7657 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7658 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7659 UNSPEC_SYMBOL_OFFSET);
7660 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7662 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7665 /* Return nonzero if X is valid as an ARM state addressing register. */
7666 static int
7667 arm_address_register_rtx_p (rtx x, int strict_p)
7669 int regno;
7671 if (!REG_P (x))
7672 return 0;
7674 regno = REGNO (x);
7676 if (strict_p)
7677 return ARM_REGNO_OK_FOR_BASE_P (regno);
7679 return (regno <= LAST_ARM_REGNUM
7680 || regno >= FIRST_PSEUDO_REGISTER
7681 || regno == FRAME_POINTER_REGNUM
7682 || regno == ARG_POINTER_REGNUM);
7685 /* Return TRUE if this rtx is the difference of a symbol and a label,
7686 and will reduce to a PC-relative relocation in the object file.
7687 Expressions like this can be left alone when generating PIC, rather
7688 than forced through the GOT. */
7689 static int
7690 pcrel_constant_p (rtx x)
7692 if (GET_CODE (x) == MINUS)
7693 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7695 return FALSE;
7698 /* Return true if X will surely end up in an index register after next
7699 splitting pass. */
7700 static bool
7701 will_be_in_index_register (const_rtx x)
7703 /* arm.md: calculate_pic_address will split this into a register. */
7704 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7707 /* Return nonzero if X is a valid ARM state address operand. */
7708 int
7709 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7710 int strict_p)
7712 bool use_ldrd;
7713 enum rtx_code code = GET_CODE (x);
7715 if (arm_address_register_rtx_p (x, strict_p))
7716 return 1;
7718 use_ldrd = (TARGET_LDRD
7719 && (mode == DImode || mode == DFmode));
7721 if (code == POST_INC || code == PRE_DEC
7722 || ((code == PRE_INC || code == POST_DEC)
7723 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7724 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7726 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7727 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7728 && GET_CODE (XEXP (x, 1)) == PLUS
7729 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7731 rtx addend = XEXP (XEXP (x, 1), 1);
7733 /* Don't allow ldrd post increment by register because it's hard
7734 to fixup invalid register choices. */
7735 if (use_ldrd
7736 && GET_CODE (x) == POST_MODIFY
7737 && REG_P (addend))
7738 return 0;
7740 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7741 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7744 /* After reload constants split into minipools will have addresses
7745 from a LABEL_REF. */
7746 else if (reload_completed
7747 && (code == LABEL_REF
7748 || (code == CONST
7749 && GET_CODE (XEXP (x, 0)) == PLUS
7750 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7751 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7752 return 1;
7754 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7755 return 0;
7757 else if (code == PLUS)
7759 rtx xop0 = XEXP (x, 0);
7760 rtx xop1 = XEXP (x, 1);
7762 return ((arm_address_register_rtx_p (xop0, strict_p)
7763 && ((CONST_INT_P (xop1)
7764 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7765 || (!strict_p && will_be_in_index_register (xop1))))
7766 || (arm_address_register_rtx_p (xop1, strict_p)
7767 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7770 #if 0
7771 /* Reload currently can't handle MINUS, so disable this for now */
7772 else if (GET_CODE (x) == MINUS)
7774 rtx xop0 = XEXP (x, 0);
7775 rtx xop1 = XEXP (x, 1);
7777 return (arm_address_register_rtx_p (xop0, strict_p)
7778 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7780 #endif
7782 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7783 && code == SYMBOL_REF
7784 && CONSTANT_POOL_ADDRESS_P (x)
7785 && ! (flag_pic
7786 && symbol_mentioned_p (get_pool_constant (x))
7787 && ! pcrel_constant_p (get_pool_constant (x))))
7788 return 1;
7790 return 0;
7793 /* Return true if we can avoid creating a constant pool entry for x. */
7794 static bool
7795 can_avoid_literal_pool_for_label_p (rtx x)
7797 /* Normally we can assign constant values to target registers without
7798 the help of the constant pool. But there are cases where we have to use the
7799 constant pool, such as:
7800 1) assigning a label to a register.
7801 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7803 A constant pool access of the form:
7804 (set (reg r0) (mem (symbol_ref (".LC0"))))
7805 will cause the use of the literal pool (later, in function arm_reorg).
7806 So here we mark such a form as invalid, so that the compiler
7807 will adjust it into:
7808 (set (reg r0) (symbol_ref (".LC0")))
7809 (set (reg r0) (mem (reg r0))).
7810 No extra register is required, and (mem (reg r0)) won't cause the use
7811 of literal pools. */
7812 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7813 && CONSTANT_POOL_ADDRESS_P (x))
7814 return 1;
7815 return 0;
7819 /* Return nonzero if X is a valid Thumb-2 address operand. */
7820 static int
7821 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7823 bool use_ldrd;
7824 enum rtx_code code = GET_CODE (x);
7826 if (arm_address_register_rtx_p (x, strict_p))
7827 return 1;
7829 use_ldrd = (TARGET_LDRD
7830 && (mode == DImode || mode == DFmode));
7832 if (code == POST_INC || code == PRE_DEC
7833 || ((code == PRE_INC || code == POST_DEC)
7834 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7835 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7837 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7838 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7839 && GET_CODE (XEXP (x, 1)) == PLUS
7840 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7842 /* Thumb-2 only has autoincrement by constant. */
7843 rtx addend = XEXP (XEXP (x, 1), 1);
7844 HOST_WIDE_INT offset;
7846 if (!CONST_INT_P (addend))
7847 return 0;
7849 offset = INTVAL(addend);
7850 if (GET_MODE_SIZE (mode) <= 4)
7851 return (offset > -256 && offset < 256);
7853 return (use_ldrd && offset > -1024 && offset < 1024
7854 && (offset & 3) == 0);
7857 /* After reload constants split into minipools will have addresses
7858 from a LABEL_REF. */
7859 else if (reload_completed
7860 && (code == LABEL_REF
7861 || (code == CONST
7862 && GET_CODE (XEXP (x, 0)) == PLUS
7863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7864 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7865 return 1;
7867 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7868 return 0;
7870 else if (code == PLUS)
7872 rtx xop0 = XEXP (x, 0);
7873 rtx xop1 = XEXP (x, 1);
7875 return ((arm_address_register_rtx_p (xop0, strict_p)
7876 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7877 || (!strict_p && will_be_in_index_register (xop1))))
7878 || (arm_address_register_rtx_p (xop1, strict_p)
7879 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7882 else if (can_avoid_literal_pool_for_label_p (x))
7883 return 0;
7885 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7886 && code == SYMBOL_REF
7887 && CONSTANT_POOL_ADDRESS_P (x)
7888 && ! (flag_pic
7889 && symbol_mentioned_p (get_pool_constant (x))
7890 && ! pcrel_constant_p (get_pool_constant (x))))
7891 return 1;
7893 return 0;
7896 /* Return nonzero if INDEX is valid for an address index operand in
7897 ARM state. */
7898 static int
7899 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7900 int strict_p)
7902 HOST_WIDE_INT range;
7903 enum rtx_code code = GET_CODE (index);
7905 /* Standard coprocessor addressing modes. */
7906 if (TARGET_HARD_FLOAT
7907 && (mode == SFmode || mode == DFmode))
7908 return (code == CONST_INT && INTVAL (index) < 1024
7909 && INTVAL (index) > -1024
7910 && (INTVAL (index) & 3) == 0);
7912 /* For quad modes, we restrict the constant offset to be slightly less
7913 than what the instruction format permits. We do this because for
7914 quad mode moves, we will actually decompose them into two separate
7915 double-mode reads or writes. INDEX must therefore be a valid
7916 (double-mode) offset and so should INDEX+8. */
7917 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7918 return (code == CONST_INT
7919 && INTVAL (index) < 1016
7920 && INTVAL (index) > -1024
7921 && (INTVAL (index) & 3) == 0);
7923 /* We have no such constraint on double mode offsets, so we permit the
7924 full range of the instruction format. */
7925 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7926 return (code == CONST_INT
7927 && INTVAL (index) < 1024
7928 && INTVAL (index) > -1024
7929 && (INTVAL (index) & 3) == 0);
7931 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7932 return (code == CONST_INT
7933 && INTVAL (index) < 1024
7934 && INTVAL (index) > -1024
7935 && (INTVAL (index) & 3) == 0);
7937 if (arm_address_register_rtx_p (index, strict_p)
7938 && (GET_MODE_SIZE (mode) <= 4))
7939 return 1;
7941 if (mode == DImode || mode == DFmode)
7943 if (code == CONST_INT)
7945 HOST_WIDE_INT val = INTVAL (index);
7947 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7948 If vldr is selected it uses arm_coproc_mem_operand. */
7949 if (TARGET_LDRD)
7950 return val > -256 && val < 256;
7951 else
7952 return val > -4096 && val < 4092;
7955 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7958 if (GET_MODE_SIZE (mode) <= 4
7959 && ! (arm_arch4
7960 && (mode == HImode
7961 || mode == HFmode
7962 || (mode == QImode && outer == SIGN_EXTEND))))
7964 if (code == MULT)
7966 rtx xiop0 = XEXP (index, 0);
7967 rtx xiop1 = XEXP (index, 1);
7969 return ((arm_address_register_rtx_p (xiop0, strict_p)
7970 && power_of_two_operand (xiop1, SImode))
7971 || (arm_address_register_rtx_p (xiop1, strict_p)
7972 && power_of_two_operand (xiop0, SImode)));
7974 else if (code == LSHIFTRT || code == ASHIFTRT
7975 || code == ASHIFT || code == ROTATERT)
7977 rtx op = XEXP (index, 1);
7979 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7980 && CONST_INT_P (op)
7981 && INTVAL (op) > 0
7982 && INTVAL (op) <= 31);
7986 /* For ARM v4 we may be doing a sign-extend operation during the
7987 load. */
7988 if (arm_arch4)
7990 if (mode == HImode
7991 || mode == HFmode
7992 || (outer == SIGN_EXTEND && mode == QImode))
7993 range = 256;
7994 else
7995 range = 4096;
7997 else
7998 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8000 return (code == CONST_INT
8001 && INTVAL (index) < range
8002 && INTVAL (index) > -range);
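/* Worked examples of the ranges checked above (sketch only):

     ldr   r0, [r1, #4092]       SImode, any immediate in the +/-4095 range
     ldrsh r0, [r1, #254]        HImode on ARMv4+, limited to +/-255
     ldrd  r0, r1, [r2, #252]    DImode with TARGET_LDRD, limited to +/-255

   An SImode offset of 4096, or a DImode offset of 256 with LDRD, fails the
   test and the address has to be rebuilt from a register instead.  */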
8005 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8006 index operand. i.e. 1, 2, 4 or 8. */
8007 static bool
8008 thumb2_index_mul_operand (rtx op)
8010 HOST_WIDE_INT val;
8012 if (!CONST_INT_P (op))
8013 return false;
8015 val = INTVAL(op);
8016 return (val == 1 || val == 2 || val == 4 || val == 8);
8019 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8020 static int
8021 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8023 enum rtx_code code = GET_CODE (index);
8025 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8026 /* Standard coprocessor addressing modes. */
8027 if (TARGET_HARD_FLOAT
8028 && (mode == SFmode || mode == DFmode))
8029 return (code == CONST_INT && INTVAL (index) < 1024
8030 /* Thumb-2 allows only > -256 index range for its core register
8031 load/stores. Since we allow SF/DF in core registers, we have
8032 to use the intersection between -256~4096 (core) and -1024~1024
8033 (coprocessor). */
8034 && INTVAL (index) > -256
8035 && (INTVAL (index) & 3) == 0);
8037 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8039 /* For DImode assume values will usually live in core regs
8040 and only allow LDRD addressing modes. */
8041 if (!TARGET_LDRD || mode != DImode)
8042 return (code == CONST_INT
8043 && INTVAL (index) < 1024
8044 && INTVAL (index) > -1024
8045 && (INTVAL (index) & 3) == 0);
8048 /* For quad modes, we restrict the constant offset to be slightly less
8049 than what the instruction format permits. We do this because for
8050 quad mode moves, we will actually decompose them into two separate
8051 double-mode reads or writes. INDEX must therefore be a valid
8052 (double-mode) offset and so should INDEX+8. */
8053 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8054 return (code == CONST_INT
8055 && INTVAL (index) < 1016
8056 && INTVAL (index) > -1024
8057 && (INTVAL (index) & 3) == 0);
8059 /* We have no such constraint on double mode offsets, so we permit the
8060 full range of the instruction format. */
8061 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8062 return (code == CONST_INT
8063 && INTVAL (index) < 1024
8064 && INTVAL (index) > -1024
8065 && (INTVAL (index) & 3) == 0);
8067 if (arm_address_register_rtx_p (index, strict_p)
8068 && (GET_MODE_SIZE (mode) <= 4))
8069 return 1;
8071 if (mode == DImode || mode == DFmode)
8073 if (code == CONST_INT)
8075 HOST_WIDE_INT val = INTVAL (index);
8076 /* Thumb-2 ldrd only has reg+const addressing modes.
8077 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8078 If vldr is selected it uses arm_coproc_mem_operand. */
8079 if (TARGET_LDRD)
8080 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8081 else
8082 return IN_RANGE (val, -255, 4095 - 4);
8084 else
8085 return 0;
8088 if (code == MULT)
8090 rtx xiop0 = XEXP (index, 0);
8091 rtx xiop1 = XEXP (index, 1);
8093 return ((arm_address_register_rtx_p (xiop0, strict_p)
8094 && thumb2_index_mul_operand (xiop1))
8095 || (arm_address_register_rtx_p (xiop1, strict_p)
8096 && thumb2_index_mul_operand (xiop0)));
8098 else if (code == ASHIFT)
8100 rtx op = XEXP (index, 1);
8102 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8103 && CONST_INT_P (op)
8104 && INTVAL (op) > 0
8105 && INTVAL (op) <= 3);
8108 return (code == CONST_INT
8109 && INTVAL (index) < 4096
8110 && INTVAL (index) > -256);
8113 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8114 static int
8115 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8117 int regno;
8119 if (!REG_P (x))
8120 return 0;
8122 regno = REGNO (x);
8124 if (strict_p)
8125 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8127 return (regno <= LAST_LO_REGNUM
8128 || regno > LAST_VIRTUAL_REGISTER
8129 || regno == FRAME_POINTER_REGNUM
8130 || (GET_MODE_SIZE (mode) >= 4
8131 && (regno == STACK_POINTER_REGNUM
8132 || regno >= FIRST_PSEUDO_REGISTER
8133 || x == hard_frame_pointer_rtx
8134 || x == arg_pointer_rtx)));
8137 /* Return nonzero if x is a legitimate index register. This is the case
8138 for any base register that can access a QImode object. */
8139 inline static int
8140 thumb1_index_register_rtx_p (rtx x, int strict_p)
8142 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8145 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8147 The AP may be eliminated to either the SP or the FP, so we use the
8148 least common denominator, e.g. SImode, and offsets from 0 to 64.
8150 ??? Verify whether the above is the right approach.
8152 ??? Also, the FP may be eliminated to the SP, so perhaps that
8153 needs special handling also.
8155 ??? Look at how the mips16 port solves this problem. It probably uses
8156 better ways to solve some of these problems.
8158 Although it is not incorrect, we don't accept QImode and HImode
8159 addresses based on the frame pointer or arg pointer until the
8160 reload pass starts. This is so that eliminating such addresses
8161 into stack based ones won't produce impossible code. */
8162 int
8163 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8165 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8166 return 0;
8168 /* ??? Not clear if this is right. Experiment. */
8169 if (GET_MODE_SIZE (mode) < 4
8170 && !(reload_in_progress || reload_completed)
8171 && (reg_mentioned_p (frame_pointer_rtx, x)
8172 || reg_mentioned_p (arg_pointer_rtx, x)
8173 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8174 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8175 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8176 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8177 return 0;
8179 /* Accept any base register. SP only in SImode or larger. */
8180 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8181 return 1;
8183 /* This is PC relative data before arm_reorg runs. */
8184 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8185 && GET_CODE (x) == SYMBOL_REF
8186 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8187 return 1;
8189 /* This is PC relative data after arm_reorg runs. */
8190 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8191 && reload_completed
8192 && (GET_CODE (x) == LABEL_REF
8193 || (GET_CODE (x) == CONST
8194 && GET_CODE (XEXP (x, 0)) == PLUS
8195 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8196 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8197 return 1;
8199 /* Post-inc indexing only supported for SImode and larger. */
8200 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8201 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8202 return 1;
8204 else if (GET_CODE (x) == PLUS)
8206 /* REG+REG address can be any two index registers. */
8207 /* We disallow FRAME+REG addressing since we know that FRAME
8208 will be replaced with STACK, and SP relative addressing only
8209 permits SP+OFFSET. */
8210 if (GET_MODE_SIZE (mode) <= 4
8211 && XEXP (x, 0) != frame_pointer_rtx
8212 && XEXP (x, 1) != frame_pointer_rtx
8213 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8214 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8215 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8216 return 1;
8218 /* REG+const has 5-7 bit offset for non-SP registers. */
8219 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8220 || XEXP (x, 0) == arg_pointer_rtx)
8221 && CONST_INT_P (XEXP (x, 1))
8222 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8223 return 1;
8225 /* REG+const has 10-bit offset for SP, but only SImode and
8226 larger is supported. */
8227 /* ??? Should probably check for DI/DFmode overflow here
8228 just like GO_IF_LEGITIMATE_OFFSET does. */
8229 else if (REG_P (XEXP (x, 0))
8230 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8231 && GET_MODE_SIZE (mode) >= 4
8232 && CONST_INT_P (XEXP (x, 1))
8233 && INTVAL (XEXP (x, 1)) >= 0
8234 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8235 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8236 return 1;
8238 else if (REG_P (XEXP (x, 0))
8239 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8240 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8241 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8242 && REGNO (XEXP (x, 0))
8243 <= LAST_VIRTUAL_POINTER_REGISTER))
8244 && GET_MODE_SIZE (mode) >= 4
8245 && CONST_INT_P (XEXP (x, 1))
8246 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8247 return 1;
8250 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8251 && GET_MODE_SIZE (mode) == 4
8252 && GET_CODE (x) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (x)
8254 && ! (flag_pic
8255 && symbol_mentioned_p (get_pool_constant (x))
8256 && ! pcrel_constant_p (get_pool_constant (x))))
8257 return 1;
8259 return 0;
8262 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8263 instruction of mode MODE. */
8264 int
8265 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8267 switch (GET_MODE_SIZE (mode))
8269 case 1:
8270 return val >= 0 && val < 32;
8272 case 2:
8273 return val >= 0 && val < 64 && (val & 1) == 0;
8275 default:
8276 return (val >= 0
8277 && (val + GET_MODE_SIZE (mode)) <= 128
8278 && (val & 3) == 0);
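/* Concretely (sketch): QImode accepts offsets 0-31, HImode 0-62 in steps
   of 2, and SImode and wider 0-124 in steps of 4 (val plus the access size
   must stay within 128), matching the scaled 5-bit offset fields of the
   16-bit LDRB/LDRH/LDR encodings.  */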
8282 bool
8283 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8285 if (TARGET_ARM)
8286 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8287 else if (TARGET_THUMB2)
8288 return thumb2_legitimate_address_p (mode, x, strict_p);
8289 else /* if (TARGET_THUMB1) */
8290 return thumb1_legitimate_address_p (mode, x, strict_p);
8293 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8295 Given an rtx X being reloaded into a reg required to be
8296 in class CLASS, return the class of reg to actually use.
8297 In general this is just CLASS, but for the Thumb core registers and
8298 immediate constants we prefer a LO_REGS class or a subset. */
8300 static reg_class_t
8301 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8303 if (TARGET_32BIT)
8304 return rclass;
8305 else
8307 if (rclass == GENERAL_REGS)
8308 return LO_REGS;
8309 else
8310 return rclass;
8314 /* Build the SYMBOL_REF for __tls_get_addr. */
8316 static GTY(()) rtx tls_get_addr_libfunc;
8318 static rtx
8319 get_tls_get_addr (void)
8321 if (!tls_get_addr_libfunc)
8322 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8323 return tls_get_addr_libfunc;
8326 rtx
8327 arm_load_tp (rtx target)
8329 if (!target)
8330 target = gen_reg_rtx (SImode);
8332 if (TARGET_HARD_TP)
8334 /* Can return in any reg. */
8335 emit_insn (gen_load_tp_hard (target));
8337 else
8339 /* Always returned in r0. Immediately copy the result into a pseudo,
8340 otherwise other uses of r0 (e.g. setting up function arguments) may
8341 clobber the value. */
8343 rtx tmp;
8345 emit_insn (gen_load_tp_soft ());
8347 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8348 emit_move_insn (target, tmp);
8350 return target;
8353 static rtx
8354 load_tls_operand (rtx x, rtx reg)
8356 rtx tmp;
8358 if (reg == NULL_RTX)
8359 reg = gen_reg_rtx (SImode);
8361 tmp = gen_rtx_CONST (SImode, x);
8363 emit_move_insn (reg, tmp);
8365 return reg;
8368 static rtx_insn *
8369 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8371 rtx label, labelno, sum;
8373 gcc_assert (reloc != TLS_DESCSEQ);
8374 start_sequence ();
8376 labelno = GEN_INT (pic_labelno++);
8377 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8378 label = gen_rtx_CONST (VOIDmode, label);
8380 sum = gen_rtx_UNSPEC (Pmode,
8381 gen_rtvec (4, x, GEN_INT (reloc), label,
8382 GEN_INT (TARGET_ARM ? 8 : 4)),
8383 UNSPEC_TLS);
8384 reg = load_tls_operand (sum, reg);
8386 if (TARGET_ARM)
8387 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8388 else
8389 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8391 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8392 LCT_PURE, /* LCT_CONST? */
8393 Pmode, reg, Pmode);
8395 rtx_insn *insns = get_insns ();
8396 end_sequence ();
8398 return insns;
8401 static rtx
8402 arm_tls_descseq_addr (rtx x, rtx reg)
8404 rtx labelno = GEN_INT (pic_labelno++);
8405 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8406 rtx sum = gen_rtx_UNSPEC (Pmode,
8407 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8408 gen_rtx_CONST (VOIDmode, label),
8409 GEN_INT (!TARGET_ARM)),
8410 UNSPEC_TLS);
8411 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8413 emit_insn (gen_tlscall (x, labelno));
8414 if (!reg)
8415 reg = gen_reg_rtx (SImode);
8416 else
8417 gcc_assert (REGNO (reg) != R0_REGNUM);
8419 emit_move_insn (reg, reg0);
8421 return reg;
8424 rtx
8425 legitimize_tls_address (rtx x, rtx reg)
8427 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8428 rtx_insn *insns;
8429 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8431 switch (model)
8433 case TLS_MODEL_GLOBAL_DYNAMIC:
8434 if (TARGET_GNU2_TLS)
8436 reg = arm_tls_descseq_addr (x, reg);
8438 tp = arm_load_tp (NULL_RTX);
8440 dest = gen_rtx_PLUS (Pmode, tp, reg);
8442 else
8444 /* Original scheme */
8445 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8446 dest = gen_reg_rtx (Pmode);
8447 emit_libcall_block (insns, dest, ret, x);
8449 return dest;
8451 case TLS_MODEL_LOCAL_DYNAMIC:
8452 if (TARGET_GNU2_TLS)
8454 reg = arm_tls_descseq_addr (x, reg);
8456 tp = arm_load_tp (NULL_RTX);
8458 dest = gen_rtx_PLUS (Pmode, tp, reg);
8460 else
8462 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8464 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8465 share the LDM result with other LD model accesses. */
8466 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8467 UNSPEC_TLS);
8468 dest = gen_reg_rtx (Pmode);
8469 emit_libcall_block (insns, dest, ret, eqv);
8471 /* Load the addend. */
8472 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8473 GEN_INT (TLS_LDO32)),
8474 UNSPEC_TLS);
8475 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8476 dest = gen_rtx_PLUS (Pmode, dest, addend);
8478 return dest;
8480 case TLS_MODEL_INITIAL_EXEC:
8481 labelno = GEN_INT (pic_labelno++);
8482 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8483 label = gen_rtx_CONST (VOIDmode, label);
8484 sum = gen_rtx_UNSPEC (Pmode,
8485 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8486 GEN_INT (TARGET_ARM ? 8 : 4)),
8487 UNSPEC_TLS);
8488 reg = load_tls_operand (sum, reg);
8490 if (TARGET_ARM)
8491 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8492 else if (TARGET_THUMB2)
8493 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8494 else
8496 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8497 emit_move_insn (reg, gen_const_mem (SImode, reg));
8500 tp = arm_load_tp (NULL_RTX);
8502 return gen_rtx_PLUS (Pmode, tp, reg);
8504 case TLS_MODEL_LOCAL_EXEC:
8505 tp = arm_load_tp (NULL_RTX);
8507 reg = gen_rtx_UNSPEC (Pmode,
8508 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8509 UNSPEC_TLS);
8510 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8512 return gen_rtx_PLUS (Pmode, tp, reg);
8514 default:
8515 abort ();
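/* Sketch of how the models above are chosen at the source level (assuming
   a GNU/Linux style target with TLS support):

     __thread int counter;

   In a shared library built with -fPIC, accesses to counter normally use
   the global-dynamic sequence (TLS_GD32, or the descriptor form with
   -mtls-dialect=gnu2); in an executable they can use initial-exec, and a
   variable defined in the executable itself can use local-exec, i.e. a
   constant TLS_LE32 offset from the thread pointer.  */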
8519 /* Try machine-dependent ways of modifying an illegitimate address
8520 to be legitimate. If we find one, return the new, valid address. */
8522 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8524 if (arm_tls_referenced_p (x))
8526 rtx addend = NULL;
8528 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8530 addend = XEXP (XEXP (x, 0), 1);
8531 x = XEXP (XEXP (x, 0), 0);
8534 if (GET_CODE (x) != SYMBOL_REF)
8535 return x;
8537 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8539 x = legitimize_tls_address (x, NULL_RTX);
8541 if (addend)
8543 x = gen_rtx_PLUS (SImode, x, addend);
8544 orig_x = x;
8546 else
8547 return x;
8550 if (!TARGET_ARM)
8552 /* TODO: legitimize_address for Thumb2. */
8553 if (TARGET_THUMB2)
8554 return x;
8555 return thumb_legitimize_address (x, orig_x, mode);
8558 if (GET_CODE (x) == PLUS)
8560 rtx xop0 = XEXP (x, 0);
8561 rtx xop1 = XEXP (x, 1);
8563 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8564 xop0 = force_reg (SImode, xop0);
8566 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8567 && !symbol_mentioned_p (xop1))
8568 xop1 = force_reg (SImode, xop1);
8570 if (ARM_BASE_REGISTER_RTX_P (xop0)
8571 && CONST_INT_P (xop1))
8573 HOST_WIDE_INT n, low_n;
8574 rtx base_reg, val;
8575 n = INTVAL (xop1);
8577 /* VFP addressing modes actually allow greater offsets, but for
8578 now we just stick with the lowest common denominator. */
8579 if (mode == DImode || mode == DFmode)
8581 low_n = n & 0x0f;
8582 n &= ~0x0f;
8583 if (low_n > 4)
8585 n += 16;
8586 low_n -= 16;
8589 else
8591 low_n = ((mode) == TImode ? 0
8592 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8593 n -= low_n;
8596 base_reg = gen_reg_rtx (SImode);
8597 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8598 emit_move_insn (base_reg, val);
8599 x = plus_constant (Pmode, base_reg, low_n);
8601 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8602 x = gen_rtx_PLUS (SImode, xop0, xop1);
8605 /* XXX We don't allow MINUS any more -- see comment in
8606 arm_legitimate_address_outer_p (). */
8607 else if (GET_CODE (x) == MINUS)
8609 rtx xop0 = XEXP (x, 0);
8610 rtx xop1 = XEXP (x, 1);
8612 if (CONSTANT_P (xop0))
8613 xop0 = force_reg (SImode, xop0);
8615 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8616 xop1 = force_reg (SImode, xop1);
8618 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8619 x = gen_rtx_MINUS (SImode, xop0, xop1);
8622 /* Make sure to take full advantage of the pre-indexed addressing mode
8623 with absolute addresses which often allows for the base register to
8624 be factorized for multiple adjacent memory references, and it might
8625 even allow the mini pool to be avoided entirely. */
8626 else if (CONST_INT_P (x) && optimize > 0)
8628 unsigned int bits;
8629 HOST_WIDE_INT mask, base, index;
8630 rtx base_reg;
8632 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8633 use an 8-bit index. So let's use a 12-bit index for SImode only and
8634 hope that arm_gen_constant will enable ldrb to use more bits. */
8635 bits = (mode == SImode) ? 12 : 8;
8636 mask = (1 << bits) - 1;
8637 base = INTVAL (x) & ~mask;
8638 index = INTVAL (x) & mask;
8639 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8641 /* It'll most probably be more efficient to generate the base
8642 with more bits set and use a negative index instead. */
8643 base |= mask;
8644 index -= mask;
8646 base_reg = force_reg (SImode, GEN_INT (base));
8647 x = plus_constant (Pmode, base_reg, index);
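  /* Worked example (sketch): an SImode load from absolute address 0x12345
     gives bits = 12, mask = 0xfff, base = 0x12000 and index = 0x345.  Only
     two bits are set in the base, so it is left alone and the access
     becomes roughly

	mov	rB, #0x12000
	ldr	rD, [rB, #0x345]

     allowing neighbouring accesses to reuse rB.  */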
8650 if (flag_pic)
8652 /* We need to find and carefully transform any SYMBOL and LABEL
8653 references; so go back to the original address expression. */
8654 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8656 if (new_x != orig_x)
8657 x = new_x;
8660 return x;
8664 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8665 to be legitimate. If we find one, return the new, valid address. */
8667 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8669 if (GET_CODE (x) == PLUS
8670 && CONST_INT_P (XEXP (x, 1))
8671 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8672 || INTVAL (XEXP (x, 1)) < 0))
8674 rtx xop0 = XEXP (x, 0);
8675 rtx xop1 = XEXP (x, 1);
8676 HOST_WIDE_INT offset = INTVAL (xop1);
8678 /* Try and fold the offset into a biasing of the base register and
8679 then offsetting that. Don't do this when optimizing for space
8680 since it can cause too many CSEs. */
8681 if (optimize_size && offset >= 0
8682 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8684 HOST_WIDE_INT delta;
8686 if (offset >= 256)
8687 delta = offset - (256 - GET_MODE_SIZE (mode));
8688 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8689 delta = 31 * GET_MODE_SIZE (mode);
8690 else
8691 delta = offset & (~31 * GET_MODE_SIZE (mode));
8693 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8694 NULL_RTX);
8695 x = plus_constant (Pmode, xop0, delta);
8697 else if (offset < 0 && offset > -256)
8698 /* Small negative offsets are best done with a subtract before the
8699 dereference, forcing these into a register normally takes two
8700 instructions. */
8701 x = force_operand (x, NULL_RTX);
8702 else
8704 /* For the remaining cases, force the constant into a register. */
8705 xop1 = force_reg (SImode, xop1);
8706 x = gen_rtx_PLUS (SImode, xop0, xop1);
8709 else if (GET_CODE (x) == PLUS
8710 && s_register_operand (XEXP (x, 1), SImode)
8711 && !s_register_operand (XEXP (x, 0), SImode))
8713 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8715 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8718 if (flag_pic)
8720 /* We need to find and carefully transform any SYMBOL and LABEL
8721 references; so go back to the original address expression. */
8722 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8724 if (new_x != orig_x)
8725 x = new_x;
8728 return x;
8731 /* Return TRUE if X contains any TLS symbol references. */
8733 bool
8734 arm_tls_referenced_p (rtx x)
8736 if (! TARGET_HAVE_TLS)
8737 return false;
8739 subrtx_iterator::array_type array;
8740 FOR_EACH_SUBRTX (iter, array, x, ALL)
8742 const_rtx x = *iter;
8743 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8745 /* ARM currently does not provide relocations to encode TLS variables
8746 into AArch32 instructions, only data, so there is no way to
8747 currently implement these if a literal pool is disabled. */
8748 if (arm_disable_literal_pool)
8749 sorry ("accessing thread-local storage is not currently supported "
8750 "with -mpure-code or -mslow-flash-data");
8752 return true;
8755 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8756 TLS offsets, not real symbol references. */
8757 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8758 iter.skip_subrtxes ();
8760 return false;
8763 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8765 On the ARM, allow any integer (invalid ones are removed later by insn
8766 patterns), nice doubles and symbol_refs which refer to the function's
8767 constant pool XXX.
8769 When generating pic allow anything. */
8771 static bool
8772 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8774 return flag_pic || !label_mentioned_p (x);
8777 static bool
8778 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8780 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8781 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8782 for ARMv8-M Baseline or later the result is valid. */
8783 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8784 x = XEXP (x, 0);
8786 return (CONST_INT_P (x)
8787 || CONST_DOUBLE_P (x)
8788 || CONSTANT_ADDRESS_P (x)
8789 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8790 || flag_pic);
8793 static bool
8794 arm_legitimate_constant_p (machine_mode mode, rtx x)
8796 return (!arm_cannot_force_const_mem (mode, x)
8797 && (TARGET_32BIT
8798 ? arm_legitimate_constant_p_1 (mode, x)
8799 : thumb_legitimate_constant_p (mode, x)));
8802 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8804 static bool
8805 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8807 rtx base, offset;
8809 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8811 split_const (x, &base, &offset);
8812 if (GET_CODE (base) == SYMBOL_REF
8813 && !offset_within_block_p (base, INTVAL (offset)))
8814 return true;
8816 return arm_tls_referenced_p (x);
8819 #define REG_OR_SUBREG_REG(X) \
8820 (REG_P (X) \
8821 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8823 #define REG_OR_SUBREG_RTX(X) \
8824 (REG_P (X) ? (X) : SUBREG_REG (X))
8826 static inline int
8827 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8829 machine_mode mode = GET_MODE (x);
8830 int total, words;
8832 switch (code)
8834 case ASHIFT:
8835 case ASHIFTRT:
8836 case LSHIFTRT:
8837 case ROTATERT:
8838 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8840 case PLUS:
8841 case MINUS:
8842 case COMPARE:
8843 case NEG:
8844 case NOT:
8845 return COSTS_N_INSNS (1);
8847 case MULT:
8848 if (arm_arch6m && arm_m_profile_small_mul)
8849 return COSTS_N_INSNS (32);
8851 if (CONST_INT_P (XEXP (x, 1)))
8853 int cycles = 0;
8854 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8856 while (i)
8858 i >>= 2;
8859 cycles++;
8861 return COSTS_N_INSNS (2) + cycles;
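/* E.g. a multiply by 100 (0x64) iterates four times (0x64 -> 0x19 -> 0x6
   -> 0x1 -> 0), so the estimate is COSTS_N_INSNS (2) + 4; the loop is in
   effect counting base-4 digits of the constant as a rough latency proxy.  */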
8863 return COSTS_N_INSNS (1) + 16;
8865 case SET:
8866 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8867 the mode. */
8868 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8869 return (COSTS_N_INSNS (words)
8870 + 4 * ((MEM_P (SET_SRC (x)))
8871 + MEM_P (SET_DEST (x))));
8873 case CONST_INT:
8874 if (outer == SET)
8876 if (UINTVAL (x) < 256
8877 /* 16-bit constant. */
8878 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8879 return 0;
8880 if (thumb_shiftable_const (INTVAL (x)))
8881 return COSTS_N_INSNS (2);
8882 return COSTS_N_INSNS (3);
8884 else if ((outer == PLUS || outer == COMPARE)
8885 && INTVAL (x) < 256 && INTVAL (x) > -256)
8886 return 0;
8887 else if ((outer == IOR || outer == XOR || outer == AND)
8888 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8889 return COSTS_N_INSNS (1);
8890 else if (outer == AND)
8892 int i;
8893 /* This duplicates the tests in the andsi3 expander. */
8894 for (i = 9; i <= 31; i++)
8895 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8896 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8897 return COSTS_N_INSNS (2);
8899 else if (outer == ASHIFT || outer == ASHIFTRT
8900 || outer == LSHIFTRT)
8901 return 0;
8902 return COSTS_N_INSNS (2);
8904 case CONST:
8905 case CONST_DOUBLE:
8906 case LABEL_REF:
8907 case SYMBOL_REF:
8908 return COSTS_N_INSNS (3);
8910 case UDIV:
8911 case UMOD:
8912 case DIV:
8913 case MOD:
8914 return 100;
8916 case TRUNCATE:
8917 return 99;
8919 case AND:
8920 case XOR:
8921 case IOR:
8922 /* XXX guess. */
8923 return 8;
8925 case MEM:
8926 /* XXX another guess. */
8927 /* Memory costs quite a lot for the first word, but subsequent words
8928 load at the equivalent of a single insn each. */
8929 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8930 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8931 ? 4 : 0));
8933 case IF_THEN_ELSE:
8934 /* XXX a guess. */
8935 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8936 return 14;
8937 return 2;
8939 case SIGN_EXTEND:
8940 case ZERO_EXTEND:
8941 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8942 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8944 if (mode == SImode)
8945 return total;
8947 if (arm_arch6)
8948 return total + COSTS_N_INSNS (1);
8950 /* Assume a two-shift sequence. Increase the cost slightly so
8951 we prefer actual shifts over an extend operation. */
8952 return total + 1 + COSTS_N_INSNS (2);
8954 default:
8955 return 99;
8959 /* Estimates the size cost of thumb1 instructions.
8960 For now most of the code is copied from thumb1_rtx_costs. We need more
8961 fine-grained tuning when we have more related test cases. */
8962 static inline int
8963 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8965 machine_mode mode = GET_MODE (x);
8966 int words, cost;
8968 switch (code)
8970 case ASHIFT:
8971 case ASHIFTRT:
8972 case LSHIFTRT:
8973 case ROTATERT:
8974 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8976 case PLUS:
8977 case MINUS:
8978 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8979 defined by RTL expansion, especially for the expansion of
8980 multiplication. */
8981 if ((GET_CODE (XEXP (x, 0)) == MULT
8982 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8983 || (GET_CODE (XEXP (x, 1)) == MULT
8984 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8985 return COSTS_N_INSNS (2);
8986 /* Fall through. */
8987 case COMPARE:
8988 case NEG:
8989 case NOT:
8990 return COSTS_N_INSNS (1);
8992 case MULT:
8993 if (CONST_INT_P (XEXP (x, 1)))
8995 /* Thumb1 mul instruction can't operate on a constant. We must load it
8996 into a register first. */
8997 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8998 /* For the targets which have a very small and high-latency multiply
8999 unit, we prefer to synthesize the mult with up to 5 instructions,
9000 giving a good balance between size and performance. */
9001 if (arm_arch6m && arm_m_profile_small_mul)
9002 return COSTS_N_INSNS (5);
9003 else
9004 return COSTS_N_INSNS (1) + const_size;
9006 return COSTS_N_INSNS (1);
9008 case SET:
9009 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9010 the mode. */
9011 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9012 cost = COSTS_N_INSNS (words);
9013 if (satisfies_constraint_J (SET_SRC (x))
9014 || satisfies_constraint_K (SET_SRC (x))
9015 /* Too big an immediate for a 2-byte mov, using MOVT. */
9016 || (CONST_INT_P (SET_SRC (x))
9017 && UINTVAL (SET_SRC (x)) >= 256
9018 && TARGET_HAVE_MOVT
9019 && satisfies_constraint_j (SET_SRC (x)))
9020 /* thumb1_movdi_insn. */
9021 || ((words > 1) && MEM_P (SET_SRC (x))))
9022 cost += COSTS_N_INSNS (1);
9023 return cost;
9025 case CONST_INT:
9026 if (outer == SET)
9028 if (UINTVAL (x) < 256)
9029 return COSTS_N_INSNS (1);
9030 /* movw is 4 bytes long. */
9031 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9032 return COSTS_N_INSNS (2);
9033 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9034 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9035 return COSTS_N_INSNS (2);
9036 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9037 if (thumb_shiftable_const (INTVAL (x)))
9038 return COSTS_N_INSNS (2);
9039 return COSTS_N_INSNS (3);
9041 else if ((outer == PLUS || outer == COMPARE)
9042 && INTVAL (x) < 256 && INTVAL (x) > -256)
9043 return 0;
9044 else if ((outer == IOR || outer == XOR || outer == AND)
9045 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9046 return COSTS_N_INSNS (1);
9047 else if (outer == AND)
9049 int i;
9050 /* This duplicates the tests in the andsi3 expander. */
9051 for (i = 9; i <= 31; i++)
9052 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9053 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9054 return COSTS_N_INSNS (2);
9056 else if (outer == ASHIFT || outer == ASHIFTRT
9057 || outer == LSHIFTRT)
9058 return 0;
9059 return COSTS_N_INSNS (2);
9061 case CONST:
9062 case CONST_DOUBLE:
9063 case LABEL_REF:
9064 case SYMBOL_REF:
9065 return COSTS_N_INSNS (3);
9067 case UDIV:
9068 case UMOD:
9069 case DIV:
9070 case MOD:
9071 return 100;
9073 case TRUNCATE:
9074 return 99;
9076 case AND:
9077 case XOR:
9078 case IOR:
9079 return COSTS_N_INSNS (1);
9081 case MEM:
9082 return (COSTS_N_INSNS (1)
9083 + COSTS_N_INSNS (1)
9084 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9085 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9086 ? COSTS_N_INSNS (1) : 0));
9088 case IF_THEN_ELSE:
9089 /* XXX a guess. */
9090 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9091 return 14;
9092 return 2;
9094 case ZERO_EXTEND:
9095 /* XXX still guessing. */
9096 switch (GET_MODE (XEXP (x, 0)))
9098 case E_QImode:
9099 return (1 + (mode == DImode ? 4 : 0)
9100 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9102 case E_HImode:
9103 return (4 + (mode == DImode ? 4 : 0)
9104 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9106 case E_SImode:
9107 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9109 default:
9110 return 99;
9113 default:
9114 return 99;
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the operand.
9121 Return NULL if OP is not a shifter operand. */
9122 static rtx
9123 shifter_op_p (rtx op, rtx *shift_reg)
9125 enum rtx_code code = GET_CODE (op);
9127 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9128 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9129 return XEXP (op, 0);
9130 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9131 return XEXP (op, 0);
9132 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9133 || code == ASHIFTRT)
9135 if (!CONST_INT_P (XEXP (op, 1)))
9136 *shift_reg = XEXP (op, 1);
9137 return XEXP (op, 0);
9140 return NULL;
9143 static bool
9144 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9146 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9147 rtx_code code = GET_CODE (x);
9148 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9150 switch (XINT (x, 1))
9152 case UNSPEC_UNALIGNED_LOAD:
9153 /* We can only do unaligned loads into the integer unit, and we can't
9154 use LDM or LDRD. */
9155 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9156 if (speed_p)
9157 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9158 + extra_cost->ldst.load_unaligned);
9160 #ifdef NOT_YET
9161 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9162 ADDR_SPACE_GENERIC, speed_p);
9163 #endif
9164 return true;
9166 case UNSPEC_UNALIGNED_STORE:
9167 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9168 if (speed_p)
9169 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9170 + extra_cost->ldst.store_unaligned);
9172 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9173 #ifdef NOT_YET
9174 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9175 ADDR_SPACE_GENERIC, speed_p);
9176 #endif
9177 return true;
9179 case UNSPEC_VRINTZ:
9180 case UNSPEC_VRINTP:
9181 case UNSPEC_VRINTM:
9182 case UNSPEC_VRINTR:
9183 case UNSPEC_VRINTX:
9184 case UNSPEC_VRINTA:
9185 if (speed_p)
9186 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9188 return true;
9189 default:
9190 *cost = COSTS_N_INSNS (2);
9191 break;
9193 return true;
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
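/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for size.  */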
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
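/* Helper for the narrow-mode (sub-word) PLUS and MINUS cases below: if
   operand IDX of X is a suitable left-shift sub-expression, add the cost
   of a combined shift-and-arithmetic operation OP, plus the costs of its
   operands, to *cost and return true from the enclosing cost function.  */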
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9201 do \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9207 if (shift_reg) \
9209 if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9212 ASHIFT, 1, speed_p); \
9214 else if (speed_p) \
9215 *cost += extra_cost->alu.arith_shift; \
9217 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9218 ASHIFT, 0, speed_p) \
9219 + rtx_cost (XEXP (x, 1 - IDX), \
9220 GET_MODE (shift_op), \
9221 OP, 1, speed_p)); \
9222 return true; \
9225 while (0)
9227 /* RTX costs. Make an estimate of the cost of executing the operation
9228    X, which is contained within an operation with code OUTER_CODE.
9229 SPEED_P indicates whether the cost desired is the performance cost,
9230 or the size cost. The estimate is stored in COST and the return
9231 value is TRUE if the cost calculation is final, or FALSE if the
9232 caller should recurse through the operands of X to add additional
9233 costs.
9235 We currently make no attempt to model the size savings of Thumb-2
9236 16-bit instructions. At the normal points in compilation where
9237 this code is called we have no measure of whether the condition
9238 flags are live or not, and thus no realistic way to determine what
9239 the size will eventually be. */
9240 static bool
9241 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9242 const struct cpu_cost_table *extra_cost,
9243 int *cost, bool speed_p)
9245 machine_mode mode = GET_MODE (x);
9247 *cost = COSTS_N_INSNS (1);
9249 if (TARGET_THUMB1)
9251 if (speed_p)
9252 *cost = thumb1_rtx_costs (x, code, outer_code);
9253 else
9254 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9255 return true;
9258 switch (code)
9260 case SET:
9261 *cost = 0;
9262 /* SET RTXs don't have a mode so we get it from the destination. */
9263 mode = GET_MODE (SET_DEST (x));
9265 if (REG_P (SET_SRC (x))
9266 && REG_P (SET_DEST (x)))
9268 /* Assume that most copies can be done with a single insn,
9269 unless we don't have HW FP, in which case everything
9270 larger than word mode will require two insns. */
9271 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9272 && GET_MODE_SIZE (mode) > 4)
9273 || mode == DImode)
9274 ? 2 : 1);
9275 /* Conditional register moves can be encoded
9276 in 16 bits in Thumb mode. */
9277 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9278 *cost >>= 1;
9280 return true;
9283 if (CONST_INT_P (SET_SRC (x)))
9285 /* Handle CONST_INT here, since the value doesn't have a mode
9286 and we would otherwise be unable to work out the true cost. */
9287 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9288 0, speed_p);
9289 outer_code = SET;
9290 /* Slightly lower the cost of setting a core reg to a constant.
9291 This helps break up chains and allows for better scheduling. */
9292 if (REG_P (SET_DEST (x))
9293 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9294 *cost -= 1;
9295 x = SET_SRC (x);
9296 /* Immediate moves with an immediate in the range [0, 255] can be
9297 encoded in 16 bits in Thumb mode. */
9298 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9299              && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9300 *cost >>= 1;
9301 goto const_int_cost;
9304 return false;
9306 case MEM:
9307       /* A memory access costs one insn if the mode is small or the address is
9308         a single register; otherwise it costs one insn per word.  */
9309 if (REG_P (XEXP (x, 0)))
9310 *cost = COSTS_N_INSNS (1);
9311 else if (flag_pic
9312 && GET_CODE (XEXP (x, 0)) == PLUS
9313 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9314 /* This will be split into two instructions.
9315 See arm.md:calculate_pic_address. */
9316 *cost = COSTS_N_INSNS (2);
9317 else
9318 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9320 /* For speed optimizations, add the costs of the address and
9321 accessing memory. */
9322 if (speed_p)
9323 #ifdef NOT_YET
9324 *cost += (extra_cost->ldst.load
9325 + arm_address_cost (XEXP (x, 0), mode,
9326 ADDR_SPACE_GENERIC, speed_p));
9327 #else
9328 *cost += extra_cost->ldst.load;
9329 #endif
9330 return true;
9332 case PARALLEL:
9334        /* Calculations of LDM costs are complex.  We assume an initial cost
9335           (ldm_1st) that covers loading up to ldm_regs_per_insn_1st registers;
9336           each further group of ldm_regs_per_insn_subsequent registers then
9337           costs one more insn.  The
9338 formula for N regs is thus:
9340 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9341 + ldm_regs_per_insn_subsequent - 1)
9342 / ldm_regs_per_insn_subsequent).
9344 Additional costs may also be added for addressing. A similar
9345 formula is used for STM. */
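        /* For instance, with hypothetical tuning values of 4 for
	   ldm_regs_per_insn_1st and 2 for ldm_regs_per_insn_subsequent,
	   a seven-register LDM would cost ldm_1st + COSTS_N_INSNS (2).  */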
9347 bool is_ldm = load_multiple_operation (x, SImode);
9348 bool is_stm = store_multiple_operation (x, SImode);
9350 if (is_ldm || is_stm)
9352 if (speed_p)
9354 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9355 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9356 ? extra_cost->ldst.ldm_regs_per_insn_1st
9357 : extra_cost->ldst.stm_regs_per_insn_1st;
9358 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9359 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9360 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9362 *cost += regs_per_insn_1st
9363 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9364 + regs_per_insn_sub - 1)
9365 / regs_per_insn_sub);
9366 return true;
9370 return false;
9372 case DIV:
9373 case UDIV:
9374 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9375 && (mode == SFmode || !TARGET_VFP_SINGLE))
9376 *cost += COSTS_N_INSNS (speed_p
9377 ? extra_cost->fp[mode != SFmode].div : 0);
9378 else if (mode == SImode && TARGET_IDIV)
9379 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9380 else
9381 *cost = LIBCALL_COST (2);
9383       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9384         are possible, udiv is preferred.  */
9385 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9386 return false; /* All arguments must be in registers. */
9388 case MOD:
9389 /* MOD by a power of 2 can be expanded as:
9390 rsbs r1, r0, #0
9391 and r0, r0, #(n - 1)
9392 and r1, r1, #(n - 1)
9393 rsbpl r0, r1, #0. */
9394 if (CONST_INT_P (XEXP (x, 1))
9395 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9396 && mode == SImode)
9398 *cost += COSTS_N_INSNS (3);
9400 if (speed_p)
9401 *cost += 2 * extra_cost->alu.logical
9402 + extra_cost->alu.arith;
9403 return true;
9406 /* Fall-through. */
9407 case UMOD:
9408       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9409         are possible, udiv is preferred.  */
9410 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9411 return false; /* All arguments must be in registers. */
9413 case ROTATE:
9414 if (mode == SImode && REG_P (XEXP (x, 1)))
9416 *cost += (COSTS_N_INSNS (1)
9417 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9418 if (speed_p)
9419 *cost += extra_cost->alu.shift_reg;
9420 return true;
9422 /* Fall through */
9423 case ROTATERT:
9424 case ASHIFT:
9425 case LSHIFTRT:
9426 case ASHIFTRT:
9427 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9429 *cost += (COSTS_N_INSNS (2)
9430 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9431 if (speed_p)
9432 *cost += 2 * extra_cost->alu.shift;
9433           /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
9434 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9435 *cost += 1;
9436 return true;
9438 else if (mode == SImode)
9440 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9441 /* Slightly disparage register shifts at -Os, but not by much. */
9442 if (!CONST_INT_P (XEXP (x, 1)))
9443 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9444 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9445 return true;
9447 else if (GET_MODE_CLASS (mode) == MODE_INT
9448 && GET_MODE_SIZE (mode) < 4)
9450 if (code == ASHIFT)
9452 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9453 /* Slightly disparage register shifts at -Os, but not by
9454 much. */
9455 if (!CONST_INT_P (XEXP (x, 1)))
9456 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9457 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9459 else if (code == LSHIFTRT || code == ASHIFTRT)
9461 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9463 /* Can use SBFX/UBFX. */
9464 if (speed_p)
9465 *cost += extra_cost->alu.bfx;
9466 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9468 else
9470 *cost += COSTS_N_INSNS (1);
9471 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9472 if (speed_p)
9474 if (CONST_INT_P (XEXP (x, 1)))
9475 *cost += 2 * extra_cost->alu.shift;
9476 else
9477 *cost += (extra_cost->alu.shift
9478 + extra_cost->alu.shift_reg);
9480 else
9481 /* Slightly disparage register shifts. */
9482 *cost += !CONST_INT_P (XEXP (x, 1));
9485 else /* Rotates. */
9487 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9488 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9489 if (speed_p)
9491 if (CONST_INT_P (XEXP (x, 1)))
9492 *cost += (2 * extra_cost->alu.shift
9493 + extra_cost->alu.log_shift);
9494 else
9495 *cost += (extra_cost->alu.shift
9496 + extra_cost->alu.shift_reg
9497 + extra_cost->alu.log_shift_reg);
9500 return true;
9503 *cost = LIBCALL_COST (2);
9504 return false;
9506 case BSWAP:
9507 if (arm_arch6)
9509 if (mode == SImode)
9511 if (speed_p)
9512 *cost += extra_cost->alu.rev;
9514 return false;
9517 else
9519 /* No rev instruction available. Look at arm_legacy_rev
9520 and thumb_legacy_rev for the form of RTL used then. */
9521 if (TARGET_THUMB)
9523 *cost += COSTS_N_INSNS (9);
9525 if (speed_p)
9527 *cost += 6 * extra_cost->alu.shift;
9528 *cost += 3 * extra_cost->alu.logical;
9531 else
9533 *cost += COSTS_N_INSNS (4);
9535 if (speed_p)
9537 *cost += 2 * extra_cost->alu.shift;
9538 *cost += extra_cost->alu.arith_shift;
9539 *cost += 2 * extra_cost->alu.logical;
9542 return true;
9544 return false;
9546 case MINUS:
9547 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9548 && (mode == SFmode || !TARGET_VFP_SINGLE))
9550 if (GET_CODE (XEXP (x, 0)) == MULT
9551 || GET_CODE (XEXP (x, 1)) == MULT)
9553 rtx mul_op0, mul_op1, sub_op;
9555 if (speed_p)
9556 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9558 if (GET_CODE (XEXP (x, 0)) == MULT)
9560 mul_op0 = XEXP (XEXP (x, 0), 0);
9561 mul_op1 = XEXP (XEXP (x, 0), 1);
9562 sub_op = XEXP (x, 1);
9564 else
9566 mul_op0 = XEXP (XEXP (x, 1), 0);
9567 mul_op1 = XEXP (XEXP (x, 1), 1);
9568 sub_op = XEXP (x, 0);
9571 /* The first operand of the multiply may be optionally
9572 negated. */
9573 if (GET_CODE (mul_op0) == NEG)
9574 mul_op0 = XEXP (mul_op0, 0);
9576 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9577 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9578 + rtx_cost (sub_op, mode, code, 0, speed_p));
9580 return true;
9583 if (speed_p)
9584 *cost += extra_cost->fp[mode != SFmode].addsub;
9585 return false;
9588 if (mode == SImode)
9590 rtx shift_by_reg = NULL;
9591 rtx shift_op;
9592 rtx non_shift_op;
9594 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9595 if (shift_op == NULL)
9597 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9598 non_shift_op = XEXP (x, 0);
9600 else
9601 non_shift_op = XEXP (x, 1);
9603 if (shift_op != NULL)
9605 if (shift_by_reg != NULL)
9607 if (speed_p)
9608 *cost += extra_cost->alu.arith_shift_reg;
9609 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9611 else if (speed_p)
9612 *cost += extra_cost->alu.arith_shift;
9614 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9615 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9616 return true;
9619 if (arm_arch_thumb2
9620 && GET_CODE (XEXP (x, 1)) == MULT)
9622 /* MLS. */
9623 if (speed_p)
9624 *cost += extra_cost->mult[0].add;
9625 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9626 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9627 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9628 return true;
9631 if (CONST_INT_P (XEXP (x, 0)))
9633 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9634 INTVAL (XEXP (x, 0)), NULL_RTX,
9635 NULL_RTX, 1, 0);
9636 *cost = COSTS_N_INSNS (insns);
9637 if (speed_p)
9638 *cost += insns * extra_cost->alu.arith;
9639 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9640 return true;
9642 else if (speed_p)
9643 *cost += extra_cost->alu.arith;
9645 return false;
9648 if (GET_MODE_CLASS (mode) == MODE_INT
9649 && GET_MODE_SIZE (mode) < 4)
9651 rtx shift_op, shift_reg;
9652 shift_reg = NULL;
9654 /* We check both sides of the MINUS for shifter operands since,
9655 unlike PLUS, it's not commutative. */
9657 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9658 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9660 /* Slightly disparage, as we might need to widen the result. */
9661 *cost += 1;
9662 if (speed_p)
9663 *cost += extra_cost->alu.arith;
9665 if (CONST_INT_P (XEXP (x, 0)))
9667 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9668 return true;
9671 return false;
9674 if (mode == DImode)
9676 *cost += COSTS_N_INSNS (1);
9678 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9680 rtx op1 = XEXP (x, 1);
9682 if (speed_p)
9683 *cost += 2 * extra_cost->alu.arith;
9685 if (GET_CODE (op1) == ZERO_EXTEND)
9686 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9687 0, speed_p);
9688 else
9689 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9690 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9691 0, speed_p);
9692 return true;
9694 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9696 if (speed_p)
9697 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9698 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9699 0, speed_p)
9700 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9701 return true;
9703 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9704 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9706 if (speed_p)
9707 *cost += (extra_cost->alu.arith
9708 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9709 ? extra_cost->alu.arith
9710 : extra_cost->alu.arith_shift));
9711 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9712 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9713 GET_CODE (XEXP (x, 1)), 0, speed_p));
9714 return true;
9717 if (speed_p)
9718 *cost += 2 * extra_cost->alu.arith;
9719 return false;
9722 /* Vector mode? */
9724 *cost = LIBCALL_COST (2);
9725 return false;
9727 case PLUS:
9728 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9729 && (mode == SFmode || !TARGET_VFP_SINGLE))
9731 if (GET_CODE (XEXP (x, 0)) == MULT)
9733 rtx mul_op0, mul_op1, add_op;
9735 if (speed_p)
9736 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9738 mul_op0 = XEXP (XEXP (x, 0), 0);
9739 mul_op1 = XEXP (XEXP (x, 0), 1);
9740 add_op = XEXP (x, 1);
9742 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9743 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9744 + rtx_cost (add_op, mode, code, 0, speed_p));
9746 return true;
9749 if (speed_p)
9750 *cost += extra_cost->fp[mode != SFmode].addsub;
9751 return false;
9753 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9755 *cost = LIBCALL_COST (2);
9756 return false;
9759 /* Narrow modes can be synthesized in SImode, but the range
9760 of useful sub-operations is limited. Check for shift operations
9761 on one of the operands. Only left shifts can be used in the
9762 narrow modes. */
9763 if (GET_MODE_CLASS (mode) == MODE_INT
9764 && GET_MODE_SIZE (mode) < 4)
9766 rtx shift_op, shift_reg;
9767 shift_reg = NULL;
9769 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9771 if (CONST_INT_P (XEXP (x, 1)))
9773 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9774 INTVAL (XEXP (x, 1)), NULL_RTX,
9775 NULL_RTX, 1, 0);
9776 *cost = COSTS_N_INSNS (insns);
9777 if (speed_p)
9778 *cost += insns * extra_cost->alu.arith;
9779 /* Slightly penalize a narrow operation as the result may
9780 need widening. */
9781 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9782 return true;
9785 /* Slightly penalize a narrow operation as the result may
9786 need widening. */
9787 *cost += 1;
9788 if (speed_p)
9789 *cost += extra_cost->alu.arith;
9791 return false;
9794 if (mode == SImode)
9796 rtx shift_op, shift_reg;
9798 if (TARGET_INT_SIMD
9799 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9800 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9802 /* UXTA[BH] or SXTA[BH]. */
9803 if (speed_p)
9804 *cost += extra_cost->alu.extend_arith;
9805 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9806 0, speed_p)
9807 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9808 return true;
9811 shift_reg = NULL;
9812 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9813 if (shift_op != NULL)
9815 if (shift_reg)
9817 if (speed_p)
9818 *cost += extra_cost->alu.arith_shift_reg;
9819 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9821 else if (speed_p)
9822 *cost += extra_cost->alu.arith_shift;
9824 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9825 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9826 return true;
9828 if (GET_CODE (XEXP (x, 0)) == MULT)
9830 rtx mul_op = XEXP (x, 0);
9832 if (TARGET_DSP_MULTIPLY
9833 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9834 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9835 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9836 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9837 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9838 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9839 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9840 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9841 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9842 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9843 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9844 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9845 == 16))))))
9847 /* SMLA[BT][BT]. */
9848 if (speed_p)
9849 *cost += extra_cost->mult[0].extend_add;
9850 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9851 SIGN_EXTEND, 0, speed_p)
9852 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9853 SIGN_EXTEND, 0, speed_p)
9854 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9855 return true;
9858 if (speed_p)
9859 *cost += extra_cost->mult[0].add;
9860 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9861 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9862 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9863 return true;
9865 if (CONST_INT_P (XEXP (x, 1)))
9867 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9868 INTVAL (XEXP (x, 1)), NULL_RTX,
9869 NULL_RTX, 1, 0);
9870 *cost = COSTS_N_INSNS (insns);
9871 if (speed_p)
9872 *cost += insns * extra_cost->alu.arith;
9873 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9874 return true;
9876 else if (speed_p)
9877 *cost += extra_cost->alu.arith;
9879 return false;
9882 if (mode == DImode)
9884 if (arm_arch3m
9885 && GET_CODE (XEXP (x, 0)) == MULT
9886 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9887 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9888 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9889 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9891 if (speed_p)
9892 *cost += extra_cost->mult[1].extend_add;
9893 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9894 ZERO_EXTEND, 0, speed_p)
9895 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9896 ZERO_EXTEND, 0, speed_p)
9897 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9898 return true;
9901 *cost += COSTS_N_INSNS (1);
9903 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9904 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9906 if (speed_p)
9907 *cost += (extra_cost->alu.arith
9908 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9909 ? extra_cost->alu.arith
9910 : extra_cost->alu.arith_shift));
9912 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9913 0, speed_p)
9914 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9915 return true;
9918 if (speed_p)
9919 *cost += 2 * extra_cost->alu.arith;
9920 return false;
9923 /* Vector mode? */
9924 *cost = LIBCALL_COST (2);
9925 return false;
9926 case IOR:
9927 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9929 if (speed_p)
9930 *cost += extra_cost->alu.rev;
9932 return true;
9934 /* Fall through. */
9935 case AND: case XOR:
9936 if (mode == SImode)
9938 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9939 rtx op0 = XEXP (x, 0);
9940 rtx shift_op, shift_reg;
9942 if (subcode == NOT
9943 && (code == AND
9944 || (code == IOR && TARGET_THUMB2)))
9945 op0 = XEXP (op0, 0);
9947 shift_reg = NULL;
9948 shift_op = shifter_op_p (op0, &shift_reg);
9949 if (shift_op != NULL)
9951 if (shift_reg)
9953 if (speed_p)
9954 *cost += extra_cost->alu.log_shift_reg;
9955 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9957 else if (speed_p)
9958 *cost += extra_cost->alu.log_shift;
9960 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9961 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9962 return true;
9965 if (CONST_INT_P (XEXP (x, 1)))
9967 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9968 INTVAL (XEXP (x, 1)), NULL_RTX,
9969 NULL_RTX, 1, 0);
9971 *cost = COSTS_N_INSNS (insns);
9972 if (speed_p)
9973 *cost += insns * extra_cost->alu.logical;
9974 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9975 return true;
9978 if (speed_p)
9979 *cost += extra_cost->alu.logical;
9980 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9982 return true;
9985 if (mode == DImode)
9987 rtx op0 = XEXP (x, 0);
9988 enum rtx_code subcode = GET_CODE (op0);
9990 *cost += COSTS_N_INSNS (1);
9992 if (subcode == NOT
9993 && (code == AND
9994 || (code == IOR && TARGET_THUMB2)))
9995 op0 = XEXP (op0, 0);
9997 if (GET_CODE (op0) == ZERO_EXTEND)
9999 if (speed_p)
10000 *cost += 2 * extra_cost->alu.logical;
10002 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10003 0, speed_p)
10004 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10005 return true;
10007 else if (GET_CODE (op0) == SIGN_EXTEND)
10009 if (speed_p)
10010 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10012 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10013 0, speed_p)
10014 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10015 return true;
10018 if (speed_p)
10019 *cost += 2 * extra_cost->alu.logical;
10021 return true;
10023 /* Vector mode? */
10025 *cost = LIBCALL_COST (2);
10026 return false;
10028 case MULT:
10029 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10030 && (mode == SFmode || !TARGET_VFP_SINGLE))
10032 rtx op0 = XEXP (x, 0);
10034 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10035 op0 = XEXP (op0, 0);
10037 if (speed_p)
10038 *cost += extra_cost->fp[mode != SFmode].mult;
10040 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10041 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10042 return true;
10044 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10046 *cost = LIBCALL_COST (2);
10047 return false;
10050 if (mode == SImode)
10052 if (TARGET_DSP_MULTIPLY
10053 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10054 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10055 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10056 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10057 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10058 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10059 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10060 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10061 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10062 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10063 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10064 && (INTVAL (XEXP (XEXP (x, 1), 1))
10065 == 16))))))
10067 /* SMUL[TB][TB]. */
10068 if (speed_p)
10069 *cost += extra_cost->mult[0].extend;
10070 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10071 SIGN_EXTEND, 0, speed_p);
10072 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10073 SIGN_EXTEND, 1, speed_p);
10074 return true;
10076 if (speed_p)
10077 *cost += extra_cost->mult[0].simple;
10078 return false;
10081 if (mode == DImode)
10083 if (arm_arch3m
10084 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10085 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10086 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10087 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10089 if (speed_p)
10090 *cost += extra_cost->mult[1].extend;
10091 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10092 ZERO_EXTEND, 0, speed_p)
10093 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10094 ZERO_EXTEND, 0, speed_p));
10095 return true;
10098 *cost = LIBCALL_COST (2);
10099 return false;
10102 /* Vector mode? */
10103 *cost = LIBCALL_COST (2);
10104 return false;
10106 case NEG:
10107 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10108 && (mode == SFmode || !TARGET_VFP_SINGLE))
10110 if (GET_CODE (XEXP (x, 0)) == MULT)
10112 /* VNMUL. */
10113 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10114 return true;
10117 if (speed_p)
10118 *cost += extra_cost->fp[mode != SFmode].neg;
10120 return false;
10122 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10124 *cost = LIBCALL_COST (1);
10125 return false;
10128 if (mode == SImode)
10130 if (GET_CODE (XEXP (x, 0)) == ABS)
10132 *cost += COSTS_N_INSNS (1);
10133 /* Assume the non-flag-changing variant. */
10134 if (speed_p)
10135 *cost += (extra_cost->alu.log_shift
10136 + extra_cost->alu.arith_shift);
10137 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10138 return true;
10141 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10142 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10144 *cost += COSTS_N_INSNS (1);
10145 /* No extra cost for MOV imm and MVN imm. */
10146 /* If the comparison op is using the flags, there's no further
10147 cost, otherwise we need to add the cost of the comparison. */
10148 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10149 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10150 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10152 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10153 *cost += (COSTS_N_INSNS (1)
10154 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10155 0, speed_p)
10156 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10157 1, speed_p));
10158 if (speed_p)
10159 *cost += extra_cost->alu.arith;
10161 return true;
10164 if (speed_p)
10165 *cost += extra_cost->alu.arith;
10166 return false;
10169 if (GET_MODE_CLASS (mode) == MODE_INT
10170 && GET_MODE_SIZE (mode) < 4)
10172 /* Slightly disparage, as we might need an extend operation. */
10173 *cost += 1;
10174 if (speed_p)
10175 *cost += extra_cost->alu.arith;
10176 return false;
10179 if (mode == DImode)
10181 *cost += COSTS_N_INSNS (1);
10182 if (speed_p)
10183 *cost += 2 * extra_cost->alu.arith;
10184 return false;
10187 /* Vector mode? */
10188 *cost = LIBCALL_COST (1);
10189 return false;
10191 case NOT:
10192 if (mode == SImode)
10194 rtx shift_op;
10195 rtx shift_reg = NULL;
10197 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10199 if (shift_op)
10201 if (shift_reg != NULL)
10203 if (speed_p)
10204 *cost += extra_cost->alu.log_shift_reg;
10205 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10207 else if (speed_p)
10208 *cost += extra_cost->alu.log_shift;
10209 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10210 return true;
10213 if (speed_p)
10214 *cost += extra_cost->alu.logical;
10215 return false;
10217 if (mode == DImode)
10219 *cost += COSTS_N_INSNS (1);
10220 return false;
10223 /* Vector mode? */
10225 *cost += LIBCALL_COST (1);
10226 return false;
10228 case IF_THEN_ELSE:
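        /* If either arm of the IF_THEN_ELSE is the PC, this is part of a
	   conditional branch.  */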
10230 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10232 *cost += COSTS_N_INSNS (3);
10233 return true;
10235 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10236 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10238 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10239 /* Assume that if one arm of the if_then_else is a register,
10240 that it will be tied with the result and eliminate the
10241 conditional insn. */
10242 if (REG_P (XEXP (x, 1)))
10243 *cost += op2cost;
10244 else if (REG_P (XEXP (x, 2)))
10245 *cost += op1cost;
10246 else
10248 if (speed_p)
10250 if (extra_cost->alu.non_exec_costs_exec)
10251 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10252 else
10253 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10255 else
10256 *cost += op1cost + op2cost;
10259 return true;
10261 case COMPARE:
10262 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10263 *cost = 0;
10264 else
10266 machine_mode op0mode;
10267 /* We'll mostly assume that the cost of a compare is the cost of the
10268 LHS. However, there are some notable exceptions. */
10270 /* Floating point compares are never done as side-effects. */
10271 op0mode = GET_MODE (XEXP (x, 0));
10272 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10273 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10275 if (speed_p)
10276 *cost += extra_cost->fp[op0mode != SFmode].compare;
10278 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10280 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10281 return true;
10284 return false;
10286 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10288 *cost = LIBCALL_COST (2);
10289 return false;
10292 /* DImode compares normally take two insns. */
10293 if (op0mode == DImode)
10295 *cost += COSTS_N_INSNS (1);
10296 if (speed_p)
10297 *cost += 2 * extra_cost->alu.arith;
10298 return false;
10301 if (op0mode == SImode)
10303 rtx shift_op;
10304 rtx shift_reg;
10306 if (XEXP (x, 1) == const0_rtx
10307 && !(REG_P (XEXP (x, 0))
10308 || (GET_CODE (XEXP (x, 0)) == SUBREG
10309 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10311 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10313 /* Multiply operations that set the flags are often
10314 significantly more expensive. */
10315 if (speed_p
10316 && GET_CODE (XEXP (x, 0)) == MULT
10317 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10318 *cost += extra_cost->mult[0].flag_setting;
10320 if (speed_p
10321 && GET_CODE (XEXP (x, 0)) == PLUS
10322 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10323 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10324 0), 1), mode))
10325 *cost += extra_cost->mult[0].flag_setting;
10326 return true;
10329 shift_reg = NULL;
10330 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10331 if (shift_op != NULL)
10333 if (shift_reg != NULL)
10335 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10336 1, speed_p);
10337 if (speed_p)
10338 *cost += extra_cost->alu.arith_shift_reg;
10340 else if (speed_p)
10341 *cost += extra_cost->alu.arith_shift;
10342 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10343 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10344 return true;
10347 if (speed_p)
10348 *cost += extra_cost->alu.arith;
10349 if (CONST_INT_P (XEXP (x, 1))
10350 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10352 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10353 return true;
10355 return false;
10358 /* Vector mode? */
10360 *cost = LIBCALL_COST (2);
10361 return false;
10363 return true;
10365 case EQ:
10366 case NE:
10367 case LT:
10368 case LE:
10369 case GT:
10370 case GE:
10371 case LTU:
10372 case LEU:
10373 case GEU:
10374 case GTU:
10375 case ORDERED:
10376 case UNORDERED:
10377 case UNEQ:
10378 case UNLE:
10379 case UNLT:
10380 case UNGE:
10381 case UNGT:
10382 case LTGT:
10383 if (outer_code == SET)
10385 /* Is it a store-flag operation? */
10386 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10387 && XEXP (x, 1) == const0_rtx)
10389 /* Thumb also needs an IT insn. */
10390 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10391 return true;
10393 if (XEXP (x, 1) == const0_rtx)
10395 switch (code)
10397 case LT:
10398 /* LSR Rd, Rn, #31. */
10399 if (speed_p)
10400 *cost += extra_cost->alu.shift;
10401 break;
10403 case EQ:
10404 /* RSBS T1, Rn, #0
10405 ADC Rd, Rn, T1. */
10407 case NE:
10408 /* SUBS T1, Rn, #1
10409 SBC Rd, Rn, T1. */
10410 *cost += COSTS_N_INSNS (1);
10411 break;
10413 case LE:
10414 /* RSBS T1, Rn, Rn, LSR #31
10415 ADC Rd, Rn, T1. */
10416 *cost += COSTS_N_INSNS (1);
10417 if (speed_p)
10418 *cost += extra_cost->alu.arith_shift;
10419 break;
10421 case GT:
10422 /* RSB Rd, Rn, Rn, ASR #1
10423 LSR Rd, Rd, #31. */
10424 *cost += COSTS_N_INSNS (1);
10425 if (speed_p)
10426 *cost += (extra_cost->alu.arith_shift
10427 + extra_cost->alu.shift);
10428 break;
10430 case GE:
10431 /* ASR Rd, Rn, #31
10432 ADD Rd, Rn, #1. */
10433 *cost += COSTS_N_INSNS (1);
10434 if (speed_p)
10435 *cost += extra_cost->alu.shift;
10436 break;
10438 default:
10439 /* Remaining cases are either meaningless or would take
10440 three insns anyway. */
10441 *cost = COSTS_N_INSNS (3);
10442 break;
10444 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10445 return true;
10447 else
10449 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10450 if (CONST_INT_P (XEXP (x, 1))
10451 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10453 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10454 return true;
10457 return false;
10460 /* Not directly inside a set. If it involves the condition code
10461 register it must be the condition for a branch, cond_exec or
10462        I_T_E operation.  Since the comparison is performed elsewhere,
10463 this is just the control part which has no additional
10464 cost. */
10465 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10466 && XEXP (x, 1) == const0_rtx)
10468 *cost = 0;
10469 return true;
10471 return false;
10473 case ABS:
10474 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10475 && (mode == SFmode || !TARGET_VFP_SINGLE))
10477 if (speed_p)
10478 *cost += extra_cost->fp[mode != SFmode].neg;
10480 return false;
10482 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10484 *cost = LIBCALL_COST (1);
10485 return false;
10488 if (mode == SImode)
10490 if (speed_p)
10491 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10492 return false;
10494 /* Vector mode? */
10495 *cost = LIBCALL_COST (1);
10496 return false;
10498 case SIGN_EXTEND:
10499 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10500 && MEM_P (XEXP (x, 0)))
10502 if (mode == DImode)
10503 *cost += COSTS_N_INSNS (1);
10505 if (!speed_p)
10506 return true;
10508 if (GET_MODE (XEXP (x, 0)) == SImode)
10509 *cost += extra_cost->ldst.load;
10510 else
10511 *cost += extra_cost->ldst.load_sign_extend;
10513 if (mode == DImode)
10514 *cost += extra_cost->alu.shift;
10516 return true;
10519      /* Widening from less than 32 bits requires an extend operation.  */
10520 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10522 /* We have SXTB/SXTH. */
10523 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10524 if (speed_p)
10525 *cost += extra_cost->alu.extend;
10527 else if (GET_MODE (XEXP (x, 0)) != SImode)
10529 /* Needs two shifts. */
10530 *cost += COSTS_N_INSNS (1);
10531 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10532 if (speed_p)
10533 *cost += 2 * extra_cost->alu.shift;
10536      /* Widening beyond 32 bits requires one more insn.  */
10537 if (mode == DImode)
10539 *cost += COSTS_N_INSNS (1);
10540 if (speed_p)
10541 *cost += extra_cost->alu.shift;
10544 return true;
10546 case ZERO_EXTEND:
10547 if ((arm_arch4
10548 || GET_MODE (XEXP (x, 0)) == SImode
10549 || GET_MODE (XEXP (x, 0)) == QImode)
10550 && MEM_P (XEXP (x, 0)))
10552 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10554 if (mode == DImode)
10555 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10557 return true;
10560      /* Widening from less than 32 bits requires an extend operation.  */
10561 if (GET_MODE (XEXP (x, 0)) == QImode)
10563 /* UXTB can be a shorter instruction in Thumb2, but it might
10564 be slower than the AND Rd, Rn, #255 alternative. When
10565 optimizing for speed it should never be slower to use
10566 AND, and we don't really model 16-bit vs 32-bit insns
10567 here. */
10568 if (speed_p)
10569 *cost += extra_cost->alu.logical;
10571 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10573 /* We have UXTB/UXTH. */
10574 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10575 if (speed_p)
10576 *cost += extra_cost->alu.extend;
10578 else if (GET_MODE (XEXP (x, 0)) != SImode)
10580 /* Needs two shifts. It's marginally preferable to use
10581 shifts rather than two BIC instructions as the second
10582 shift may merge with a subsequent insn as a shifter
10583 op. */
10584 *cost = COSTS_N_INSNS (2);
10585 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10586 if (speed_p)
10587 *cost += 2 * extra_cost->alu.shift;
10590      /* Widening beyond 32 bits requires one more insn.  */
10591 if (mode == DImode)
10593 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10596 return true;
10598 case CONST_INT:
10599 *cost = 0;
10600 /* CONST_INT has no mode, so we cannot tell for sure how many
10601 insns are really going to be needed. The best we can do is
10602 look at the value passed. If it fits in SImode, then assume
10603 that's the mode it will be used for. Otherwise assume it
10604 will be used in DImode. */
10605 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10606 mode = SImode;
10607 else
10608 mode = DImode;
10610 /* Avoid blowing up in arm_gen_constant (). */
10611 if (!(outer_code == PLUS
10612 || outer_code == AND
10613 || outer_code == IOR
10614 || outer_code == XOR
10615 || outer_code == MINUS))
10616 outer_code = SET;
10618 const_int_cost:
10619 if (mode == SImode)
10621 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10622 INTVAL (x), NULL, NULL,
10623 0, 0));
10624 /* Extra costs? */
10626 else
10628 *cost += COSTS_N_INSNS (arm_gen_constant
10629 (outer_code, SImode, NULL,
10630 trunc_int_for_mode (INTVAL (x), SImode),
10631 NULL, NULL, 0, 0)
10632 + arm_gen_constant (outer_code, SImode, NULL,
10633 INTVAL (x) >> 32, NULL,
10634 NULL, 0, 0));
10635 /* Extra costs? */
10638 return true;
10640 case CONST:
10641 case LABEL_REF:
10642 case SYMBOL_REF:
10643 if (speed_p)
10645 if (arm_arch_thumb2 && !flag_pic)
10646 *cost += COSTS_N_INSNS (1);
10647 else
10648 *cost += extra_cost->ldst.load;
10650 else
10651 *cost += COSTS_N_INSNS (1);
10653 if (flag_pic)
10655 *cost += COSTS_N_INSNS (1);
10656 if (speed_p)
10657 *cost += extra_cost->alu.arith;
10660 return true;
10662 case CONST_FIXED:
10663 *cost = COSTS_N_INSNS (4);
10664 /* Fixme. */
10665 return true;
10667 case CONST_DOUBLE:
10668 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10669 && (mode == SFmode || !TARGET_VFP_SINGLE))
10671 if (vfp3_const_double_rtx (x))
10673 if (speed_p)
10674 *cost += extra_cost->fp[mode == DFmode].fpconst;
10675 return true;
10678 if (speed_p)
10680 if (mode == DFmode)
10681 *cost += extra_cost->ldst.loadd;
10682 else
10683 *cost += extra_cost->ldst.loadf;
10685 else
10686 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10688 return true;
10690 *cost = COSTS_N_INSNS (4);
10691 return true;
10693 case CONST_VECTOR:
10694 /* Fixme. */
10695 if (TARGET_NEON
10696 && TARGET_HARD_FLOAT
10697 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10698 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10699 *cost = COSTS_N_INSNS (1);
10700 else
10701 *cost = COSTS_N_INSNS (4);
10702 return true;
10704 case HIGH:
10705 case LO_SUM:
10706 /* When optimizing for size, we prefer constant pool entries to
10707 MOVW/MOVT pairs, so bump the cost of these slightly. */
10708 if (!speed_p)
10709 *cost += 1;
10710 return true;
10712 case CLZ:
10713 if (speed_p)
10714 *cost += extra_cost->alu.clz;
10715 return false;
10717 case SMIN:
10718 if (XEXP (x, 1) == const0_rtx)
10720 if (speed_p)
10721 *cost += extra_cost->alu.log_shift;
10722 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10723 return true;
10725 /* Fall through. */
10726 case SMAX:
10727 case UMIN:
10728 case UMAX:
10729 *cost += COSTS_N_INSNS (1);
10730 return false;
10732 case TRUNCATE:
10733 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10734 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10735 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10736 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10737 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10738 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10739 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10740 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10741 == ZERO_EXTEND))))
10743 if (speed_p)
10744 *cost += extra_cost->mult[1].extend;
10745 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10746 ZERO_EXTEND, 0, speed_p)
10747 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10748 ZERO_EXTEND, 0, speed_p));
10749 return true;
10751 *cost = LIBCALL_COST (1);
10752 return false;
10754 case UNSPEC_VOLATILE:
10755 case UNSPEC:
10756 return arm_unspec_cost (x, outer_code, speed_p, cost);
10758 case PC:
10759 /* Reading the PC is like reading any other register. Writing it
10760 is more expensive, but we take that into account elsewhere. */
10761 *cost = 0;
10762 return true;
10764 case ZERO_EXTRACT:
10765 /* TODO: Simple zero_extract of bottom bits using AND. */
10766 /* Fall through. */
10767 case SIGN_EXTRACT:
10768 if (arm_arch6
10769 && mode == SImode
10770 && CONST_INT_P (XEXP (x, 1))
10771 && CONST_INT_P (XEXP (x, 2)))
10773 if (speed_p)
10774 *cost += extra_cost->alu.bfx;
10775 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10776 return true;
10778 /* Without UBFX/SBFX, need to resort to shift operations. */
10779 *cost += COSTS_N_INSNS (1);
10780 if (speed_p)
10781 *cost += 2 * extra_cost->alu.shift;
10782 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10783 return true;
10785 case FLOAT_EXTEND:
10786 if (TARGET_HARD_FLOAT)
10788 if (speed_p)
10789 *cost += extra_cost->fp[mode == DFmode].widen;
10790 if (!TARGET_VFP5
10791 && GET_MODE (XEXP (x, 0)) == HFmode)
10793 /* Pre v8, widening HF->DF is a two-step process, first
10794 widening to SFmode. */
10795 *cost += COSTS_N_INSNS (1);
10796 if (speed_p)
10797 *cost += extra_cost->fp[0].widen;
10799 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10800 return true;
10803 *cost = LIBCALL_COST (1);
10804 return false;
10806 case FLOAT_TRUNCATE:
10807 if (TARGET_HARD_FLOAT)
10809 if (speed_p)
10810 *cost += extra_cost->fp[mode == DFmode].narrow;
10811 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10812 return true;
10813 /* Vector modes? */
10815 *cost = LIBCALL_COST (1);
10816 return false;
10818 case FMA:
10819 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10821 rtx op0 = XEXP (x, 0);
10822 rtx op1 = XEXP (x, 1);
10823 rtx op2 = XEXP (x, 2);
10826 /* vfms or vfnma. */
10827 if (GET_CODE (op0) == NEG)
10828 op0 = XEXP (op0, 0);
10830 /* vfnms or vfnma. */
10831 if (GET_CODE (op2) == NEG)
10832 op2 = XEXP (op2, 0);
10834 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10835 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10836 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10838 if (speed_p)
10839          *cost += extra_cost->fp[mode == DFmode].fma;
10841 return true;
10844 *cost = LIBCALL_COST (3);
10845 return false;
10847 case FIX:
10848 case UNSIGNED_FIX:
10849 if (TARGET_HARD_FLOAT)
10851 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10852 a vcvt fixed-point conversion. */
10853 if (code == FIX && mode == SImode
10854 && GET_CODE (XEXP (x, 0)) == FIX
10855 && GET_MODE (XEXP (x, 0)) == SFmode
10856 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10857 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10858 > 0)
10860 if (speed_p)
10861 *cost += extra_cost->fp[0].toint;
10863 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10864 code, 0, speed_p);
10865 return true;
10868 if (GET_MODE_CLASS (mode) == MODE_INT)
10870 mode = GET_MODE (XEXP (x, 0));
10871 if (speed_p)
10872 *cost += extra_cost->fp[mode == DFmode].toint;
10873          /* Strip off the 'cost' of rounding towards zero.  */
10874 if (GET_CODE (XEXP (x, 0)) == FIX)
10875 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10876 0, speed_p);
10877 else
10878 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10879 /* ??? Increase the cost to deal with transferring from
10880 FP -> CORE registers? */
10881 return true;
10883 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10884 && TARGET_VFP5)
10886 if (speed_p)
10887 *cost += extra_cost->fp[mode == DFmode].roundint;
10888 return false;
10890 /* Vector costs? */
10892 *cost = LIBCALL_COST (1);
10893 return false;
10895 case FLOAT:
10896 case UNSIGNED_FLOAT:
10897 if (TARGET_HARD_FLOAT)
10899 /* ??? Increase the cost to deal with transferring from CORE
10900 -> FP registers? */
10901 if (speed_p)
10902 *cost += extra_cost->fp[mode == DFmode].fromint;
10903 return false;
10905 *cost = LIBCALL_COST (1);
10906 return false;
10908 case CALL:
10909 return true;
10911 case ASM_OPERANDS:
10913      /* Just a guess: estimate the number of instructions in the asm string,
10914        plus one insn per input operand.  Always a minimum of COSTS_N_INSNS (1)
10915 though (see PR60663). */
10916 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10917 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10919 *cost = COSTS_N_INSNS (asm_length + num_operands);
10920 return true;
10922 default:
10923 if (mode != VOIDmode)
10924 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10925 else
10926 *cost = COSTS_N_INSNS (4); /* Who knows? */
10927 return false;
10931 #undef HANDLE_NARROW_SHIFT_ARITH
10933 /* RTX costs entry point. */
10935 static bool
10936 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10937 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10939 bool result;
10940 int code = GET_CODE (x);
10941 gcc_assert (current_tune->insn_extra_cost);
10943 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10944 (enum rtx_code) outer_code,
10945 current_tune->insn_extra_cost,
10946 total, speed);
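  /* With detailed dumping enabled, show the RTX alongside the cost we
     computed for it and whether that cost is final or partial.  */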
10948 if (dump_file && (dump_flags & TDF_DETAILS))
10950 print_rtl_single (dump_file, x);
10951 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10952 *total, result ? "final" : "partial");
10954 return result;
10957 /* All address computations that can be done are free, but rtx cost returns
10958 the same for practically all of them. So we weight the different types
10959   of address here in the order (most preferred first):
10960 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10961 static inline int
10962 arm_arm_address_cost (rtx x)
10964 enum rtx_code c = GET_CODE (x);
10966 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10967 return 0;
10968 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10969 return 10;
10971 if (c == PLUS)
10973 if (CONST_INT_P (XEXP (x, 1)))
10974 return 2;
10976 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10977 return 3;
10979 return 4;
10982 return 6;
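/* Address cost for Thumb: a bare register, or a register plus a constant
   offset, is cheapest; any other form costs slightly more.  */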
10985 static inline int
10986 arm_thumb_address_cost (rtx x)
10988 enum rtx_code c = GET_CODE (x);
10990 if (c == REG)
10991 return 1;
10992 if (c == PLUS
10993 && REG_P (XEXP (x, 0))
10994 && CONST_INT_P (XEXP (x, 1)))
10995 return 1;
10997 return 2;
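/* Cost of an address: dispatch to the ARM cost function for 32-bit
   (ARM/Thumb-2) targets and to the Thumb cost function otherwise.  */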
11000 static int
11001 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11002 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11004 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11007 /* Adjust cost hook for XScale. */
11008 static bool
11009 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11010 int * cost)
11012 /* Some true dependencies can have a higher cost depending
11013 on precisely how certain input operands are used. */
11014 if (dep_type == 0
11015 && recog_memoized (insn) >= 0
11016 && recog_memoized (dep) >= 0)
11018 int shift_opnum = get_attr_shift (insn);
11019 enum attr_type attr_type = get_attr_type (dep);
11021 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11022 operand for INSN. If we have a shifted input operand and the
11023 instruction we depend on is another ALU instruction, then we may
11024 have to account for an additional stall. */
11025 if (shift_opnum != 0
11026 && (attr_type == TYPE_ALU_SHIFT_IMM
11027 || attr_type == TYPE_ALUS_SHIFT_IMM
11028 || attr_type == TYPE_LOGIC_SHIFT_IMM
11029 || attr_type == TYPE_LOGICS_SHIFT_IMM
11030 || attr_type == TYPE_ALU_SHIFT_REG
11031 || attr_type == TYPE_ALUS_SHIFT_REG
11032 || attr_type == TYPE_LOGIC_SHIFT_REG
11033 || attr_type == TYPE_LOGICS_SHIFT_REG
11034 || attr_type == TYPE_MOV_SHIFT
11035 || attr_type == TYPE_MVN_SHIFT
11036 || attr_type == TYPE_MOV_SHIFT_REG
11037 || attr_type == TYPE_MVN_SHIFT_REG))
11039 rtx shifted_operand;
11040 int opno;
11042 /* Get the shifted operand. */
11043 extract_insn (insn);
11044 shifted_operand = recog_data.operand[shift_opnum];
11046 /* Iterate over all the operands in DEP. If we write an operand
11047            that overlaps with SHIFTED_OPERAND, then we have to increase the
11048 cost of this dependency. */
11049 extract_insn (dep);
11050 preprocess_constraints (dep);
11051 for (opno = 0; opno < recog_data.n_operands; opno++)
11053 /* We can ignore strict inputs. */
11054 if (recog_data.operand_type[opno] == OP_IN)
11055 continue;
11057 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11058 shifted_operand))
11060 *cost = 2;
11061 return false;
11066 return true;
11069 /* Adjust cost hook for Cortex A9. */
11070 static bool
11071 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11072 int * cost)
11074 switch (dep_type)
11076 case REG_DEP_ANTI:
11077 *cost = 0;
11078 return false;
11080 case REG_DEP_TRUE:
11081 case REG_DEP_OUTPUT:
11082 if (recog_memoized (insn) >= 0
11083 && recog_memoized (dep) >= 0)
11085 if (GET_CODE (PATTERN (insn)) == SET)
11087 if (GET_MODE_CLASS
11088 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11089 || GET_MODE_CLASS
11090 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11092 enum attr_type attr_type_insn = get_attr_type (insn);
11093 enum attr_type attr_type_dep = get_attr_type (dep);
11095 /* By default all dependencies of the form
11096 s0 = s0 <op> s1
11097 s0 = s0 <op> s2
11098 have an extra latency of 1 cycle because
11099 of the input and output dependency in this
11100                case.  However, this gets modeled as a true
11101 dependency and hence all these checks. */
11102 if (REG_P (SET_DEST (PATTERN (insn)))
11103 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11105 /* FMACS is a special case where the dependent
11106 instruction can be issued 3 cycles before
11107 the normal latency in case of an output
11108 dependency. */
11109 if ((attr_type_insn == TYPE_FMACS
11110 || attr_type_insn == TYPE_FMACD)
11111 && (attr_type_dep == TYPE_FMACS
11112 || attr_type_dep == TYPE_FMACD))
11114 if (dep_type == REG_DEP_OUTPUT)
11115 *cost = insn_default_latency (dep) - 3;
11116 else
11117 *cost = insn_default_latency (dep);
11118 return false;
11120 else
11122 if (dep_type == REG_DEP_OUTPUT)
11123 *cost = insn_default_latency (dep) + 1;
11124 else
11125 *cost = insn_default_latency (dep);
11127 return false;
11132 break;
11134 default:
11135 gcc_unreachable ();
11138 return true;
11141 /* Adjust cost hook for FA726TE. */
11142 static bool
11143 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11144 int * cost)
11146   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11147     by a predicated one) has a penalty of 3.  */
11148 if (dep_type == REG_DEP_TRUE
11149 && recog_memoized (insn) >= 0
11150 && recog_memoized (dep) >= 0
11151 && get_attr_conds (dep) == CONDS_SET)
11153 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11154 if (get_attr_conds (insn) == CONDS_USE
11155 && get_attr_type (insn) != TYPE_BRANCH)
11157 *cost = 3;
11158 return false;
11161 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11162 || get_attr_conds (insn) == CONDS_USE)
11164 *cost = 0;
11165 return false;
11169 return true;
11172 /* Implement TARGET_REGISTER_MOVE_COST.
11174    Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
11175    a move is typically more expensive than a single memory access.  We set
11176 the cost to less than two memory accesses so that floating
11177 point to integer conversion does not go through memory. */
11180 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11181 reg_class_t from, reg_class_t to)
11183 if (TARGET_32BIT)
11185 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11186 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11187 return 15;
11188 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11189 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11190 return 4;
11191 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11192 return 20;
11193 else
11194 return 2;
11196 else
11198 if (from == HI_REGS || to == HI_REGS)
11199 return 4;
11200 else
11201 return 2;
11205 /* Implement TARGET_MEMORY_MOVE_COST. */
11208 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11209 bool in ATTRIBUTE_UNUSED)
11211 if (TARGET_32BIT)
11212 return 10;
11213 else
11215 if (GET_MODE_SIZE (mode) < 4)
11216 return 8;
11217 else
11218 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11222 /* Vectorizer cost model implementation. */
11224 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11225 static int
11226 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11227 tree vectype,
11228 int misalign ATTRIBUTE_UNUSED)
11230 unsigned elements;
11232 switch (type_of_cost)
11234 case scalar_stmt:
11235 return current_tune->vec_costs->scalar_stmt_cost;
11237 case scalar_load:
11238 return current_tune->vec_costs->scalar_load_cost;
11240 case scalar_store:
11241 return current_tune->vec_costs->scalar_store_cost;
11243 case vector_stmt:
11244 return current_tune->vec_costs->vec_stmt_cost;
11246 case vector_load:
11247 return current_tune->vec_costs->vec_align_load_cost;
11249 case vector_store:
11250 return current_tune->vec_costs->vec_store_cost;
11252 case vec_to_scalar:
11253 return current_tune->vec_costs->vec_to_scalar_cost;
11255 case scalar_to_vec:
11256 return current_tune->vec_costs->scalar_to_vec_cost;
11258 case unaligned_load:
11259 case vector_gather_load:
11260 return current_tune->vec_costs->vec_unalign_load_cost;
11262 case unaligned_store:
11263 case vector_scatter_store:
11264 return current_tune->vec_costs->vec_unalign_store_cost;
11266 case cond_branch_taken:
11267 return current_tune->vec_costs->cond_taken_branch_cost;
11269 case cond_branch_not_taken:
11270 return current_tune->vec_costs->cond_not_taken_branch_cost;
11272 case vec_perm:
11273 case vec_promote_demote:
11274 return current_tune->vec_costs->vec_stmt_cost;
11276 case vec_construct:
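        /* Rough estimate: assume constructing a vector takes about one
	   statement per pair of elements, plus one.  */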
11277 elements = TYPE_VECTOR_SUBPARTS (vectype);
11278 return elements / 2 + 1;
11280 default:
11281 gcc_unreachable ();
11285 /* Implement targetm.vectorize.add_stmt_cost. */
11287 static unsigned
11288 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11289 struct _stmt_vec_info *stmt_info, int misalign,
11290 enum vect_cost_model_location where)
11292 unsigned *cost = (unsigned *) data;
11293 unsigned retval = 0;
11295 if (flag_vect_cost_model)
11297 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11298 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11300 /* Statements in an inner loop relative to the loop being
11301 vectorized are weighted more heavily. The value here is
11302 arbitrary and could potentially be improved with analysis. */
11303 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11304 count *= 50; /* FIXME. */
11306 retval = (unsigned) (count * stmt_cost);
11307 cost[where] += retval;
11310 return retval;
11313 /* Return true if and only if this insn can dual-issue only as older. */
11314 static bool
11315 cortexa7_older_only (rtx_insn *insn)
11317 if (recog_memoized (insn) < 0)
11318 return false;
11320 switch (get_attr_type (insn))
11322 case TYPE_ALU_DSP_REG:
11323 case TYPE_ALU_SREG:
11324 case TYPE_ALUS_SREG:
11325 case TYPE_LOGIC_REG:
11326 case TYPE_LOGICS_REG:
11327 case TYPE_ADC_REG:
11328 case TYPE_ADCS_REG:
11329 case TYPE_ADR:
11330 case TYPE_BFM:
11331 case TYPE_REV:
11332 case TYPE_MVN_REG:
11333 case TYPE_SHIFT_IMM:
11334 case TYPE_SHIFT_REG:
11335 case TYPE_LOAD_BYTE:
11336 case TYPE_LOAD_4:
11337 case TYPE_STORE_4:
11338 case TYPE_FFARITHS:
11339 case TYPE_FADDS:
11340 case TYPE_FFARITHD:
11341 case TYPE_FADDD:
11342 case TYPE_FMOV:
11343 case TYPE_F_CVT:
11344 case TYPE_FCMPS:
11345 case TYPE_FCMPD:
11346 case TYPE_FCONSTS:
11347 case TYPE_FCONSTD:
11348 case TYPE_FMULS:
11349 case TYPE_FMACS:
11350 case TYPE_FMULD:
11351 case TYPE_FMACD:
11352 case TYPE_FDIVS:
11353 case TYPE_FDIVD:
11354 case TYPE_F_MRC:
11355 case TYPE_F_MRRC:
11356 case TYPE_F_FLAG:
11357 case TYPE_F_LOADS:
11358 case TYPE_F_STORES:
11359 return true;
11360 default:
11361 return false;
11365 /* Return true if and only if this insn can dual-issue as younger. */
11366 static bool
11367 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11369 if (recog_memoized (insn) < 0)
11371 if (verbose > 5)
11372 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11373 return false;
11376 switch (get_attr_type (insn))
11378 case TYPE_ALU_IMM:
11379 case TYPE_ALUS_IMM:
11380 case TYPE_LOGIC_IMM:
11381 case TYPE_LOGICS_IMM:
11382 case TYPE_EXTEND:
11383 case TYPE_MVN_IMM:
11384 case TYPE_MOV_IMM:
11385 case TYPE_MOV_REG:
11386 case TYPE_MOV_SHIFT:
11387 case TYPE_MOV_SHIFT_REG:
11388 case TYPE_BRANCH:
11389 case TYPE_CALL:
11390 return true;
11391 default:
11392 return false;
11397 /* Look for an instruction that can dual issue only as an older
11398 instruction, and move it in front of any instructions that can
11399 dual-issue as younger, while preserving the relative order of all
11400 other instructions in the ready list. This is a heuristic to help
11401 dual-issue in later cycles, by postponing issue of more flexible
11402 instructions. This heuristic may affect dual issue opportunities
11403 in the current cycle. */
11404 static void
11405 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11406 int *n_readyp, int clock)
11408 int i;
11409 int first_older_only = -1, first_younger = -1;
11411 if (verbose > 5)
11412 fprintf (file,
11413 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11414 clock,
11415 *n_readyp);
11417 /* Traverse the ready list from the head (the instruction to issue
11418 first), looking for the first instruction that can issue as
11419 younger and the first instruction that can dual-issue only as
11420 older. */
11421 for (i = *n_readyp - 1; i >= 0; i--)
11423 rtx_insn *insn = ready[i];
11424 if (cortexa7_older_only (insn))
11426 first_older_only = i;
11427 if (verbose > 5)
11428 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11429 break;
11431 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11432 first_younger = i;
11435 /* Nothing to reorder because either no younger insn found or insn
11436 that can dual-issue only as older appears before any insn that
11437 can dual-issue as younger. */
11438 if (first_younger == -1)
11440 if (verbose > 5)
11441 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11442 return;
11445 /* Nothing to reorder because no older-only insn in the ready list. */
11446 if (first_older_only == -1)
11448 if (verbose > 5)
11449 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11450 return;
11453 /* Move first_older_only insn before first_younger. */
11454 if (verbose > 5)
11455 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11456 INSN_UID(ready [first_older_only]),
11457 INSN_UID(ready [first_younger]));
11458 rtx_insn *first_older_only_insn = ready [first_older_only];
11459 for (i = first_older_only; i < first_younger; i++)
11461 ready[i] = ready[i+1];
11464 ready[i] = first_older_only_insn;
11465 return;
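/* Illustrative trace (editor's addition, not original source): with a
   ready list of three insns {A, B, C}, where C (the head, issued first)
   and B can issue as younger and A can dual-issue only as older, the
   loop above finds first_younger == 2 and first_older_only == 0.  The
   shift then produces {B, C, A}, so the older-only insn A issues first
   and the more flexible insns are kept back for later cycles.  */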
11468 /* Implement TARGET_SCHED_REORDER. */
11469 static int
11470 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11471 int clock)
11473 switch (arm_tune)
11475 case TARGET_CPU_cortexa7:
11476 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11477 break;
11478 default:
11479 /* Do nothing for other cores. */
11480 break;
11483 return arm_issue_rate ();
11486 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11487 It corrects the value of COST based on the relationship between
11488 INSN and DEP through the dependence LINK. It returns the new
11489 value. There is a per-core adjust_cost hook to adjust scheduler costs
11490 and the per-core hook can choose to completely override the generic
11491 adjust_cost function. Only put bits of code into arm_adjust_cost that
11492 are common across all cores. */
11493 static int
11494 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11495 unsigned int)
11497 rtx i_pat, d_pat;
11499 /* When generating Thumb-1 code, we want to place flag-setting operations
11500 close to a conditional branch which depends on them, so that we can
11501 omit the comparison. */
11502 if (TARGET_THUMB1
11503 && dep_type == 0
11504 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11505 && recog_memoized (dep) >= 0
11506 && get_attr_conds (dep) == CONDS_SET)
11507 return 0;
11509 if (current_tune->sched_adjust_cost != NULL)
11511 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11512 return cost;
11515 /* XXX Is this strictly true? */
11516 if (dep_type == REG_DEP_ANTI
11517 || dep_type == REG_DEP_OUTPUT)
11518 return 0;
11520 /* Call insns don't incur a stall, even if they follow a load. */
11521 if (dep_type == 0
11522 && CALL_P (insn))
11523 return 1;
11525 if ((i_pat = single_set (insn)) != NULL
11526 && MEM_P (SET_SRC (i_pat))
11527 && (d_pat = single_set (dep)) != NULL
11528 && MEM_P (SET_DEST (d_pat)))
11530 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11531 /* This is a load after a store; there is no conflict if the load reads
11532 from a cached area. Assume that loads from the stack and from the
11533 constant pool are cached, and that others will miss. This is a
11534 hack. */
11536 if ((GET_CODE (src_mem) == SYMBOL_REF
11537 && CONSTANT_POOL_ADDRESS_P (src_mem))
11538 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11539 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11540 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11541 return 1;
11544 return cost;
11548 arm_max_conditional_execute (void)
11550 return max_insns_skipped;
11553 static int
11554 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11556 if (TARGET_32BIT)
11557 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11558 else
11559 return (optimize > 0) ? 2 : 0;
11562 static int
11563 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11565 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11568 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11569 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11570 sequences of non-executed instructions in IT blocks probably take the same
11571 amount of time as executed instructions (and the IT instruction itself takes
11572 space in icache). This function was experimentally determined to give good
11573 results on a popular embedded benchmark. */
11575 static int
11576 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11578 return (TARGET_32BIT && speed_p) ? 1
11579 : arm_default_branch_cost (speed_p, predictable_p);
11582 static int
11583 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11585 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11588 static bool fp_consts_inited = false;
11590 static REAL_VALUE_TYPE value_fp0;
11592 static void
11593 init_fp_table (void)
11595 REAL_VALUE_TYPE r;
11597 r = REAL_VALUE_ATOF ("0", DFmode);
11598 value_fp0 = r;
11599 fp_consts_inited = true;
11602 /* Return TRUE if rtx X is a valid immediate FP constant. */
11604 arm_const_double_rtx (rtx x)
11606 const REAL_VALUE_TYPE *r;
11608 if (!fp_consts_inited)
11609 init_fp_table ();
11611 r = CONST_DOUBLE_REAL_VALUE (x);
11612 if (REAL_VALUE_MINUS_ZERO (*r))
11613 return 0;
11615 if (real_equal (r, &value_fp0))
11616 return 1;
11618 return 0;
11621 /* VFPv3 has a fairly wide range of representable immediates, formed from
11622 "quarter-precision" floating-point values. These can be evaluated using this
11623 formula (with ^ for exponentiation):
11625 -1^s * n * 2^-r
11627 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11628 16 <= n <= 31 and 0 <= r <= 7.
11630 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11632 - A (most-significant) is the sign bit.
11633 - BCD are the exponent (encoded as r XOR 3).
11634 - EFGH are the mantissa (encoded as n - 16).
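/* Editor's sketch (not part of the original source): a standalone
   decoder for the 8-bit encoding described above, written in plain C.
   The function name is invented for illustration.  E.g. 0x70 decodes to
   1.0 (n = 16, r = 4), 0x60 to 0.5 and 0x3f to 31.0.  */
static double
vfp3_quarter_prec_decode_sketch (unsigned char abcdefgh)
{
  int sign = (abcdefgh >> 7) & 1;	/* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;	/* BCD: exponent, stored as r XOR 3.  */
  int n = (abcdefgh & 0xf) + 16;	/* EFGH: mantissa, stored as n - 16.  */
  double val = (double) n / (double) (1 << r);	/* n * 2^-r.  */
  return sign ? -val : val;
}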
11637 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11638 fconst[sd] instruction, or -1 if X isn't suitable. */
11639 static int
11640 vfp3_const_double_index (rtx x)
11642 REAL_VALUE_TYPE r, m;
11643 int sign, exponent;
11644 unsigned HOST_WIDE_INT mantissa, mant_hi;
11645 unsigned HOST_WIDE_INT mask;
11646 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11647 bool fail;
11649 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11650 return -1;
11652 r = *CONST_DOUBLE_REAL_VALUE (x);
11654 /* We can't represent these things, so detect them first. */
11655 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11656 return -1;
11658 /* Extract sign, exponent and mantissa. */
11659 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11660 r = real_value_abs (&r);
11661 exponent = REAL_EXP (&r);
11662 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11663 highest (sign) bit, with a fixed binary point at bit point_pos.
11664 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11665 bits for the mantissa, this may fail (low bits would be lost). */
11666 real_ldexp (&m, &r, point_pos - exponent);
11667 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11668 mantissa = w.elt (0);
11669 mant_hi = w.elt (1);
11671 /* If there are bits set in the low part of the mantissa, we can't
11672 represent this value. */
11673 if (mantissa != 0)
11674 return -1;
11676 /* Now make it so that mantissa contains the most-significant bits, and move
11677 the point_pos to indicate that the least-significant bits have been
11678 discarded. */
11679 point_pos -= HOST_BITS_PER_WIDE_INT;
11680 mantissa = mant_hi;
11682 /* We can permit four significant bits of mantissa only, plus a high bit
11683 which is always 1. */
11684 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11685 if ((mantissa & mask) != 0)
11686 return -1;
11688 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11689 mantissa >>= point_pos - 5;
11691 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11692 floating-point immediate zero with Neon using an integer-zero load, but
11693 that case is handled elsewhere.) */
11694 if (mantissa == 0)
11695 return -1;
11697 gcc_assert (mantissa >= 16 && mantissa <= 31);
11699 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11700 normalized significands are in the range [1, 2). (Our mantissa is shifted
11701 left 4 places at this point relative to normalized IEEE754 values). GCC
11702 internally uses [0.5, 1) (see real.c), so the exponent returned from
11703 REAL_EXP must be altered. */
11704 exponent = 5 - exponent;
11706 if (exponent < 0 || exponent > 7)
11707 return -1;
11709 /* Sign, mantissa and exponent are now in the correct form to plug into the
11710 formula described in the comment above. */
11711 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11714 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11716 vfp3_const_double_rtx (rtx x)
11718 if (!TARGET_VFP3)
11719 return 0;
11721 return vfp3_const_double_index (x) != -1;
11724 /* Recognize immediates which can be used in various Neon instructions. Legal
11725 immediates are described by the following table (for VMVN variants, the
11726 bitwise inverse of the constant shown is recognized. In either case, VMOV
11727 is output and the correct instruction to use for a given constant is chosen
11728 by the assembler). The constant shown is replicated across all elements of
11729 the destination vector.
11731 insn elems variant constant (binary)
11732 ---- ----- ------- -----------------
11733 vmov i32 0 00000000 00000000 00000000 abcdefgh
11734 vmov i32 1 00000000 00000000 abcdefgh 00000000
11735 vmov i32 2 00000000 abcdefgh 00000000 00000000
11736 vmov i32 3 abcdefgh 00000000 00000000 00000000
11737 vmov i16 4 00000000 abcdefgh
11738 vmov i16 5 abcdefgh 00000000
11739 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11740 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11741 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11742 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11743 vmvn i16 10 00000000 abcdefgh
11744 vmvn i16 11 abcdefgh 00000000
11745 vmov i32 12 00000000 00000000 abcdefgh 11111111
11746 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11747 vmov i32 14 00000000 abcdefgh 11111111 11111111
11748 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11749 vmov i8 16 abcdefgh
11750 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11751 eeeeeeee ffffffff gggggggg hhhhhhhh
11752 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11753 vmov f32 19 00000000 00000000 00000000 00000000
11755 For case 18, B = !b. Representable values are exactly those accepted by
11756 vfp3_const_double_index, but are output as floating-point numbers rather
11757 than indices.
11759 For case 19, we will change it to vmov.i32 when assembling.
11761 Variants 0-5 (inclusive) may also be used as immediates for the second
11762 operand of VORR/VBIC instructions.
11764 The INVERSE argument causes the bitwise inverse of the given operand to be
11765 recognized instead (used for recognizing legal immediates for the VAND/VORN
11766 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11767 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11768 output, rather than the real insns vbic/vorr).
11770 INVERSE makes no difference to the recognition of float vectors.
11772 The return value is the variant of immediate as shown in the above table, or
11773 -1 if the given value doesn't match any of the listed patterns.
11775 static int
11776 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11777 rtx *modconst, int *elementwidth)
11779 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11780 matches = 1; \
11781 for (i = 0; i < idx; i += (STRIDE)) \
11782 if (!(TEST)) \
11783 matches = 0; \
11784 if (matches) \
11786 immtype = (CLASS); \
11787 elsize = (ELSIZE); \
11788 break; \
11791 unsigned int i, elsize = 0, idx = 0, n_elts;
11792 unsigned int innersize;
11793 unsigned char bytes[16];
11794 int immtype = -1, matches;
11795 unsigned int invmask = inverse ? 0xff : 0;
11796 bool vector = GET_CODE (op) == CONST_VECTOR;
11798 if (vector)
11799 n_elts = CONST_VECTOR_NUNITS (op);
11800 else
11802 n_elts = 1;
11803 if (mode == VOIDmode)
11804 mode = DImode;
11807 innersize = GET_MODE_UNIT_SIZE (mode);
11809 /* Vectors of float constants. */
11810 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11812 rtx el0 = CONST_VECTOR_ELT (op, 0);
11814 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11815 return -1;
11817 /* FP16 vectors cannot be represented. */
11818 if (GET_MODE_INNER (mode) == HFmode)
11819 return -1;
11821 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11822 are distinct in this context. */
11823 if (!const_vec_duplicate_p (op))
11824 return -1;
11826 if (modconst)
11827 *modconst = CONST_VECTOR_ELT (op, 0);
11829 if (elementwidth)
11830 *elementwidth = 0;
11832 if (el0 == CONST0_RTX (GET_MODE (el0)))
11833 return 19;
11834 else
11835 return 18;
11838 /* The tricks done in the code below apply for little-endian vector layout.
11839 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11840 FIXME: Implement logic for big-endian vectors. */
11841 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11842 return -1;
11844 /* Splat vector constant out into a byte vector. */
11845 for (i = 0; i < n_elts; i++)
11847 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11848 unsigned HOST_WIDE_INT elpart;
11850 gcc_assert (CONST_INT_P (el));
11851 elpart = INTVAL (el);
11853 for (unsigned int byte = 0; byte < innersize; byte++)
11855 bytes[idx++] = (elpart & 0xff) ^ invmask;
11856 elpart >>= BITS_PER_UNIT;
11860 /* Sanity check. */
11861 gcc_assert (idx == GET_MODE_SIZE (mode));
11865 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11866 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11868 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11869 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11871 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11872 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11874 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11875 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11877 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11879 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11881 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11882 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11884 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11885 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11887 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11888 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11890 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11891 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11893 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11895 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11897 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11898 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11900 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11901 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11903 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11904 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11906 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11907 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11909 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11911 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11912 && bytes[i] == bytes[(i + 8) % idx]);
11914 while (0);
11916 if (immtype == -1)
11917 return -1;
11919 if (elementwidth)
11920 *elementwidth = elsize;
11922 if (modconst)
11924 unsigned HOST_WIDE_INT imm = 0;
11926 /* Un-invert bytes of recognized vector, if necessary. */
11927 if (invmask != 0)
11928 for (i = 0; i < idx; i++)
11929 bytes[i] ^= invmask;
11931 if (immtype == 17)
11933 /* FIXME: Broken on 32-bit H_W_I hosts. */
11934 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11936 for (i = 0; i < 8; i++)
11937 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11938 << (i * BITS_PER_UNIT);
11940 *modconst = GEN_INT (imm);
11942 else
11944 unsigned HOST_WIDE_INT imm = 0;
11946 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11947 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11949 *modconst = GEN_INT (imm);
11953 return immtype;
11954 #undef CHECK
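/* Editor's sketch (not part of the original source): a much simplified,
   standalone illustration of what the CHECK cases 0-3 above accept for
   "vmov i32": a 32-bit element whose value has at most one nonzero
   byte.  The function name is invented; the real routine works on the
   splatted byte array and also covers the vmvn, i16, i8, i64 and float
   variants.  */
static int
neon_vmov_i32_variant_sketch (unsigned int elt)
{
  int i;
  for (i = 0; i < 4; i++)
    if ((elt & ~(0xffu << (8 * i))) == 0)
      return i;		/* abcdefgh sits in byte I => variant I.  */
  return -1;
}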
11957 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11958 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11959 float elements), and a modified constant (whatever should be output for a
11960 VMOV) in *MODCONST. */
11963 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11964 rtx *modconst, int *elementwidth)
11966 rtx tmpconst;
11967 int tmpwidth;
11968 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11970 if (retval == -1)
11971 return 0;
11973 if (modconst)
11974 *modconst = tmpconst;
11976 if (elementwidth)
11977 *elementwidth = tmpwidth;
11979 return 1;
11982 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11983 the immediate is valid, write a constant suitable for using as an operand
11984 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11985 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11988 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11989 rtx *modconst, int *elementwidth)
11991 rtx tmpconst;
11992 int tmpwidth;
11993 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11995 if (retval < 0 || retval > 5)
11996 return 0;
11998 if (modconst)
11999 *modconst = tmpconst;
12001 if (elementwidth)
12002 *elementwidth = tmpwidth;
12004 return 1;
12007 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12008 the immediate is valid, write a constant suitable for using as an operand
12009 to VSHR/VSHL to *MODCONST and the corresponding element width to
12010 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12011 which have different immediate range limitations. */
12014 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12015 rtx *modconst, int *elementwidth,
12016 bool isleftshift)
12018 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12019 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12020 unsigned HOST_WIDE_INT last_elt = 0;
12021 unsigned HOST_WIDE_INT maxshift;
12023 /* Split vector constant out into a byte vector. */
12024 for (i = 0; i < n_elts; i++)
12026 rtx el = CONST_VECTOR_ELT (op, i);
12027 unsigned HOST_WIDE_INT elpart;
12029 if (CONST_INT_P (el))
12030 elpart = INTVAL (el);
12031 else if (CONST_DOUBLE_P (el))
12032 return 0;
12033 else
12034 gcc_unreachable ();
12036 if (i != 0 && elpart != last_elt)
12037 return 0;
12039 last_elt = elpart;
12042 /* Shift less than element size. */
12043 maxshift = innersize * 8;
12045 if (isleftshift)
12047 /* Left shift immediate value can be from 0 to <size>-1. */
12048 if (last_elt >= maxshift)
12049 return 0;
12051 else
12053 /* Right shift immediate value can be from 1 to <size>. */
12054 if (last_elt == 0 || last_elt > maxshift)
12055 return 0;
12058 if (elementwidth)
12059 *elementwidth = innersize * 8;
12061 if (modconst)
12062 *modconst = CONST_VECTOR_ELT (op, 0);
12064 return 1;
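/* Editor's example (not original source): for a V8QImode operand
   (8-bit elements) the accepted immediates are 0-7 for VSHL and 1-8
   for VSHR; for V4SImode (32-bit elements) they are 0-31 and 1-32
   respectively.  */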
12067 /* Return a string suitable for output of Neon immediate logic operation
12068 MNEM. */
12070 char *
12071 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12072 int inverse, int quad)
12074 int width, is_valid;
12075 static char templ[40];
12077 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12079 gcc_assert (is_valid != 0);
12081 if (quad)
12082 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12083 else
12084 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12086 return templ;
12089 /* Return a string suitable for output of Neon immediate shift operation
12090 (VSHR or VSHL) MNEM. */
12092 char *
12093 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12094 machine_mode mode, int quad,
12095 bool isleftshift)
12097 int width, is_valid;
12098 static char templ[40];
12100 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12101 gcc_assert (is_valid != 0);
12103 if (quad)
12104 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12105 else
12106 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12108 return templ;
12111 /* Output a sequence of pairwise operations to implement a reduction.
12112 NOTE: We do "too much work" here, because pairwise operations work on two
12113 registers-worth of operands in one go. Unfortunately we don't think those
12114 extra calculations can be exploited to do the full operation in fewer steps.
12115 Although all vector elements of the result but the first are ignored, we
12116 actually calculate the same result in each of the elements. An alternative
12117 such as initially loading a vector with zero to use as each of the second
12118 operands would use up an additional register and take an extra instruction,
12119 for no particular gain. */
12121 void
12122 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12123 rtx (*reduc) (rtx, rtx, rtx))
12125 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12126 rtx tmpsum = op1;
12128 for (i = parts / 2; i >= 1; i /= 2)
12130 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12131 emit_insn (reduc (dest, tmpsum, tmpsum));
12132 tmpsum = dest;
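/* Editor's example (not original source): for a V4SF sum reduction
   (parts == 4) the loop above runs with i == 2 and then i == 1.  The
   first step pairwise-adds OP1 with itself into a scratch register,
   giving { a0+a1, a2+a3, a0+a1, a2+a3 }; the second adds that register
   with itself into OP0, so element 0 of OP0 ends up holding
   a0+a1+a2+a3.  */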
12136 /* If VALS is a vector constant that can be loaded into a register
12137 using VDUP, generate instructions to do so and return an RTX to
12138 assign to the register. Otherwise return NULL_RTX. */
12140 static rtx
12141 neon_vdup_constant (rtx vals)
12143 machine_mode mode = GET_MODE (vals);
12144 machine_mode inner_mode = GET_MODE_INNER (mode);
12145 rtx x;
12147 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12148 return NULL_RTX;
12150 if (!const_vec_duplicate_p (vals, &x))
12151 /* The elements are not all the same. We could handle repeating
12152 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12153 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12154 vdup.i16). */
12155 return NULL_RTX;
12157 /* We can load this constant by using VDUP and a constant in a
12158 single ARM register. This will be cheaper than a vector
12159 load. */
12161 x = copy_to_mode_reg (inner_mode, x);
12162 return gen_vec_duplicate (mode, x);
12165 /* Generate code to load VALS, which is a PARALLEL containing only
12166 constants (for vec_init) or CONST_VECTOR, efficiently into a
12167 register. Returns an RTX to copy into the register, or NULL_RTX
12168 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12171 neon_make_constant (rtx vals)
12173 machine_mode mode = GET_MODE (vals);
12174 rtx target;
12175 rtx const_vec = NULL_RTX;
12176 int n_elts = GET_MODE_NUNITS (mode);
12177 int n_const = 0;
12178 int i;
12180 if (GET_CODE (vals) == CONST_VECTOR)
12181 const_vec = vals;
12182 else if (GET_CODE (vals) == PARALLEL)
12184 /* A CONST_VECTOR must contain only CONST_INTs and
12185 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12186 Only store valid constants in a CONST_VECTOR. */
12187 for (i = 0; i < n_elts; ++i)
12189 rtx x = XVECEXP (vals, 0, i);
12190 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12191 n_const++;
12193 if (n_const == n_elts)
12194 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12196 else
12197 gcc_unreachable ();
12199 if (const_vec != NULL
12200 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12201 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12202 return const_vec;
12203 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12204 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12205 pipeline cycle; creating the constant takes one or two ARM
12206 pipeline cycles. */
12207 return target;
12208 else if (const_vec != NULL_RTX)
12209 /* Load from constant pool. On Cortex-A8 this takes two cycles
12210 (for either double or quad vectors). We cannot take advantage
12211 of single-cycle VLD1 because we need a PC-relative addressing
12212 mode. */
12213 return const_vec;
12214 else
12215 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12216 We cannot construct an initializer. */
12217 return NULL_RTX;
12220 /* Initialize vector TARGET to VALS. */
12222 void
12223 neon_expand_vector_init (rtx target, rtx vals)
12225 machine_mode mode = GET_MODE (target);
12226 machine_mode inner_mode = GET_MODE_INNER (mode);
12227 int n_elts = GET_MODE_NUNITS (mode);
12228 int n_var = 0, one_var = -1;
12229 bool all_same = true;
12230 rtx x, mem;
12231 int i;
12233 for (i = 0; i < n_elts; ++i)
12235 x = XVECEXP (vals, 0, i);
12236 if (!CONSTANT_P (x))
12237 ++n_var, one_var = i;
12239 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12240 all_same = false;
12243 if (n_var == 0)
12245 rtx constant = neon_make_constant (vals);
12246 if (constant != NULL_RTX)
12248 emit_move_insn (target, constant);
12249 return;
12253 /* Splat a single non-constant element if we can. */
12254 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12256 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12257 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12258 return;
12261 /* One field is non-constant. Load constant then overwrite varying
12262 field. This is more efficient than using the stack. */
12263 if (n_var == 1)
12265 rtx copy = copy_rtx (vals);
12266 rtx index = GEN_INT (one_var);
12268 /* Load constant part of vector, substitute neighboring value for
12269 varying element. */
12270 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12271 neon_expand_vector_init (target, copy);
12273 /* Insert variable. */
12274 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12275 switch (mode)
12277 case E_V8QImode:
12278 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12279 break;
12280 case E_V16QImode:
12281 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12282 break;
12283 case E_V4HImode:
12284 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12285 break;
12286 case E_V8HImode:
12287 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12288 break;
12289 case E_V2SImode:
12290 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12291 break;
12292 case E_V4SImode:
12293 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12294 break;
12295 case E_V2SFmode:
12296 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12297 break;
12298 case E_V4SFmode:
12299 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12300 break;
12301 case E_V2DImode:
12302 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12303 break;
12304 default:
12305 gcc_unreachable ();
12307 return;
12310 /* Construct the vector in memory one field at a time
12311 and load the whole vector. */
12312 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12313 for (i = 0; i < n_elts; i++)
12314 emit_move_insn (adjust_address_nv (mem, inner_mode,
12315 i * GET_MODE_SIZE (inner_mode)),
12316 XVECEXP (vals, 0, i));
12317 emit_move_insn (target, mem);
12320 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12321 an error if it doesn't. EXP indicates the source location, which includes the
12322 inlining history for intrinsics. */
12324 static void
12325 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12326 const_tree exp, const char *desc)
12328 HOST_WIDE_INT lane;
12330 gcc_assert (CONST_INT_P (operand));
12332 lane = INTVAL (operand);
12334 if (lane < low || lane >= high)
12336 if (exp)
12337 error ("%K%s %wd out of range %wd - %wd",
12338 exp, desc, lane, low, high - 1);
12339 else
12340 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12344 /* Bounds-check lanes. */
12346 void
12347 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12348 const_tree exp)
12350 bounds_check (operand, low, high, exp, "lane");
12353 /* Bounds-check constants. */
12355 void
12356 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12358 bounds_check (operand, low, high, NULL_TREE, "constant");
12361 HOST_WIDE_INT
12362 neon_element_bits (machine_mode mode)
12364 return GET_MODE_UNIT_BITSIZE (mode);
12368 /* Predicates for `match_operand' and `match_operator'. */
12370 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12371 WB is true if full writeback address modes are allowed and is false
12372 if limited writeback address modes (POST_INC and PRE_DEC) are
12373 allowed. */
12376 arm_coproc_mem_operand (rtx op, bool wb)
12378 rtx ind;
12380 /* Reject eliminable registers. */
12381 if (! (reload_in_progress || reload_completed || lra_in_progress)
12382 && ( reg_mentioned_p (frame_pointer_rtx, op)
12383 || reg_mentioned_p (arg_pointer_rtx, op)
12384 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12385 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12386 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12387 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12388 return FALSE;
12390 /* Constants are converted into offsets from labels. */
12391 if (!MEM_P (op))
12392 return FALSE;
12394 ind = XEXP (op, 0);
12396 if (reload_completed
12397 && (GET_CODE (ind) == LABEL_REF
12398 || (GET_CODE (ind) == CONST
12399 && GET_CODE (XEXP (ind, 0)) == PLUS
12400 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12401 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12402 return TRUE;
12404 /* Match: (mem (reg)). */
12405 if (REG_P (ind))
12406 return arm_address_register_rtx_p (ind, 0);
12408 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12409 acceptable in any case (subject to verification by
12410 arm_address_register_rtx_p). We need WB to be true to accept
12411 PRE_INC and POST_DEC. */
12412 if (GET_CODE (ind) == POST_INC
12413 || GET_CODE (ind) == PRE_DEC
12414 || (wb
12415 && (GET_CODE (ind) == PRE_INC
12416 || GET_CODE (ind) == POST_DEC)))
12417 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12419 if (wb
12420 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12421 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12422 && GET_CODE (XEXP (ind, 1)) == PLUS
12423 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12424 ind = XEXP (ind, 1);
12426 /* Match:
12427 (plus (reg)
12428 (const)). */
12429 if (GET_CODE (ind) == PLUS
12430 && REG_P (XEXP (ind, 0))
12431 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12432 && CONST_INT_P (XEXP (ind, 1))
12433 && INTVAL (XEXP (ind, 1)) > -1024
12434 && INTVAL (XEXP (ind, 1)) < 1024
12435 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12436 return TRUE;
12438 return FALSE;
12441 /* Return TRUE if OP is a memory operand which we can load or store a vector
12442 to/from. TYPE is one of the following values:
12443 0 - Vector load/store (vldr)
12444 1 - Core registers (ldm)
12445 2 - Element/structure loads (vld1)
12448 neon_vector_mem_operand (rtx op, int type, bool strict)
12450 rtx ind;
12452 /* Reject eliminable registers. */
12453 if (strict && ! (reload_in_progress || reload_completed)
12454 && (reg_mentioned_p (frame_pointer_rtx, op)
12455 || reg_mentioned_p (arg_pointer_rtx, op)
12456 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12457 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12458 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12459 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12460 return FALSE;
12462 /* Constants are converted into offsets from labels. */
12463 if (!MEM_P (op))
12464 return FALSE;
12466 ind = XEXP (op, 0);
12468 if (reload_completed
12469 && (GET_CODE (ind) == LABEL_REF
12470 || (GET_CODE (ind) == CONST
12471 && GET_CODE (XEXP (ind, 0)) == PLUS
12472 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12473 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12474 return TRUE;
12476 /* Match: (mem (reg)). */
12477 if (REG_P (ind))
12478 return arm_address_register_rtx_p (ind, 0);
12480 /* Allow post-increment with Neon registers. */
12481 if ((type != 1 && GET_CODE (ind) == POST_INC)
12482 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12483 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12485 /* Allow post-increment by register for VLDn */
12486 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12487 && GET_CODE (XEXP (ind, 1)) == PLUS
12488 && REG_P (XEXP (XEXP (ind, 1), 1)))
12489 return true;
12491 /* Match:
12492 (plus (reg)
12493 (const)). */
12494 if (type == 0
12495 && GET_CODE (ind) == PLUS
12496 && REG_P (XEXP (ind, 0))
12497 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12498 && CONST_INT_P (XEXP (ind, 1))
12499 && INTVAL (XEXP (ind, 1)) > -1024
12500 /* For quad modes, we restrict the constant offset to be slightly less
12501 than what the instruction format permits. We have no such constraint
12502 on double mode offsets. (This must match arm_legitimate_index_p.) */
12503 && (INTVAL (XEXP (ind, 1))
12504 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12505 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12506 return TRUE;
12508 return FALSE;
12511 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12512 type. */
12514 neon_struct_mem_operand (rtx op)
12516 rtx ind;
12518 /* Reject eliminable registers. */
12519 if (! (reload_in_progress || reload_completed)
12520 && ( reg_mentioned_p (frame_pointer_rtx, op)
12521 || reg_mentioned_p (arg_pointer_rtx, op)
12522 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12523 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12524 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12525 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12526 return FALSE;
12528 /* Constants are converted into offsets from labels. */
12529 if (!MEM_P (op))
12530 return FALSE;
12532 ind = XEXP (op, 0);
12534 if (reload_completed
12535 && (GET_CODE (ind) == LABEL_REF
12536 || (GET_CODE (ind) == CONST
12537 && GET_CODE (XEXP (ind, 0)) == PLUS
12538 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12539 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12540 return TRUE;
12542 /* Match: (mem (reg)). */
12543 if (REG_P (ind))
12544 return arm_address_register_rtx_p (ind, 0);
12546 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12547 if (GET_CODE (ind) == POST_INC
12548 || GET_CODE (ind) == PRE_DEC)
12549 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12551 return FALSE;
12554 /* Return true if X is a register that will be eliminated later on. */
12556 arm_eliminable_register (rtx x)
12558 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12559 || REGNO (x) == ARG_POINTER_REGNUM
12560 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12561 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12564 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12565 coprocessor registers. Otherwise return NO_REGS. */
12567 enum reg_class
12568 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12570 if (mode == HFmode)
12572 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12573 return GENERAL_REGS;
12574 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12575 return NO_REGS;
12576 return GENERAL_REGS;
12579 /* The neon move patterns handle all legitimate vector and struct
12580 addresses. */
12581 if (TARGET_NEON
12582 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12583 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12584 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12585 || VALID_NEON_STRUCT_MODE (mode)))
12586 return NO_REGS;
12588 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12589 return NO_REGS;
12591 return GENERAL_REGS;
12594 /* Values which must be returned in the most-significant end of the return
12595 register. */
12597 static bool
12598 arm_return_in_msb (const_tree valtype)
12600 return (TARGET_AAPCS_BASED
12601 && BYTES_BIG_ENDIAN
12602 && (AGGREGATE_TYPE_P (valtype)
12603 || TREE_CODE (valtype) == COMPLEX_TYPE
12604 || FIXED_POINT_TYPE_P (valtype)));
12607 /* Return TRUE if X references a SYMBOL_REF. */
12609 symbol_mentioned_p (rtx x)
12611 const char * fmt;
12612 int i;
12614 if (GET_CODE (x) == SYMBOL_REF)
12615 return 1;
12617 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12618 are constant offsets, not symbols. */
12619 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12620 return 0;
12622 fmt = GET_RTX_FORMAT (GET_CODE (x));
12624 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12626 if (fmt[i] == 'E')
12628 int j;
12630 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12631 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12632 return 1;
12634 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12635 return 1;
12638 return 0;
12641 /* Return TRUE if X references a LABEL_REF. */
12643 label_mentioned_p (rtx x)
12645 const char * fmt;
12646 int i;
12648 if (GET_CODE (x) == LABEL_REF)
12649 return 1;
12651 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12652 instruction, but they are constant offsets, not symbols. */
12653 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12654 return 0;
12656 fmt = GET_RTX_FORMAT (GET_CODE (x));
12657 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12659 if (fmt[i] == 'E')
12661 int j;
12663 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12664 if (label_mentioned_p (XVECEXP (x, i, j)))
12665 return 1;
12667 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12668 return 1;
12671 return 0;
12675 tls_mentioned_p (rtx x)
12677 switch (GET_CODE (x))
12679 case CONST:
12680 return tls_mentioned_p (XEXP (x, 0));
12682 case UNSPEC:
12683 if (XINT (x, 1) == UNSPEC_TLS)
12684 return 1;
12686 /* Fall through. */
12687 default:
12688 return 0;
12692 /* Must not copy any rtx that uses a pc-relative address.
12693 Also, disallow copying of load-exclusive instructions that
12694 may appear after splitting of compare-and-swap-style operations
12695 so as to prevent those loops from being transformed away from their
12696 canonical forms (see PR 69904). */
12698 static bool
12699 arm_cannot_copy_insn_p (rtx_insn *insn)
12701 /* The tls call insn cannot be copied, as it is paired with a data
12702 word. */
12703 if (recog_memoized (insn) == CODE_FOR_tlscall)
12704 return true;
12706 subrtx_iterator::array_type array;
12707 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12709 const_rtx x = *iter;
12710 if (GET_CODE (x) == UNSPEC
12711 && (XINT (x, 1) == UNSPEC_PIC_BASE
12712 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12713 return true;
12716 rtx set = single_set (insn);
12717 if (set)
12719 rtx src = SET_SRC (set);
12720 if (GET_CODE (src) == ZERO_EXTEND)
12721 src = XEXP (src, 0);
12723 /* Catch the load-exclusive and load-acquire operations. */
12724 if (GET_CODE (src) == UNSPEC_VOLATILE
12725 && (XINT (src, 1) == VUNSPEC_LL
12726 || XINT (src, 1) == VUNSPEC_LAX))
12727 return true;
12729 return false;
12732 enum rtx_code
12733 minmax_code (rtx x)
12735 enum rtx_code code = GET_CODE (x);
12737 switch (code)
12739 case SMAX:
12740 return GE;
12741 case SMIN:
12742 return LE;
12743 case UMIN:
12744 return LEU;
12745 case UMAX:
12746 return GEU;
12747 default:
12748 gcc_unreachable ();
12752 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12754 bool
12755 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12756 int *mask, bool *signed_sat)
12758 /* The high bound must be a power of two minus one. */
12759 int log = exact_log2 (INTVAL (hi_bound) + 1);
12760 if (log == -1)
12761 return false;
12763 /* The low bound is either zero (for usat) or one less than the
12764 negation of the high bound (for ssat). */
12765 if (INTVAL (lo_bound) == 0)
12767 if (mask)
12768 *mask = log;
12769 if (signed_sat)
12770 *signed_sat = false;
12772 return true;
12775 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12777 if (mask)
12778 *mask = log + 1;
12779 if (signed_sat)
12780 *signed_sat = true;
12782 return true;
12785 return false;
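/* Editor's example (not original source): smin (smax (x, 0), 255) has
   LO_BOUND == 0 and HI_BOUND == 255, so *MASK is set to 8 and
   *SIGNED_SAT to false (usat #8); smin (smax (x, -128), 127) gives
   *MASK == 8 with *SIGNED_SAT true (ssat #8).  */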
12788 /* Return 1 if memory locations are adjacent. */
12790 adjacent_mem_locations (rtx a, rtx b)
12792 /* We don't guarantee to preserve the order of these memory refs. */
12793 if (volatile_refs_p (a) || volatile_refs_p (b))
12794 return 0;
12796 if ((REG_P (XEXP (a, 0))
12797 || (GET_CODE (XEXP (a, 0)) == PLUS
12798 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12799 && (REG_P (XEXP (b, 0))
12800 || (GET_CODE (XEXP (b, 0)) == PLUS
12801 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12803 HOST_WIDE_INT val0 = 0, val1 = 0;
12804 rtx reg0, reg1;
12805 int val_diff;
12807 if (GET_CODE (XEXP (a, 0)) == PLUS)
12809 reg0 = XEXP (XEXP (a, 0), 0);
12810 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12812 else
12813 reg0 = XEXP (a, 0);
12815 if (GET_CODE (XEXP (b, 0)) == PLUS)
12817 reg1 = XEXP (XEXP (b, 0), 0);
12818 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12820 else
12821 reg1 = XEXP (b, 0);
12823 /* Don't accept any offset that will require multiple
12824 instructions to handle, since this would cause the
12825 arith_adjacentmem pattern to output an overlong sequence. */
12826 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12827 return 0;
12829 /* Don't allow an eliminable register: register elimination can make
12830 the offset too large. */
12831 if (arm_eliminable_register (reg0))
12832 return 0;
12834 val_diff = val1 - val0;
12836 if (arm_ld_sched)
12838 /* If the target has load delay slots, then there's no benefit
12839 to using an ldm instruction unless the offset is zero and
12840 we are optimizing for size. */
12841 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12842 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12843 && (val_diff == 4 || val_diff == -4));
12846 return ((REGNO (reg0) == REGNO (reg1))
12847 && (val_diff == 4 || val_diff == -4));
12850 return 0;
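/* Editor's example (not original source): [r4] and [r4, #4] are
   adjacent (val_diff == 4), as are [r4, #8] and [r4, #4]
   (val_diff == -4), while [r4] and [r4, #8] are not.  On cores with
   load scheduling (arm_ld_sched) such a pair is only reported as
   adjacent when optimizing for size and one of the offsets is 0 or 4.  */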
12853 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12854 for load operations, false for store operations. CONSECUTIVE is true
12855 if the register numbers in the operation must be consecutive in the register
12856 bank. RETURN_PC is true if value is to be loaded in PC.
12857 The pattern we are trying to match for load is:
12858 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12859 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12862 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12864 where
12865 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12866 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12867 3. If consecutive is TRUE, then for kth register being loaded,
12868 REGNO (R_dk) = REGNO (R_d0) + k.
12869 The pattern for store is similar. */
12870 bool
12871 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12872 bool consecutive, bool return_pc)
12874 HOST_WIDE_INT count = XVECLEN (op, 0);
12875 rtx reg, mem, addr;
12876 unsigned regno;
12877 unsigned first_regno;
12878 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12879 rtx elt;
12880 bool addr_reg_in_reglist = false;
12881 bool update = false;
12882 int reg_increment;
12883 int offset_adj;
12884 int regs_per_val;
12886 /* If not in SImode, then registers must be consecutive
12887 (e.g., VLDM instructions for DFmode). */
12888 gcc_assert ((mode == SImode) || consecutive);
12889 /* Setting return_pc for stores is illegal. */
12890 gcc_assert (!return_pc || load);
12892 /* Set up the increments and the regs per val based on the mode. */
12893 reg_increment = GET_MODE_SIZE (mode);
12894 regs_per_val = reg_increment / 4;
12895 offset_adj = return_pc ? 1 : 0;
12897 if (count <= 1
12898 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12899 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12900 return false;
12902 /* Check if this is a write-back. */
12903 elt = XVECEXP (op, 0, offset_adj);
12904 if (GET_CODE (SET_SRC (elt)) == PLUS)
12906 i++;
12907 base = 1;
12908 update = true;
12910 /* The offset adjustment must be the number of registers being
12911 popped times the size of a single register. */
12912 if (!REG_P (SET_DEST (elt))
12913 || !REG_P (XEXP (SET_SRC (elt), 0))
12914 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12915 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12916 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12917 ((count - 1 - offset_adj) * reg_increment))
12918 return false;
12921 i = i + offset_adj;
12922 base = base + offset_adj;
12923 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12924 success depends on the type: VLDM can do just one reg,
12925 LDM must do at least two. */
12926 if ((count <= i) && (mode == SImode))
12927 return false;
12929 elt = XVECEXP (op, 0, i - 1);
12930 if (GET_CODE (elt) != SET)
12931 return false;
12933 if (load)
12935 reg = SET_DEST (elt);
12936 mem = SET_SRC (elt);
12938 else
12940 reg = SET_SRC (elt);
12941 mem = SET_DEST (elt);
12944 if (!REG_P (reg) || !MEM_P (mem))
12945 return false;
12947 regno = REGNO (reg);
12948 first_regno = regno;
12949 addr = XEXP (mem, 0);
12950 if (GET_CODE (addr) == PLUS)
12952 if (!CONST_INT_P (XEXP (addr, 1)))
12953 return false;
12955 offset = INTVAL (XEXP (addr, 1));
12956 addr = XEXP (addr, 0);
12959 if (!REG_P (addr))
12960 return false;
12962 /* Don't allow SP to be loaded unless it is also the base register. It
12963 guarantees that SP is reset correctly when an LDM instruction
12964 is interrupted. Otherwise, we might end up with a corrupt stack. */
12965 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12966 return false;
12968 for (; i < count; i++)
12970 elt = XVECEXP (op, 0, i);
12971 if (GET_CODE (elt) != SET)
12972 return false;
12974 if (load)
12976 reg = SET_DEST (elt);
12977 mem = SET_SRC (elt);
12979 else
12981 reg = SET_SRC (elt);
12982 mem = SET_DEST (elt);
12985 if (!REG_P (reg)
12986 || GET_MODE (reg) != mode
12987 || REGNO (reg) <= regno
12988 || (consecutive
12989 && (REGNO (reg) !=
12990 (unsigned int) (first_regno + regs_per_val * (i - base))))
12991 /* Don't allow SP to be loaded unless it is also the base register. It
12992 guarantees that SP is reset correctly when an LDM instruction
12993 is interrupted. Otherwise, we might end up with a corrupt stack. */
12994 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12995 || !MEM_P (mem)
12996 || GET_MODE (mem) != mode
12997 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12998 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12999 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13000 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13001 offset + (i - base) * reg_increment))
13002 && (!REG_P (XEXP (mem, 0))
13003 || offset + (i - base) * reg_increment != 0)))
13004 return false;
13006 regno = REGNO (reg);
13007 if (regno == REGNO (addr))
13008 addr_reg_in_reglist = true;
13011 if (load)
13013 if (update && addr_reg_in_reglist)
13014 return false;
13016 /* For Thumb-1, the address register is always modified, either by write-back
13017 or by an explicit load. If the pattern does not describe an update,
13018 then the address register must be in the list of loaded registers. */
13019 if (TARGET_THUMB1)
13020 return update || addr_reg_in_reglist;
13023 return true;
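/* Editor's example (not original source): "ldmia r0, {r4, r5, r6}" is
   represented as a PARALLEL of three SETs loading r4 from [r0], r5 from
   [r0, #4] and r6 from [r0, #8].  The write-back form "ldmia r0!, ..."
   carries an extra leading SET that adds 12 (3 registers * 4 bytes) to
   r0, in which case r0 must not also appear in the list of loaded
   registers.  */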
13026 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13027 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13028 instruction. ADD_OFFSET is nonzero if the base address register needs
13029 to be modified with an add instruction before we can use it. */
13031 static bool
13032 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13033 int nops, HOST_WIDE_INT add_offset)
13035 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13036 if the offset isn't small enough. The reason 2 ldrs are faster
13037 is because these ARMs are able to do more than one cache access
13038 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13039 whilst the ARM8 has a double bandwidth cache. This means that
13040 these cores can do both an instruction fetch and a data fetch in
13041 a single cycle, so the trick of calculating the address into a
13042 scratch register (one of the result regs) and then doing a load
13043 multiple actually becomes slower (and no smaller in code size).
13044 That is the transformation
13046 ldr rd1, [rbase + offset]
13047 ldr rd2, [rbase + offset + 4]
to
13051 add rd1, rbase, offset
13052 ldmia rd1, {rd1, rd2}
13054 produces worse code -- '3 cycles + any stalls on rd2' instead of
13055 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13056 access per cycle, the first sequence could never complete in less
13057 than 6 cycles, whereas the ldm sequence would only take 5 and
13058 would make better use of sequential accesses if not hitting the
13059 cache.
13061 We cheat here and test 'arm_ld_sched' which we currently know to
13062 only be true for the ARM8, ARM9 and StrongARM. If this ever
13063 changes, then the test below needs to be reworked. */
13064 if (nops == 2 && arm_ld_sched && add_offset != 0)
13065 return false;
13067 /* XScale has load-store double instructions, but they have stricter
13068 alignment requirements than load-store multiple, so we cannot
13069 use them.
13071 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13072 the pipeline until completion.
13074 NREGS CYCLES
1 3
2 4
3 5
4 6
13080 An ldr instruction takes 1-3 cycles, but does not block the
13081 pipeline.
13083 NREGS CYCLES
13084 1 1-3
13085 2 2-6
13086 3 3-9
13087 4 4-12
13089 Best case ldr will always win. However, the more ldr instructions
13090 we issue, the less likely we are to be able to schedule them well.
13091 Using ldr instructions also increases code size.
13093 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13094 for counts of 3 or 4 regs. */
13095 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13096 return false;
13097 return true;
13100 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13101 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13102 an array ORDER which describes the sequence to use when accessing the
13103 offsets that produces an ascending order. In this sequence, each
13104 offset must be larger by exactly 4 than the previous one. ORDER[0]
13105 must have been filled in with the lowest offset by the caller.
13106 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13107 we use to verify that ORDER produces an ascending order of registers.
13108 Return true if it was possible to construct such an order, false if
13109 not. */
13111 static bool
13112 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13113 int *unsorted_regs)
13115 int i;
13116 for (i = 1; i < nops; i++)
13118 int j;
13120 order[i] = order[i - 1];
13121 for (j = 0; j < nops; j++)
13122 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13124 /* We must find exactly one offset that is higher than the
13125 previous one by 4. */
13126 if (order[i] != order[i - 1])
13127 return false;
13128 order[i] = j;
13130 if (order[i] == order[i - 1])
13131 return false;
13132 /* The register numbers must be ascending. */
13133 if (unsorted_regs != NULL
13134 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13135 return false;
13137 return true;
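/* Editor's example (not original source): with UNSORTED_OFFSETS
   {8, 0, 4, 12} and ORDER[0] pre-set to 1 (the index of the lowest
   offset), the loop above fills ORDER with {1, 2, 0, 3}; offsets
   {8, 0, 4, 10} would fail because no offset is exactly 4 above 8.  */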
13140 /* Used to determine in a peephole whether a sequence of load
13141 instructions can be changed into a load-multiple instruction.
13142 NOPS is the number of separate load instructions we are examining. The
13143 first NOPS entries in OPERANDS are the destination registers, the
13144 next NOPS entries are memory operands. If this function is
13145 successful, *BASE is set to the common base register of the memory
13146 accesses; *LOAD_OFFSET is set to the first memory location's offset
13147 from that base register.
13148 REGS is an array filled in with the destination register numbers.
13149 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13150 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13151 the sequence of registers in REGS matches the loads from ascending memory
13152 locations, and the function verifies that the register numbers are
13153 themselves ascending. If CHECK_REGS is false, the register numbers
13154 are stored in the order they are found in the operands. */
13155 static int
13156 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13157 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13159 int unsorted_regs[MAX_LDM_STM_OPS];
13160 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13161 int order[MAX_LDM_STM_OPS];
13162 rtx base_reg_rtx = NULL;
13163 int base_reg = -1;
13164 int i, ldm_case;
13166 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13167 easily extended if required. */
13168 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13170 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13172 /* Loop over the operands and check that the memory references are
13173 suitable (i.e. immediate offsets from the same base register). At
13174 the same time, extract the target register, and the memory
13175 offsets. */
13176 for (i = 0; i < nops; i++)
13178 rtx reg;
13179 rtx offset;
13181 /* Convert a subreg of a mem into the mem itself. */
13182 if (GET_CODE (operands[nops + i]) == SUBREG)
13183 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13185 gcc_assert (MEM_P (operands[nops + i]));
13187 /* Don't reorder volatile memory references; it doesn't seem worth
13188 looking for the case where the order is ok anyway. */
13189 if (MEM_VOLATILE_P (operands[nops + i]))
13190 return 0;
13192 offset = const0_rtx;
13194 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13195 || (GET_CODE (reg) == SUBREG
13196 && REG_P (reg = SUBREG_REG (reg))))
13197 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13198 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13199 || (GET_CODE (reg) == SUBREG
13200 && REG_P (reg = SUBREG_REG (reg))))
13201 && (CONST_INT_P (offset
13202 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13204 if (i == 0)
13206 base_reg = REGNO (reg);
13207 base_reg_rtx = reg;
13208 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13209 return 0;
13211 else if (base_reg != (int) REGNO (reg))
13212 /* Not addressed from the same base register. */
13213 return 0;
13215 unsorted_regs[i] = (REG_P (operands[i])
13216 ? REGNO (operands[i])
13217 : REGNO (SUBREG_REG (operands[i])));
13219 /* If it isn't an integer register, or if it overwrites the
13220 base register but isn't the last insn in the list, then
13221 we can't do this. */
13222 if (unsorted_regs[i] < 0
13223 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13224 || unsorted_regs[i] > 14
13225 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13226 return 0;
13228 /* Don't allow SP to be loaded unless it is also the base
13229 register. It guarantees that SP is reset correctly when
13230 an LDM instruction is interrupted. Otherwise, we might
13231 end up with a corrupt stack. */
13232 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13233 return 0;
13235 unsorted_offsets[i] = INTVAL (offset);
13236 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13237 order[0] = i;
13239 else
13240 /* Not a suitable memory address. */
13241 return 0;
13244 /* All the useful information has now been extracted from the
13245 operands into unsorted_regs and unsorted_offsets; additionally,
13246 order[0] has been set to the lowest offset in the list. Sort
13247 the offsets into order, verifying that they are adjacent, and
13248 check that the register numbers are ascending. */
13249 if (!compute_offset_order (nops, unsorted_offsets, order,
13250 check_regs ? unsorted_regs : NULL))
13251 return 0;
13253 if (saved_order)
13254 memcpy (saved_order, order, sizeof order);
13256 if (base)
13258 *base = base_reg;
13260 for (i = 0; i < nops; i++)
13261 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13263 *load_offset = unsorted_offsets[order[0]];
13266 if (TARGET_THUMB1
13267 && !peep2_reg_dead_p (nops, base_reg_rtx))
13268 return 0;
13270 if (unsorted_offsets[order[0]] == 0)
13271 ldm_case = 1; /* ldmia */
13272 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13273 ldm_case = 2; /* ldmib */
13274 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13275 ldm_case = 3; /* ldmda */
13276 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13277 ldm_case = 4; /* ldmdb */
13278 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13279 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13280 ldm_case = 5;
13281 else
13282 return 0;
13284 if (!multiple_operation_profitable_p (false, nops,
13285 ldm_case == 5
13286 ? unsorted_offsets[order[0]] : 0))
13287 return 0;
13289 return ldm_case;
13292 /* Used to determine in a peephole whether a sequence of store instructions can
13293 be changed into a store-multiple instruction.
13294 NOPS is the number of separate store instructions we are examining.
13295 NOPS_TOTAL is the total number of instructions recognized by the peephole
13296 pattern.
13297 The first NOPS entries in OPERANDS are the source registers, the next
13298 NOPS entries are memory operands. If this function is successful, *BASE is
13299 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13300 to the first memory location's offset from that base register. REGS is an
13301 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13302 likewise filled with the corresponding rtx's.
13303 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13304 numbers to an ascending order of stores.
13305 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13306 from ascending memory locations, and the function verifies that the register
13307 numbers are themselves ascending. If CHECK_REGS is false, the register
13308 numbers are stored in the order they are found in the operands. */
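/* For illustration: two stores at offsets -8 and -4 from the common base,
   with ascending source registers, have their last (highest) offset at -4
   and are therefore classified as stm_case 4 (stmdb), the push-like
   form.  */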
13309 static int
13310 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13311 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13312 HOST_WIDE_INT *load_offset, bool check_regs)
13314 int unsorted_regs[MAX_LDM_STM_OPS];
13315 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13316 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13317 int order[MAX_LDM_STM_OPS];
13318 int base_reg = -1;
13319 rtx base_reg_rtx = NULL;
13320 int i, stm_case;
13322 /* Write back of base register is currently only supported for Thumb 1. */
13323 int base_writeback = TARGET_THUMB1;
13325 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13326 easily extended if required. */
13327 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13329 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13331 /* Loop over the operands and check that the memory references are
13332 suitable (i.e. immediate offsets from the same base register). At
13333 the same time, extract the target register, and the memory
13334 offsets. */
13335 for (i = 0; i < nops; i++)
13337 rtx reg;
13338 rtx offset;
13340 /* Convert a subreg of a mem into the mem itself. */
13341 if (GET_CODE (operands[nops + i]) == SUBREG)
13342 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13344 gcc_assert (MEM_P (operands[nops + i]));
13346 /* Don't reorder volatile memory references; it doesn't seem worth
13347 looking for the case where the order is ok anyway. */
13348 if (MEM_VOLATILE_P (operands[nops + i]))
13349 return 0;
13351 offset = const0_rtx;
13353 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13354 || (GET_CODE (reg) == SUBREG
13355 && REG_P (reg = SUBREG_REG (reg))))
13356 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13357 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13358 || (GET_CODE (reg) == SUBREG
13359 && REG_P (reg = SUBREG_REG (reg))))
13360 && (CONST_INT_P (offset
13361 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13363 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13364 ? operands[i] : SUBREG_REG (operands[i]));
13365 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13367 if (i == 0)
13369 base_reg = REGNO (reg);
13370 base_reg_rtx = reg;
13371 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13372 return 0;
13374 else if (base_reg != (int) REGNO (reg))
13375 /* Not addressed from the same base register. */
13376 return 0;
13378 /* If it isn't an integer register, then we can't do this. */
13379 if (unsorted_regs[i] < 0
13380 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13381 /* The effects are unpredictable if the base register is
13382 both updated and stored. */
13383 || (base_writeback && unsorted_regs[i] == base_reg)
13384 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13385 || unsorted_regs[i] > 14)
13386 return 0;
13388 unsorted_offsets[i] = INTVAL (offset);
13389 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13390 order[0] = i;
13392 else
13393 /* Not a suitable memory address. */
13394 return 0;
13397 /* All the useful information has now been extracted from the
13398 operands into unsorted_regs and unsorted_offsets; additionally,
13399 order[0] has been set to the lowest offset in the list. Sort
13400 the offsets into order, verifying that they are adjacent, and
13401 check that the register numbers are ascending. */
13402 if (!compute_offset_order (nops, unsorted_offsets, order,
13403 check_regs ? unsorted_regs : NULL))
13404 return 0;
13406 if (saved_order)
13407 memcpy (saved_order, order, sizeof order);
13409 if (base)
13411 *base = base_reg;
13413 for (i = 0; i < nops; i++)
13415 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13416 if (reg_rtxs)
13417 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13420 *load_offset = unsorted_offsets[order[0]];
13423 if (TARGET_THUMB1
13424 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13425 return 0;
13427 if (unsorted_offsets[order[0]] == 0)
13428 stm_case = 1; /* stmia */
13429 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13430 stm_case = 2; /* stmib */
13431 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13432 stm_case = 3; /* stmda */
13433 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13434 stm_case = 4; /* stmdb */
13435 else
13436 return 0;
13438 if (!multiple_operation_profitable_p (false, nops, 0))
13439 return 0;
13441 return stm_case;
13444 /* Routines for use in generating RTL. */
13446 /* Generate a load-multiple instruction. COUNT is the number of loads in
13447 the instruction; REGS and MEMS are arrays containing the operands.
13448 BASEREG is the base register to be used in addressing the memory operands.
13449 WBACK_OFFSET, if nonzero, is the amount by which the base register
13450 should be updated. */
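/* For illustration: when the multiple operation is profitable, COUNT == 2,
   REGS == {4, 5} and WBACK_OFFSET == 0 produce a single PARALLEL of the form
   (parallel [(set (reg:SI 4) (mem ...)) (set (reg:SI 5) (mem ...))]);
   a nonzero WBACK_OFFSET prepends a SET that advances BASEREG by that
   amount.  */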
13452 static rtx
13453 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13454 HOST_WIDE_INT wback_offset)
13456 int i = 0, j;
13457 rtx result;
13459 if (!multiple_operation_profitable_p (false, count, 0))
13461 rtx seq;
13463 start_sequence ();
13465 for (i = 0; i < count; i++)
13466 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13468 if (wback_offset != 0)
13469 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13471 seq = get_insns ();
13472 end_sequence ();
13474 return seq;
13477 result = gen_rtx_PARALLEL (VOIDmode,
13478 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13479 if (wback_offset != 0)
13481 XVECEXP (result, 0, 0)
13482 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13483 i = 1;
13484 count++;
13487 for (j = 0; i < count; i++, j++)
13488 XVECEXP (result, 0, i)
13489 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13491 return result;
13494 /* Generate a store-multiple instruction. COUNT is the number of stores in
13495 the instruction; REGS and MEMS are arrays containing the operands.
13496 BASEREG is the base register to be used in addressing the memory operands.
13497 WBACK_OFFSET, if nonzero, is the amount by which the base register
13498 should be updated. */
13500 static rtx
13501 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13502 HOST_WIDE_INT wback_offset)
13504 int i = 0, j;
13505 rtx result;
13507 if (GET_CODE (basereg) == PLUS)
13508 basereg = XEXP (basereg, 0);
13510 if (!multiple_operation_profitable_p (false, count, 0))
13512 rtx seq;
13514 start_sequence ();
13516 for (i = 0; i < count; i++)
13517 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13519 if (wback_offset != 0)
13520 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13522 seq = get_insns ();
13523 end_sequence ();
13525 return seq;
13528 result = gen_rtx_PARALLEL (VOIDmode,
13529 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13530 if (wback_offset != 0)
13532 XVECEXP (result, 0, 0)
13533 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13534 i = 1;
13535 count++;
13538 for (j = 0; i < count; i++, j++)
13539 XVECEXP (result, 0, i)
13540 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13542 return result;
13545 /* Generate either a load-multiple or a store-multiple instruction. This
13546 function can be used in situations where we can start with a single MEM
13547 rtx and adjust its address upwards.
13548 COUNT is the number of operations in the instruction, not counting a
13549 possible update of the base register. REGS is an array containing the
13550 register operands.
13551 BASEREG is the base register to be used in addressing the memory operands,
13552 which are constructed from BASEMEM.
13553 WRITE_BACK specifies whether the generated instruction should include an
13554 update of the base register.
13555 OFFSETP is used to pass an offset to and from this function; this offset
13556 is not used when constructing the address (instead BASEMEM should have an
13557 appropriate offset in its address); it is used only for setting
13558 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
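/* For illustration: with COUNT == 3 the memory operands are built at
   BASEREG, BASEREG + 4 and BASEREG + 8, each derived from BASEMEM so that
   alias and MEM_OFFSET information is preserved; *OFFSETP is advanced by
   4 * COUNT only when WRITE_BACK is set.  */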
13560 static rtx
13561 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13562 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13564 rtx mems[MAX_LDM_STM_OPS];
13565 HOST_WIDE_INT offset = *offsetp;
13566 int i;
13568 gcc_assert (count <= MAX_LDM_STM_OPS);
13570 if (GET_CODE (basereg) == PLUS)
13571 basereg = XEXP (basereg, 0);
13573 for (i = 0; i < count; i++)
13575 rtx addr = plus_constant (Pmode, basereg, i * 4);
13576 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13577 offset += 4;
13580 if (write_back)
13581 *offsetp = offset;
13583 if (is_load)
13584 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13585 write_back ? 4 * count : 0);
13586 else
13587 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13588 write_back ? 4 * count : 0);
13592 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13593 rtx basemem, HOST_WIDE_INT *offsetp)
13595 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13596 offsetp);
13600 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13601 rtx basemem, HOST_WIDE_INT *offsetp)
13603 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13604 offsetp);
13607 /* Called from a peephole2 expander to turn a sequence of loads into an
13608 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13609 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13610 is true if we can reorder the registers because their subsequent uses
13611 are commutative.
13612 Returns true iff we could generate a new instruction. */
13614 bool
13615 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13617 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13618 rtx mems[MAX_LDM_STM_OPS];
13619 int i, j, base_reg;
13620 rtx base_reg_rtx;
13621 HOST_WIDE_INT offset;
13622 int write_back = FALSE;
13623 int ldm_case;
13624 rtx addr;
13626 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13627 &base_reg, &offset, !sort_regs);
13629 if (ldm_case == 0)
13630 return false;
13632 if (sort_regs)
13633 for (i = 0; i < nops - 1; i++)
13634 for (j = i + 1; j < nops; j++)
13635 if (regs[i] > regs[j])
13637 int t = regs[i];
13638 regs[i] = regs[j];
13639 regs[j] = t;
13641 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13643 if (TARGET_THUMB1)
13645 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13646 gcc_assert (ldm_case == 1 || ldm_case == 5);
13647 write_back = TRUE;
13650 if (ldm_case == 5)
13652 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13653 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13654 offset = 0;
13655 if (!TARGET_THUMB1)
13656 base_reg_rtx = newbase;
13659 for (i = 0; i < nops; i++)
13661 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13662 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13663 SImode, addr, 0);
13665 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13666 write_back ? offset + i * 4 : 0));
13667 return true;
13670 /* Called from a peephole2 expander to turn a sequence of stores into an
13671 STM instruction. OPERANDS are the operands found by the peephole matcher;
13672 NOPS indicates how many separate stores we are trying to combine.
13673 Returns true iff we could generate a new instruction. */
13675 bool
13676 gen_stm_seq (rtx *operands, int nops)
13678 int i;
13679 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13680 rtx mems[MAX_LDM_STM_OPS];
13681 int base_reg;
13682 rtx base_reg_rtx;
13683 HOST_WIDE_INT offset;
13684 int write_back = FALSE;
13685 int stm_case;
13686 rtx addr;
13687 bool base_reg_dies;
13689 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13690 mem_order, &base_reg, &offset, true);
13692 if (stm_case == 0)
13693 return false;
13695 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13697 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13698 if (TARGET_THUMB1)
13700 gcc_assert (base_reg_dies);
13701 write_back = TRUE;
13704 if (stm_case == 5)
13706 gcc_assert (base_reg_dies);
13707 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13708 offset = 0;
13711 addr = plus_constant (Pmode, base_reg_rtx, offset);
13713 for (i = 0; i < nops; i++)
13715 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13716 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13717 SImode, addr, 0);
13719 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13720 write_back ? offset + i * 4 : 0));
13721 return true;
13724 /* Called from a peephole2 expander to turn a sequence of stores that are
13725 preceded by constant loads into an STM instruction. OPERANDS are the
13726 operands found by the peephole matcher; NOPS indicates how many
13727 separate stores we are trying to combine; there are 2 * NOPS
13728 instructions in the peephole.
13729 Returns true iff we could generate a new instruction. */
13731 bool
13732 gen_const_stm_seq (rtx *operands, int nops)
13734 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13735 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13736 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13737 rtx mems[MAX_LDM_STM_OPS];
13738 int base_reg;
13739 rtx base_reg_rtx;
13740 HOST_WIDE_INT offset;
13741 int write_back = FALSE;
13742 int stm_case;
13743 rtx addr;
13744 bool base_reg_dies;
13745 int i, j;
13746 HARD_REG_SET allocated;
13748 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13749 mem_order, &base_reg, &offset, false);
13751 if (stm_case == 0)
13752 return false;
13754 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13756 /* If the same register is used more than once, try to find a free
13757 register. */
13758 CLEAR_HARD_REG_SET (allocated);
13759 for (i = 0; i < nops; i++)
13761 for (j = i + 1; j < nops; j++)
13762 if (regs[i] == regs[j])
13764 rtx t = peep2_find_free_register (0, nops * 2,
13765 TARGET_THUMB1 ? "l" : "r",
13766 SImode, &allocated);
13767 if (t == NULL_RTX)
13768 return false;
13769 reg_rtxs[i] = t;
13770 regs[i] = REGNO (t);
13774 /* Compute an ordering that maps the register numbers to an ascending
13775 sequence. */
13776 reg_order[0] = 0;
13777 for (i = 0; i < nops; i++)
13778 if (regs[i] < regs[reg_order[0]])
13779 reg_order[0] = i;
13781 for (i = 1; i < nops; i++)
13783 int this_order = reg_order[i - 1];
13784 for (j = 0; j < nops; j++)
13785 if (regs[j] > regs[reg_order[i - 1]]
13786 && (this_order == reg_order[i - 1]
13787 || regs[j] < regs[this_order]))
13788 this_order = j;
13789 reg_order[i] = this_order;
13792 /* Ensure that registers that must be live after the instruction end
13793 up with the correct value. */
13794 for (i = 0; i < nops; i++)
13796 int this_order = reg_order[i];
13797 if ((this_order != mem_order[i]
13798 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13799 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13800 return false;
13803 /* Load the constants. */
13804 for (i = 0; i < nops; i++)
13806 rtx op = operands[2 * nops + mem_order[i]];
13807 sorted_regs[i] = regs[reg_order[i]];
13808 emit_move_insn (reg_rtxs[reg_order[i]], op);
13811 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13813 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13814 if (TARGET_THUMB1)
13816 gcc_assert (base_reg_dies);
13817 write_back = TRUE;
13820 if (stm_case == 5)
13822 gcc_assert (base_reg_dies);
13823 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13824 offset = 0;
13827 addr = plus_constant (Pmode, base_reg_rtx, offset);
13829 for (i = 0; i < nops; i++)
13831 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13832 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13833 SImode, addr, 0);
13835 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13836 write_back ? offset + i * 4 : 0));
13837 return true;
13840 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13841 unaligned copies on processors which support unaligned semantics for those
13842 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13843 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13844 An interleave factor of 1 (the minimum) will perform no interleaving.
13845 Load/store multiple are used for aligned addresses where possible. */
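/* For illustration: copying 16 bytes with INTERLEAVE_FACTOR == 2 between
   word-aligned buffers proceeds in two 8-byte chunks, each using a
   two-register load multiple and store multiple with base writeback; any
   trailing halfword or byte is then copied with single halfword/byte
   accesses.  */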
13847 static void
13848 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13849 HOST_WIDE_INT length,
13850 unsigned int interleave_factor)
13852 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13853 int *regnos = XALLOCAVEC (int, interleave_factor);
13854 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13855 HOST_WIDE_INT i, j;
13856 HOST_WIDE_INT remaining = length, words;
13857 rtx halfword_tmp = NULL, byte_tmp = NULL;
13858 rtx dst, src;
13859 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13860 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13861 HOST_WIDE_INT srcoffset, dstoffset;
13862 HOST_WIDE_INT src_autoinc, dst_autoinc;
13863 rtx mem, addr;
13865 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13867 /* Use hard registers if we have aligned source or destination so we can use
13868 load/store multiple with contiguous registers. */
13869 if (dst_aligned || src_aligned)
13870 for (i = 0; i < interleave_factor; i++)
13871 regs[i] = gen_rtx_REG (SImode, i);
13872 else
13873 for (i = 0; i < interleave_factor; i++)
13874 regs[i] = gen_reg_rtx (SImode);
13876 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13877 src = copy_addr_to_reg (XEXP (srcbase, 0));
13879 srcoffset = dstoffset = 0;
13881 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13882 For copying the last bytes we want to subtract this offset again. */
13883 src_autoinc = dst_autoinc = 0;
13885 for (i = 0; i < interleave_factor; i++)
13886 regnos[i] = i;
13888 /* Copy BLOCK_SIZE_BYTES chunks. */
13890 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13892 /* Load words. */
13893 if (src_aligned && interleave_factor > 1)
13895 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13896 TRUE, srcbase, &srcoffset));
13897 src_autoinc += UNITS_PER_WORD * interleave_factor;
13899 else
13901 for (j = 0; j < interleave_factor; j++)
13903 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13904 - src_autoinc));
13905 mem = adjust_automodify_address (srcbase, SImode, addr,
13906 srcoffset + j * UNITS_PER_WORD);
13907 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13909 srcoffset += block_size_bytes;
13912 /* Store words. */
13913 if (dst_aligned && interleave_factor > 1)
13915 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13916 TRUE, dstbase, &dstoffset));
13917 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13919 else
13921 for (j = 0; j < interleave_factor; j++)
13923 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13924 - dst_autoinc));
13925 mem = adjust_automodify_address (dstbase, SImode, addr,
13926 dstoffset + j * UNITS_PER_WORD);
13927 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13929 dstoffset += block_size_bytes;
13932 remaining -= block_size_bytes;
13935 /* Copy any whole words left (note these aren't interleaved with any
13936 subsequent halfword/byte load/stores in the interests of simplicity). */
13938 words = remaining / UNITS_PER_WORD;
13940 gcc_assert (words < interleave_factor);
13942 if (src_aligned && words > 1)
13944 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13945 &srcoffset));
13946 src_autoinc += UNITS_PER_WORD * words;
13948 else
13950 for (j = 0; j < words; j++)
13952 addr = plus_constant (Pmode, src,
13953 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13954 mem = adjust_automodify_address (srcbase, SImode, addr,
13955 srcoffset + j * UNITS_PER_WORD);
13956 if (src_aligned)
13957 emit_move_insn (regs[j], mem);
13958 else
13959 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13961 srcoffset += words * UNITS_PER_WORD;
13964 if (dst_aligned && words > 1)
13966 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13967 &dstoffset));
13968 dst_autoinc += words * UNITS_PER_WORD;
13970 else
13972 for (j = 0; j < words; j++)
13974 addr = plus_constant (Pmode, dst,
13975 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13976 mem = adjust_automodify_address (dstbase, SImode, addr,
13977 dstoffset + j * UNITS_PER_WORD);
13978 if (dst_aligned)
13979 emit_move_insn (mem, regs[j]);
13980 else
13981 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13983 dstoffset += words * UNITS_PER_WORD;
13986 remaining -= words * UNITS_PER_WORD;
13988 gcc_assert (remaining < 4);
13990 /* Copy a halfword if necessary. */
13992 if (remaining >= 2)
13994 halfword_tmp = gen_reg_rtx (SImode);
13996 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13997 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13998 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14000 /* Either write out immediately, or delay until we've loaded the last
14001 byte, depending on interleave factor. */
14002 if (interleave_factor == 1)
14004 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14005 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14006 emit_insn (gen_unaligned_storehi (mem,
14007 gen_lowpart (HImode, halfword_tmp)));
14008 halfword_tmp = NULL;
14009 dstoffset += 2;
14012 remaining -= 2;
14013 srcoffset += 2;
14016 gcc_assert (remaining < 2);
14018 /* Copy last byte. */
14020 if ((remaining & 1) != 0)
14022 byte_tmp = gen_reg_rtx (SImode);
14024 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14025 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14026 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14028 if (interleave_factor == 1)
14030 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14031 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14032 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14033 byte_tmp = NULL;
14034 dstoffset++;
14037 remaining--;
14038 srcoffset++;
14041 /* Store last halfword if we haven't done so already. */
14043 if (halfword_tmp)
14045 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14046 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14047 emit_insn (gen_unaligned_storehi (mem,
14048 gen_lowpart (HImode, halfword_tmp)));
14049 dstoffset += 2;
14052 /* Likewise for last byte. */
14054 if (byte_tmp)
14056 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14057 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14058 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14059 dstoffset++;
14062 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14065 /* From mips_adjust_block_mem:
14067 Helper function for doing a loop-based block operation on memory
14068 reference MEM. Each iteration of the loop will operate on LENGTH
14069 bytes of MEM.
14071 Create a new base register for use within the loop and point it to
14072 the start of MEM. Create a new memory reference that uses this
14073 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14075 static void
14076 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14077 rtx *loop_mem)
14079 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14081 /* Although the new mem does not refer to a known location,
14082 it does keep up to LENGTH bytes of alignment. */
14083 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14084 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14087 /* From mips_block_move_loop:
14089 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14090 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14091 the memory regions do not overlap. */
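/* For illustration: with LENGTH == 100 and BYTES_PER_ITER == 16 the loop
   below copies 96 bytes in six iterations, comparing the advanced source
   pointer against its precomputed final value, and the remaining 4 bytes
   are copied straight-line afterwards.  */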
14093 static void
14094 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14095 unsigned int interleave_factor,
14096 HOST_WIDE_INT bytes_per_iter)
14098 rtx src_reg, dest_reg, final_src, test;
14099 HOST_WIDE_INT leftover;
14101 leftover = length % bytes_per_iter;
14102 length -= leftover;
14104 /* Create registers and memory references for use within the loop. */
14105 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14106 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14108 /* Calculate the value that SRC_REG should have after the last iteration of
14109 the loop. */
14110 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14111 0, 0, OPTAB_WIDEN);
14113 /* Emit the start of the loop. */
14114 rtx_code_label *label = gen_label_rtx ();
14115 emit_label (label);
14117 /* Emit the loop body. */
14118 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14119 interleave_factor);
14121 /* Move on to the next block. */
14122 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14123 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14125 /* Emit the loop condition. */
14126 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14127 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14129 /* Mop up any left-over bytes. */
14130 if (leftover)
14131 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14134 /* Emit a block move when either the source or destination is unaligned (not
14135 aligned to a four-byte boundary). This may need further tuning depending on
14136 core type, optimize_size setting, etc. */
14138 static int
14139 arm_movmemqi_unaligned (rtx *operands)
14141 HOST_WIDE_INT length = INTVAL (operands[2]);
14143 if (optimize_size)
14145 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14146 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14147 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14148 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14149 or dst_aligned though: allow more interleaving in those cases since the
14150 resulting code can be smaller. */
14151 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14152 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14154 if (length > 12)
14155 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14156 interleave_factor, bytes_per_iter);
14157 else
14158 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14159 interleave_factor);
14161 else
14163 /* Note that the loop created by arm_block_move_unaligned_loop may be
14164 subject to loop unrolling, which makes tuning this condition a little
14165 redundant. */
14166 if (length > 32)
14167 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14168 else
14169 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14172 return 1;
14176 arm_gen_movmemqi (rtx *operands)
14178 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14179 HOST_WIDE_INT srcoffset, dstoffset;
14180 rtx src, dst, srcbase, dstbase;
14181 rtx part_bytes_reg = NULL;
14182 rtx mem;
14184 if (!CONST_INT_P (operands[2])
14185 || !CONST_INT_P (operands[3])
14186 || INTVAL (operands[2]) > 64)
14187 return 0;
14189 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14190 return arm_movmemqi_unaligned (operands);
14192 if (INTVAL (operands[3]) & 3)
14193 return 0;
14195 dstbase = operands[0];
14196 srcbase = operands[1];
14198 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14199 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14201 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14202 out_words_to_go = INTVAL (operands[2]) / 4;
14203 last_bytes = INTVAL (operands[2]) & 3;
14204 dstoffset = srcoffset = 0;
14206 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14207 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14209 while (in_words_to_go >= 2)
14211 if (in_words_to_go > 4)
14212 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14213 TRUE, srcbase, &srcoffset));
14214 else
14215 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14216 src, FALSE, srcbase,
14217 &srcoffset));
14219 if (out_words_to_go)
14221 if (out_words_to_go > 4)
14222 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14223 TRUE, dstbase, &dstoffset));
14224 else if (out_words_to_go != 1)
14225 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14226 out_words_to_go, dst,
14227 (last_bytes == 0
14228 ? FALSE : TRUE),
14229 dstbase, &dstoffset));
14230 else
14232 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14233 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14234 if (last_bytes != 0)
14236 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14237 dstoffset += 4;
14242 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14243 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14246 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14247 if (out_words_to_go)
14249 rtx sreg;
14251 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14252 sreg = copy_to_reg (mem);
14254 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14255 emit_move_insn (mem, sreg);
14256 in_words_to_go--;
14258 gcc_assert (!in_words_to_go); /* Sanity check */
14261 if (in_words_to_go)
14263 gcc_assert (in_words_to_go > 0);
14265 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14266 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14269 gcc_assert (!last_bytes || part_bytes_reg);
14271 if (BYTES_BIG_ENDIAN && last_bytes)
14273 rtx tmp = gen_reg_rtx (SImode);
14275 /* The bytes we want are in the top end of the word. */
14276 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14277 GEN_INT (8 * (4 - last_bytes))));
14278 part_bytes_reg = tmp;
14280 while (last_bytes)
14282 mem = adjust_automodify_address (dstbase, QImode,
14283 plus_constant (Pmode, dst,
14284 last_bytes - 1),
14285 dstoffset + last_bytes - 1);
14286 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14288 if (--last_bytes)
14290 tmp = gen_reg_rtx (SImode);
14291 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14292 part_bytes_reg = tmp;
14297 else
14299 if (last_bytes > 1)
14301 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14302 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14303 last_bytes -= 2;
14304 if (last_bytes)
14306 rtx tmp = gen_reg_rtx (SImode);
14307 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14308 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14309 part_bytes_reg = tmp;
14310 dstoffset += 2;
14314 if (last_bytes)
14316 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14317 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14321 return 1;
14324 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14325 by mode size. */
14326 inline static rtx
14327 next_consecutive_mem (rtx mem)
14329 machine_mode mode = GET_MODE (mem);
14330 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14331 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14333 return adjust_automodify_address (mem, mode, addr, offset);
14336 /* Copy using LDRD/STRD instructions whenever possible.
14337 Returns true upon success. */
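/* For illustration: a 14-byte copy between doubleword-aligned buffers is
   expanded as one DImode (ldrd/strd) move, one SImode word copy and one
   halfword copy; if one side is unaligned, that side of each 8-byte step
   is done as a pair of unaligned SImode accesses instead.  */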
14338 bool
14339 gen_movmem_ldrd_strd (rtx *operands)
14341 unsigned HOST_WIDE_INT len;
14342 HOST_WIDE_INT align;
14343 rtx src, dst, base;
14344 rtx reg0;
14345 bool src_aligned, dst_aligned;
14346 bool src_volatile, dst_volatile;
14348 gcc_assert (CONST_INT_P (operands[2]));
14349 gcc_assert (CONST_INT_P (operands[3]));
14351 len = UINTVAL (operands[2]);
14352 if (len > 64)
14353 return false;
14355 /* Maximum alignment we can assume for both src and dst buffers. */
14356 align = INTVAL (operands[3]);
14358 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14359 return false;
14361 /* Place src and dst addresses in registers
14362 and update the corresponding mem rtx. */
14363 dst = operands[0];
14364 dst_volatile = MEM_VOLATILE_P (dst);
14365 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14366 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14367 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14369 src = operands[1];
14370 src_volatile = MEM_VOLATILE_P (src);
14371 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14372 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14373 src = adjust_automodify_address (src, VOIDmode, base, 0);
14375 if (!unaligned_access && !(src_aligned && dst_aligned))
14376 return false;
14378 if (src_volatile || dst_volatile)
14379 return false;
14381 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14382 if (!(dst_aligned || src_aligned))
14383 return arm_gen_movmemqi (operands);
14385 /* If either src or dst is unaligned, we'll be accessing it as pairs
14386 of unaligned SImode accesses. Otherwise we can generate DImode
14387 ldrd/strd instructions. */
14388 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14389 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14391 while (len >= 8)
14393 len -= 8;
14394 reg0 = gen_reg_rtx (DImode);
14395 rtx low_reg = NULL_RTX;
14396 rtx hi_reg = NULL_RTX;
14398 if (!src_aligned || !dst_aligned)
14400 low_reg = gen_lowpart (SImode, reg0);
14401 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14403 if (src_aligned)
14404 emit_move_insn (reg0, src);
14405 else
14407 emit_insn (gen_unaligned_loadsi (low_reg, src));
14408 src = next_consecutive_mem (src);
14409 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14412 if (dst_aligned)
14413 emit_move_insn (dst, reg0);
14414 else
14416 emit_insn (gen_unaligned_storesi (dst, low_reg));
14417 dst = next_consecutive_mem (dst);
14418 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14421 src = next_consecutive_mem (src);
14422 dst = next_consecutive_mem (dst);
14425 gcc_assert (len < 8);
14426 if (len >= 4)
14428 /* More than a word but less than a double-word to copy. Copy a word. */
14429 reg0 = gen_reg_rtx (SImode);
14430 src = adjust_address (src, SImode, 0);
14431 dst = adjust_address (dst, SImode, 0);
14432 if (src_aligned)
14433 emit_move_insn (reg0, src);
14434 else
14435 emit_insn (gen_unaligned_loadsi (reg0, src));
14437 if (dst_aligned)
14438 emit_move_insn (dst, reg0);
14439 else
14440 emit_insn (gen_unaligned_storesi (dst, reg0));
14442 src = next_consecutive_mem (src);
14443 dst = next_consecutive_mem (dst);
14444 len -= 4;
14447 if (len == 0)
14448 return true;
14450 /* Copy the remaining bytes. */
14451 if (len >= 2)
14453 dst = adjust_address (dst, HImode, 0);
14454 src = adjust_address (src, HImode, 0);
14455 reg0 = gen_reg_rtx (SImode);
14456 if (src_aligned)
14457 emit_insn (gen_zero_extendhisi2 (reg0, src));
14458 else
14459 emit_insn (gen_unaligned_loadhiu (reg0, src));
14461 if (dst_aligned)
14462 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14463 else
14464 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14466 src = next_consecutive_mem (src);
14467 dst = next_consecutive_mem (dst);
14468 if (len == 2)
14469 return true;
14472 dst = adjust_address (dst, QImode, 0);
14473 src = adjust_address (src, QImode, 0);
14474 reg0 = gen_reg_rtx (QImode);
14475 emit_move_insn (reg0, src);
14476 emit_move_insn (dst, reg0);
14477 return true;
14480 /* Select a dominance comparison mode if possible for a test of the general
14481 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14482 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14483 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14484 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14485 In all cases OP will be either EQ or NE, but we don't need to know which
14486 here. If we are unable to support a dominance comparison we return
14487 CC mode. This will then fail to match for the RTL expressions that
14488 generate this call. */
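/* For illustration: two EQ comparisons combined with DOM_CC_X_AND_Y select
   CC_DEQmode, while an EQ combined with a GE under DOM_CC_X_OR_Y selects
   CC_DGEmode, since EQ dominates GE.  */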
14489 machine_mode
14490 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14492 enum rtx_code cond1, cond2;
14493 int swapped = 0;
14495 /* Currently we will probably get the wrong result if the individual
14496 comparisons are not simple. This also ensures that it is safe to
14497 reverse a comparison if necessary. */
14498 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14499 != CCmode)
14500 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14501 != CCmode))
14502 return CCmode;
14504 /* The if_then_else variant of this tests the second condition if the
14505 first passes, but is true if the first fails. Reverse the first
14506 condition to get a true "inclusive-or" expression. */
14507 if (cond_or == DOM_CC_NX_OR_Y)
14508 cond1 = reverse_condition (cond1);
14510 /* If the comparisons are not equal, and one doesn't dominate the other,
14511 then we can't do this. */
14512 if (cond1 != cond2
14513 && !comparison_dominates_p (cond1, cond2)
14514 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14515 return CCmode;
14517 if (swapped)
14518 std::swap (cond1, cond2);
14520 switch (cond1)
14522 case EQ:
14523 if (cond_or == DOM_CC_X_AND_Y)
14524 return CC_DEQmode;
14526 switch (cond2)
14528 case EQ: return CC_DEQmode;
14529 case LE: return CC_DLEmode;
14530 case LEU: return CC_DLEUmode;
14531 case GE: return CC_DGEmode;
14532 case GEU: return CC_DGEUmode;
14533 default: gcc_unreachable ();
14536 case LT:
14537 if (cond_or == DOM_CC_X_AND_Y)
14538 return CC_DLTmode;
14540 switch (cond2)
14542 case LT:
14543 return CC_DLTmode;
14544 case LE:
14545 return CC_DLEmode;
14546 case NE:
14547 return CC_DNEmode;
14548 default:
14549 gcc_unreachable ();
14552 case GT:
14553 if (cond_or == DOM_CC_X_AND_Y)
14554 return CC_DGTmode;
14556 switch (cond2)
14558 case GT:
14559 return CC_DGTmode;
14560 case GE:
14561 return CC_DGEmode;
14562 case NE:
14563 return CC_DNEmode;
14564 default:
14565 gcc_unreachable ();
14568 case LTU:
14569 if (cond_or == DOM_CC_X_AND_Y)
14570 return CC_DLTUmode;
14572 switch (cond2)
14574 case LTU:
14575 return CC_DLTUmode;
14576 case LEU:
14577 return CC_DLEUmode;
14578 case NE:
14579 return CC_DNEmode;
14580 default:
14581 gcc_unreachable ();
14584 case GTU:
14585 if (cond_or == DOM_CC_X_AND_Y)
14586 return CC_DGTUmode;
14588 switch (cond2)
14590 case GTU:
14591 return CC_DGTUmode;
14592 case GEU:
14593 return CC_DGEUmode;
14594 case NE:
14595 return CC_DNEmode;
14596 default:
14597 gcc_unreachable ();
14600 /* The remaining cases only occur when both comparisons are the
14601 same. */
14602 case NE:
14603 gcc_assert (cond1 == cond2);
14604 return CC_DNEmode;
14606 case LE:
14607 gcc_assert (cond1 == cond2);
14608 return CC_DLEmode;
14610 case GE:
14611 gcc_assert (cond1 == cond2);
14612 return CC_DGEmode;
14614 case LEU:
14615 gcc_assert (cond1 == cond2);
14616 return CC_DLEUmode;
14618 case GEU:
14619 gcc_assert (cond1 == cond2);
14620 return CC_DGEUmode;
14622 default:
14623 gcc_unreachable ();
14627 machine_mode
14628 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14630 /* All floating point compares return CCFP if it is an equality
14631 comparison, and CCFPE otherwise. */
14632 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14634 switch (op)
14636 case EQ:
14637 case NE:
14638 case UNORDERED:
14639 case ORDERED:
14640 case UNLT:
14641 case UNLE:
14642 case UNGT:
14643 case UNGE:
14644 case UNEQ:
14645 case LTGT:
14646 return CCFPmode;
14648 case LT:
14649 case LE:
14650 case GT:
14651 case GE:
14652 return CCFPEmode;
14654 default:
14655 gcc_unreachable ();
14659 /* A compare with a shifted operand. Because of canonicalization, the
14660 comparison will have to be swapped when we emit the assembler. */
14661 if (GET_MODE (y) == SImode
14662 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14663 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14664 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14665 || GET_CODE (x) == ROTATERT))
14666 return CC_SWPmode;
14668 /* This operation is performed swapped, but since we only rely on the Z
14669 flag we don't need an additional mode. */
14670 if (GET_MODE (y) == SImode
14671 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14672 && GET_CODE (x) == NEG
14673 && (op == EQ || op == NE))
14674 return CC_Zmode;
14676 /* This is a special case that is used by combine to allow a
14677 comparison of a shifted byte load to be split into a zero-extend
14678 followed by a comparison of the shifted integer (only valid for
14679 equalities and unsigned inequalities). */
14680 if (GET_MODE (x) == SImode
14681 && GET_CODE (x) == ASHIFT
14682 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14683 && GET_CODE (XEXP (x, 0)) == SUBREG
14684 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14685 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14686 && (op == EQ || op == NE
14687 || op == GEU || op == GTU || op == LTU || op == LEU)
14688 && CONST_INT_P (y))
14689 return CC_Zmode;
14691 /* A construct for a conditional compare: if the false arm contains
14692 0, then both conditions must be true; otherwise either condition
14693 must be true. Not all conditions are possible, so CCmode is
14694 returned if it can't be done. */
14695 if (GET_CODE (x) == IF_THEN_ELSE
14696 && (XEXP (x, 2) == const0_rtx
14697 || XEXP (x, 2) == const1_rtx)
14698 && COMPARISON_P (XEXP (x, 0))
14699 && COMPARISON_P (XEXP (x, 1)))
14700 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14701 INTVAL (XEXP (x, 2)));
14703 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14704 if (GET_CODE (x) == AND
14705 && (op == EQ || op == NE)
14706 && COMPARISON_P (XEXP (x, 0))
14707 && COMPARISON_P (XEXP (x, 1)))
14708 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14709 DOM_CC_X_AND_Y);
14711 if (GET_CODE (x) == IOR
14712 && (op == EQ || op == NE)
14713 && COMPARISON_P (XEXP (x, 0))
14714 && COMPARISON_P (XEXP (x, 1)))
14715 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14716 DOM_CC_X_OR_Y);
14718 /* An operation (on Thumb) where we want to test for a single bit.
14719 This is done by shifting that bit up into the top bit of a
14720 scratch register; we can then branch on the sign bit. */
14721 if (TARGET_THUMB1
14722 && GET_MODE (x) == SImode
14723 && (op == EQ || op == NE)
14724 && GET_CODE (x) == ZERO_EXTRACT
14725 && XEXP (x, 1) == const1_rtx)
14726 return CC_Nmode;
14728 /* An operation that sets the condition codes as a side-effect; the
14729 V flag is not set correctly, so we can only use comparisons where
14730 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14731 instead.) */
14732 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14733 if (GET_MODE (x) == SImode
14734 && y == const0_rtx
14735 && (op == EQ || op == NE || op == LT || op == GE)
14736 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14737 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14738 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14739 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14740 || GET_CODE (x) == LSHIFTRT
14741 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14742 || GET_CODE (x) == ROTATERT
14743 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14744 return CC_NOOVmode;
14746 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14747 return CC_Zmode;
14749 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14750 && GET_CODE (x) == PLUS
14751 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14752 return CC_Cmode;
14754 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14756 switch (op)
14758 case EQ:
14759 case NE:
14760 /* A DImode comparison against zero can be implemented by
14761 or'ing the two halves together. */
14762 if (y == const0_rtx)
14763 return CC_Zmode;
14765 /* We can do an equality test in three Thumb instructions. */
14766 if (!TARGET_32BIT)
14767 return CC_Zmode;
14769 /* FALLTHROUGH */
14771 case LTU:
14772 case LEU:
14773 case GTU:
14774 case GEU:
14775 /* DImode unsigned comparisons can be implemented by cmp +
14776 cmpeq without a scratch register. Not worth doing in
14777 Thumb-2. */
14778 if (TARGET_32BIT)
14779 return CC_CZmode;
14781 /* FALLTHROUGH */
14783 case LT:
14784 case LE:
14785 case GT:
14786 case GE:
14787 /* DImode signed and unsigned comparisons can be implemented
14788 by cmp + sbcs with a scratch register, but that does not
14789 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14790 gcc_assert (op != EQ && op != NE);
14791 return CC_NCVmode;
14793 default:
14794 gcc_unreachable ();
14798 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14799 return GET_MODE (x);
14801 return CCmode;
14804 /* X and Y are two things to compare using CODE. Emit the compare insn and
14805 return the rtx for the CC register in the proper mode. SCRATCH is an
14806 SImode scratch register that some DImode comparisons need after reload. */
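/* For illustration: on Thumb-1, a DImode equality test against a nonzero
   value selects CC_Zmode, so the code below XORs the operands and compares
   the result against zero, clobbering an SImode scratch; 32-bit targets
   select CC_CZmode for such comparisons and emit a plain COMPARE instead.  */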
14808 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14810 machine_mode mode;
14811 rtx cc_reg;
14812 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14814 /* We might have X as a constant, Y as a register because of the predicates
14815 used for cmpdi. If so, force X to a register here. */
14816 if (dimode_comparison && !REG_P (x))
14817 x = force_reg (DImode, x);
14819 mode = SELECT_CC_MODE (code, x, y);
14820 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14822 if (dimode_comparison
14823 && mode != CC_CZmode)
14825 rtx clobber, set;
14827 /* To compare two non-zero values for equality, XOR them and
14828 then compare against zero. Not used for ARM mode; there
14829 CC_CZmode is cheaper. */
14830 if (mode == CC_Zmode && y != const0_rtx)
14832 gcc_assert (!reload_completed);
14833 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14834 y = const0_rtx;
14837 /* A scratch register is required. */
14838 if (reload_completed)
14839 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14840 else
14841 scratch = gen_rtx_SCRATCH (SImode);
14843 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14844 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14847 else
14848 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14850 return cc_reg;
14853 /* Generate a sequence of insns that will generate the correct return
14854 address mask depending on the physical architecture that the program
14855 is running on. */
14857 arm_gen_return_addr_mask (void)
14859 rtx reg = gen_reg_rtx (Pmode);
14861 emit_insn (gen_return_addr_mask (reg));
14862 return reg;
14865 void
14866 arm_reload_in_hi (rtx *operands)
14868 rtx ref = operands[1];
14869 rtx base, scratch;
14870 HOST_WIDE_INT offset = 0;
14872 if (GET_CODE (ref) == SUBREG)
14874 offset = SUBREG_BYTE (ref);
14875 ref = SUBREG_REG (ref);
14878 if (REG_P (ref))
14880 /* We have a pseudo which has been spilt onto the stack; there
14881 are two cases here: the first where there is a simple
14882 stack-slot replacement and a second where the stack-slot is
14883 out of range, or is used as a subreg. */
14884 if (reg_equiv_mem (REGNO (ref)))
14886 ref = reg_equiv_mem (REGNO (ref));
14887 base = find_replacement (&XEXP (ref, 0));
14889 else
14890 /* The slot is out of range, or was dressed up in a SUBREG. */
14891 base = reg_equiv_address (REGNO (ref));
14893 /* PR 62554: If there is no equivalent memory location then just move
14894 the value as an SImode register move. This happens when the target
14895 architecture variant does not have an HImode register move. */
14896 if (base == NULL)
14898 gcc_assert (REG_P (operands[0]));
14899 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14900 gen_rtx_SUBREG (SImode, ref, 0)));
14901 return;
14904 else
14905 base = find_replacement (&XEXP (ref, 0));
14907 /* Handle the case where the address is too complex to be offset by 1. */
14908 if (GET_CODE (base) == MINUS
14909 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14911 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14913 emit_set_insn (base_plus, base);
14914 base = base_plus;
14916 else if (GET_CODE (base) == PLUS)
14918 /* The addend must be CONST_INT, or we would have dealt with it above. */
14919 HOST_WIDE_INT hi, lo;
14921 offset += INTVAL (XEXP (base, 1));
14922 base = XEXP (base, 0);
14924 /* Rework the address into a legal sequence of insns. */
14925 /* Valid range for lo is -4095 -> 4095 */
14926 lo = (offset >= 0
14927 ? (offset & 0xfff)
14928 : -((-offset) & 0xfff));
14930 /* Corner case, if lo is the max offset then we would be out of range
14931 once we have added the additional 1 below, so bump the msb into the
14932 pre-loading insn(s). */
14933 if (lo == 4095)
14934 lo &= 0x7ff;
14936 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14937 ^ (HOST_WIDE_INT) 0x80000000)
14938 - (HOST_WIDE_INT) 0x80000000);
14940 gcc_assert (hi + lo == offset);
14942 if (hi != 0)
14944 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14946 /* Get the base address; addsi3 knows how to handle constants
14947 that require more than one insn. */
14948 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14949 base = base_plus;
14950 offset = lo;
14954 /* Operands[2] may overlap operands[0] (though it won't overlap
14955 operands[1]); that's why we asked for a DImode reg -- so we can
14956 use the half that does not overlap. */
14957 if (REGNO (operands[2]) == REGNO (operands[0]))
14958 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14959 else
14960 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14962 emit_insn (gen_zero_extendqisi2 (scratch,
14963 gen_rtx_MEM (QImode,
14964 plus_constant (Pmode, base,
14965 offset))));
14966 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14967 gen_rtx_MEM (QImode,
14968 plus_constant (Pmode, base,
14969 offset + 1))));
14970 if (!BYTES_BIG_ENDIAN)
14971 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14972 gen_rtx_IOR (SImode,
14973 gen_rtx_ASHIFT
14974 (SImode,
14975 gen_rtx_SUBREG (SImode, operands[0], 0),
14976 GEN_INT (8)),
14977 scratch));
14978 else
14979 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14980 gen_rtx_IOR (SImode,
14981 gen_rtx_ASHIFT (SImode, scratch,
14982 GEN_INT (8)),
14983 gen_rtx_SUBREG (SImode, operands[0], 0)));
14986 /* Handle storing a half-word to memory during reload by synthesizing as two
14987 byte stores. Take care not to clobber the input values until after we
14988 have moved them somewhere safe. This code assumes that if the DImode
14989 scratch in operands[2] overlaps either the input value or output address
14990 in some way, then that value must die in this insn (we absolutely need
14991 two scratch registers for some corner cases). */
14992 void
14993 arm_reload_out_hi (rtx *operands)
14995 rtx ref = operands[0];
14996 rtx outval = operands[1];
14997 rtx base, scratch;
14998 HOST_WIDE_INT offset = 0;
15000 if (GET_CODE (ref) == SUBREG)
15002 offset = SUBREG_BYTE (ref);
15003 ref = SUBREG_REG (ref);
15006 if (REG_P (ref))
15008 /* We have a pseudo which has been spilt onto the stack; there
15009 are two cases here: the first where there is a simple
15010 stack-slot replacement and a second where the stack-slot is
15011 out of range, or is used as a subreg. */
15012 if (reg_equiv_mem (REGNO (ref)))
15014 ref = reg_equiv_mem (REGNO (ref));
15015 base = find_replacement (&XEXP (ref, 0));
15017 else
15018 /* The slot is out of range, or was dressed up in a SUBREG. */
15019 base = reg_equiv_address (REGNO (ref));
15021 /* PR 62254: If there is no equivalent memory location then just move
15022 the value as an SImode register move. This happens when the target
15023 architecture variant does not have an HImode register move. */
15024 if (base == NULL)
15026 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15028 if (REG_P (outval))
15030 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15031 gen_rtx_SUBREG (SImode, outval, 0)));
15033 else /* SUBREG_P (outval) */
15035 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15036 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15037 SUBREG_REG (outval)));
15038 else
15039 /* FIXME: Handle other cases ? */
15040 gcc_unreachable ();
15042 return;
15045 else
15046 base = find_replacement (&XEXP (ref, 0));
15048 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15050 /* Handle the case where the address is too complex to be offset by 1. */
15051 if (GET_CODE (base) == MINUS
15052 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15054 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15056 /* Be careful not to destroy OUTVAL. */
15057 if (reg_overlap_mentioned_p (base_plus, outval))
15059 /* Updating base_plus might destroy outval, see if we can
15060 swap the scratch and base_plus. */
15061 if (!reg_overlap_mentioned_p (scratch, outval))
15062 std::swap (scratch, base_plus);
15063 else
15065 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15067 /* Be conservative and copy OUTVAL into the scratch now,
15068 this should only be necessary if outval is a subreg
15069 of something larger than a word. */
15070 /* XXX Might this clobber base? I can't see how it can,
15071 since scratch is known to overlap with OUTVAL, and
15072 must be wider than a word. */
15073 emit_insn (gen_movhi (scratch_hi, outval));
15074 outval = scratch_hi;
15078 emit_set_insn (base_plus, base);
15079 base = base_plus;
15081 else if (GET_CODE (base) == PLUS)
15083 /* The addend must be CONST_INT, or we would have dealt with it above. */
15084 HOST_WIDE_INT hi, lo;
15086 offset += INTVAL (XEXP (base, 1));
15087 base = XEXP (base, 0);
15089 /* Rework the address into a legal sequence of insns. */
15090 /* Valid range for lo is -4095 -> 4095 */
15091 lo = (offset >= 0
15092 ? (offset & 0xfff)
15093 : -((-offset) & 0xfff));
15095 /* Corner case: if lo is the max offset then we would be out of range
15096 once we have added the additional 1 below, so bump the msb into the
15097 pre-loading insn(s). */
15098 if (lo == 4095)
15099 lo &= 0x7ff;
15101 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15102 ^ (HOST_WIDE_INT) 0x80000000)
15103 - (HOST_WIDE_INT) 0x80000000);
15105 gcc_assert (hi + lo == offset);
15107 if (hi != 0)
15109 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15111 /* Be careful not to destroy OUTVAL. */
15112 if (reg_overlap_mentioned_p (base_plus, outval))
15114 /* Updating base_plus might destroy outval, see if we
15115 can swap the scratch and base_plus. */
15116 if (!reg_overlap_mentioned_p (scratch, outval))
15117 std::swap (scratch, base_plus);
15118 else
15120 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15122 /* Be conservative and copy outval into scratch now,
15123 this should only be necessary if outval is a
15124 subreg of something larger than a word. */
15125 /* XXX Might this clobber base? I can't see how it
15126 can, since scratch is known to overlap with
15127 outval. */
15128 emit_insn (gen_movhi (scratch_hi, outval));
15129 outval = scratch_hi;
15133 /* Get the base address; addsi3 knows how to handle constants
15134 that require more than one insn. */
15135 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15136 base = base_plus;
15137 offset = lo;
15141 if (BYTES_BIG_ENDIAN)
15143 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15144 plus_constant (Pmode, base,
15145 offset + 1)),
15146 gen_lowpart (QImode, outval)));
15147 emit_insn (gen_lshrsi3 (scratch,
15148 gen_rtx_SUBREG (SImode, outval, 0),
15149 GEN_INT (8)));
15150 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15151 offset)),
15152 gen_lowpart (QImode, scratch)));
15154 else
15156 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15157 offset)),
15158 gen_lowpart (QImode, outval)));
15159 emit_insn (gen_lshrsi3 (scratch,
15160 gen_rtx_SUBREG (SImode, outval, 0),
15161 GEN_INT (8)));
15162 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15163 plus_constant (Pmode, base,
15164 offset + 1)),
15165 gen_lowpart (QImode, scratch)));
15169 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15170 (padded to the size of a word) should be passed in a register. */
15172 static bool
15173 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15175 if (TARGET_AAPCS_BASED)
15176 return must_pass_in_stack_var_size (mode, type);
15177 else
15178 return must_pass_in_stack_var_size_or_pad (mode, type);
15182 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15183 byte of a stack argument has useful data. For legacy APCS ABIs we use
15184 the default. For AAPCS based ABIs small aggregate types are placed
15185 at the lowest memory address. */
15187 static pad_direction
15188 arm_function_arg_padding (machine_mode mode, const_tree type)
15190 if (!TARGET_AAPCS_BASED)
15191 return default_function_arg_padding (mode, type);
15193 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15194 return PAD_DOWNWARD;
15196 return PAD_UPWARD;
15200 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15201 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15202 register has useful data, and return the opposite if the most
15203 significant byte does. */
15205 bool
15206 arm_pad_reg_upward (machine_mode mode,
15207 tree type, int first ATTRIBUTE_UNUSED)
15209 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15211 /* For AAPCS, small aggregates, small fixed-point types,
15212 and small complex types are always padded upwards. */
15213 if (type)
15215 if ((AGGREGATE_TYPE_P (type)
15216 || TREE_CODE (type) == COMPLEX_TYPE
15217 || FIXED_POINT_TYPE_P (type))
15218 && int_size_in_bytes (type) <= 4)
15219 return true;
15221 else
15223 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15224 && GET_MODE_SIZE (mode) <= 4)
15225 return true;
15229 /* Otherwise, use default padding. */
15230 return !BYTES_BIG_ENDIAN;
15233 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15234 assuming that the address in the base register is word aligned. */
15235 bool
15236 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15238 HOST_WIDE_INT max_offset;
15240 /* Offset must be a multiple of 4 in Thumb mode. */
15241 if (TARGET_THUMB2 && ((offset & 3) != 0))
15242 return false;
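/* The LDRD/STRD immediate offset field is 8 bits wide: in Thumb-2 it is
   scaled by 4 (hence +/-1020), while in ARM mode it is an unscaled byte
   offset (hence +/-255).  */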
15244 if (TARGET_THUMB2)
15245 max_offset = 1020;
15246 else if (TARGET_ARM)
15247 max_offset = 255;
15248 else
15249 return false;
15251 return ((offset <= max_offset) && (offset >= -max_offset));
15254 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15255 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15256 Assumes that the address in the base register RN is word aligned. Pattern
15257 guarantees that both memory accesses use the same base register,
15258 the offsets are constants within the range, and the gap between the offsets is 4.
15259 If reload is complete then check that the registers are legal. WBACK indicates whether
15260 address is updated. LOAD indicates whether memory access is load or store. */
15261 bool
15262 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15263 bool wback, bool load)
15265 unsigned int t, t2, n;
15267 if (!reload_completed)
15268 return true;
15270 if (!offset_ok_for_ldrd_strd (offset))
15271 return false;
15273 t = REGNO (rt);
15274 t2 = REGNO (rt2);
15275 n = REGNO (rn);
15277 if ((TARGET_THUMB2)
15278 && ((wback && (n == t || n == t2))
15279 || (t == SP_REGNUM)
15280 || (t == PC_REGNUM)
15281 || (t2 == SP_REGNUM)
15282 || (t2 == PC_REGNUM)
15283 || (!load && (n == PC_REGNUM))
15284 || (load && (t == t2))
15285 /* Triggers Cortex-M3 LDRD errata. */
15286 || (!wback && load && fix_cm3_ldrd && (n == t))))
15287 return false;
15289 if ((TARGET_ARM)
15290 && ((wback && (n == t || n == t2))
15291 || (t2 == PC_REGNUM)
15292 || (t % 2 != 0) /* First destination register is not even. */
15293 || (t2 != t + 1)
15294 /* PC can be used as base register (for offset addressing only),
15295 but it is deprecated. */
15296 || (n == PC_REGNUM)))
15297 return false;
15299 return true;
15302 /* Return true if a 64-bit access with alignment ALIGN and with a
15303 constant offset OFFSET from the base pointer is permitted on this
15304 architecture. */
15305 static bool
15306 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15308 return (unaligned_access
15309 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15310 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15313 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15314 operand MEM's address contains an immediate offset from the base
15315 register and has no side effects, in which case it sets BASE,
15316 OFFSET and ALIGN accordingly. */
15317 static bool
15318 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15320 rtx addr;
15322 gcc_assert (base != NULL && offset != NULL);
15324 /* TODO: Handle more general memory operand patterns, such as
15325 PRE_DEC and PRE_INC. */
15327 if (side_effects_p (mem))
15328 return false;
15330 /* Can't deal with subregs. */
15331 if (GET_CODE (mem) == SUBREG)
15332 return false;
15334 gcc_assert (MEM_P (mem));
15336 *offset = const0_rtx;
15337 *align = MEM_ALIGN (mem);
15339 addr = XEXP (mem, 0);
15341 /* If addr isn't valid for DImode, then we can't handle it. */
15342 if (!arm_legitimate_address_p (DImode, addr,
15343 reload_in_progress || reload_completed))
15344 return false;
15346 if (REG_P (addr))
15348 *base = addr;
15349 return true;
15351 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15353 *base = XEXP (addr, 0);
15354 *offset = XEXP (addr, 1);
15355 return (REG_P (*base) && CONST_INT_P (*offset));
15358 return false;
15361 /* Called from a peephole2 to replace two word-size accesses with a
15362 single LDRD/STRD instruction. Returns true iff we can generate a
15363 new instruction sequence. That is, both accesses use the same base
15364 register and the gap between constant offsets is 4. This function
15365 may reorder its operands to match ldrd/strd RTL templates.
15366 OPERANDS are the operands found by the peephole matcher;
15367 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15368 corresponding memory operands. LOAD indicates whether the access
15369 is load or store. CONST_STORE indicates a store of constant
15370 integer values held in OPERANDS[4,5] and assumes that the pattern
15371 is four insns long, for the purpose of checking dead registers.
15372 COMMUTE indicates that register operands may be reordered. */
15373 bool
15374 gen_operands_ldrd_strd (rtx *operands, bool load,
15375 bool const_store, bool commute)
15377 int nops = 2;
15378 HOST_WIDE_INT offsets[2], offset, align[2];
15379 rtx base = NULL_RTX;
15380 rtx cur_base, cur_offset, tmp;
15381 int i, gap;
15382 HARD_REG_SET regset;
15384 gcc_assert (!const_store || !load);
15385 /* Check that the memory references are immediate offsets from the
15386 same base register. Extract the base register, the destination
15387 registers, and the corresponding memory offsets. */
15388 for (i = 0; i < nops; i++)
15390 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15391 &align[i]))
15392 return false;
15394 if (i == 0)
15395 base = cur_base;
15396 else if (REGNO (base) != REGNO (cur_base))
15397 return false;
15399 offsets[i] = INTVAL (cur_offset);
15400 if (GET_CODE (operands[i]) == SUBREG)
15402 tmp = SUBREG_REG (operands[i]);
15403 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15404 operands[i] = tmp;
15408 /* Make sure there is no dependency between the individual loads. */
15409 if (load && REGNO (operands[0]) == REGNO (base))
15410 return false; /* RAW */
15412 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15413 return false; /* WAW */
15415 /* If the same input register is used in both stores
15416 when storing different constants, try to find a free register.
15417 For example, the code
15418 mov r0, 0
15419 str r0, [r2]
15420 mov r0, 1
15421 str r0, [r2, #4]
15422 can be transformed into
15423 mov r1, 0
15424 mov r0, 1
15425 strd r1, r0, [r2]
15426 in Thumb mode assuming that r1 is free.
15427 For ARM mode do the same but only if the starting register
15428 can be made to be even. */
15429 if (const_store
15430 && REGNO (operands[0]) == REGNO (operands[1])
15431 && INTVAL (operands[4]) != INTVAL (operands[5]))
15433 if (TARGET_THUMB2)
15435 CLEAR_HARD_REG_SET (regset);
15436 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15437 if (tmp == NULL_RTX)
15438 return false;
15440 /* Use the new register in the first load to ensure that
15441 if the original input register is not dead after peephole,
15442 then it will have the correct constant value. */
15443 operands[0] = tmp;
15445 else if (TARGET_ARM)
15447 int regno = REGNO (operands[0]);
15448 if (!peep2_reg_dead_p (4, operands[0]))
15450 /* When the input register is even and is not dead after the
15451 pattern, it has to hold the second constant but we cannot
15452 form a legal STRD in ARM mode with this register as the second
15453 register. */
15454 if (regno % 2 == 0)
15455 return false;
15457 /* Is regno-1 free? */
15458 SET_HARD_REG_SET (regset);
15459 CLEAR_HARD_REG_BIT(regset, regno - 1);
15460 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15461 if (tmp == NULL_RTX)
15462 return false;
15464 operands[0] = tmp;
15466 else
15468 /* Find a DImode register. */
15469 CLEAR_HARD_REG_SET (regset);
15470 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15471 if (tmp != NULL_RTX)
15473 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15474 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15476 else
15478 /* Can we use the input register to form a DI register? */
15479 SET_HARD_REG_SET (regset);
15480 CLEAR_HARD_REG_BIT(regset,
15481 regno % 2 == 0 ? regno + 1 : regno - 1);
15482 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15483 if (tmp == NULL_RTX)
15484 return false;
15485 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15489 gcc_assert (operands[0] != NULL_RTX);
15490 gcc_assert (operands[1] != NULL_RTX);
15491 gcc_assert (REGNO (operands[0]) % 2 == 0);
15492 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15496 /* Make sure the instructions are ordered with lower memory access first. */
15497 if (offsets[0] > offsets[1])
15499 gap = offsets[0] - offsets[1];
15500 offset = offsets[1];
15502 /* Swap the instructions such that lower memory is accessed first. */
15503 std::swap (operands[0], operands[1]);
15504 std::swap (operands[2], operands[3]);
15505 std::swap (align[0], align[1]);
15506 if (const_store)
15507 std::swap (operands[4], operands[5]);
15509 else
15511 gap = offsets[1] - offsets[0];
15512 offset = offsets[0];
15515 /* Make sure accesses are to consecutive memory locations. */
15516 if (gap != 4)
15517 return false;
15519 if (!align_ok_ldrd_strd (align[0], offset))
15520 return false;
15522 /* Make sure we generate legal instructions. */
15523 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15524 false, load))
15525 return true;
15527 /* In Thumb state, where registers are almost unconstrained, there
15528 is little hope of fixing it. */
15529 if (TARGET_THUMB2)
15530 return false;
15532 if (load && commute)
15534 /* Try reordering registers. */
15535 std::swap (operands[0], operands[1]);
15536 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15537 false, load))
15538 return true;
15541 if (const_store)
15543 /* If input registers are dead after this pattern, they can be
15544 reordered or replaced by other registers that are free in the
15545 current pattern. */
15546 if (!peep2_reg_dead_p (4, operands[0])
15547 || !peep2_reg_dead_p (4, operands[1]))
15548 return false;
15550 /* Try to reorder the input registers. */
15551 /* For example, the code
15552 mov r0, 0
15553 mov r1, 1
15554 str r1, [r2]
15555 str r0, [r2, #4]
15556 can be transformed into
15557 mov r1, 0
15558 mov r0, 1
15559 strd r0, r1, [r2]
15561 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15562 false, false))
15564 std::swap (operands[0], operands[1]);
15565 return true;
15568 /* Try to find a free DI register. */
15569 CLEAR_HARD_REG_SET (regset);
15570 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15571 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15572 while (true)
15574 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15575 if (tmp == NULL_RTX)
15576 return false;
15578 /* DREG must be an even-numbered register in DImode.
15579 Split it into SI registers. */
15580 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15581 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15582 gcc_assert (operands[0] != NULL_RTX);
15583 gcc_assert (operands[1] != NULL_RTX);
15584 gcc_assert (REGNO (operands[0]) % 2 == 0);
15585 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15587 return (operands_ok_ldrd_strd (operands[0], operands[1],
15588 base, offset,
15589 false, load));
15593 return false;
15599 /* Print a symbolic form of X to the debug file, F. */
15600 static void
15601 arm_print_value (FILE *f, rtx x)
15603 switch (GET_CODE (x))
15605 case CONST_INT:
15606 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15607 return;
15609 case CONST_DOUBLE:
15610 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15611 return;
15613 case CONST_VECTOR:
15615 int i;
15617 fprintf (f, "<");
15618 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15620 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15621 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15622 fputc (',', f);
15624 fprintf (f, ">");
15626 return;
15628 case CONST_STRING:
15629 fprintf (f, "\"%s\"", XSTR (x, 0));
15630 return;
15632 case SYMBOL_REF:
15633 fprintf (f, "`%s'", XSTR (x, 0));
15634 return;
15636 case LABEL_REF:
15637 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15638 return;
15640 case CONST:
15641 arm_print_value (f, XEXP (x, 0));
15642 return;
15644 case PLUS:
15645 arm_print_value (f, XEXP (x, 0));
15646 fprintf (f, "+");
15647 arm_print_value (f, XEXP (x, 1));
15648 return;
15650 case PC:
15651 fprintf (f, "pc");
15652 return;
15654 default:
15655 fprintf (f, "????");
15656 return;
15660 /* Routines for manipulation of the constant pool. */
15662 /* Arm instructions cannot load a large constant directly into a
15663 register; they have to come from a pc relative load. The constant
15664 must therefore be placed in the addressable range of the pc
15665 relative load. Depending on the precise pc relative load
15666 instruction the range is somewhere between 256 bytes and 4k. This
15667 means that we often have to dump a constant inside a function, and
15668 generate code to branch around it.
15670 It is important to minimize this, since the branches will slow
15671 things down and make the code larger.
15673 Normally we can hide the table after an existing unconditional
15674 branch so that there is no interruption of the flow, but in the
15675 worst case the code looks like this:
15677 ldr rn, L1
15679 b L2
15680 align
15681 L1: .long value
15685 ldr rn, L3
15687 b L4
15688 align
15689 L3: .long value
15693 We fix this by performing a scan after scheduling, which notices
15694 which instructions need to have their operands fetched from the
15695 constant table and builds the table.
15697 The algorithm starts by building a table of all the constants that
15698 need fixing up and all the natural barriers in the function (places
15699 where a constant table can be dropped without breaking the flow).
15700 For each fixup we note how far the pc-relative replacement will be
15701 able to reach and the offset of the instruction into the function.
15703 Having built the table we then group the fixes together to form
15704 tables that are as large as possible (subject to addressing
15705 constraints) and emit each table of constants after the last
15706 barrier that is within range of all the instructions in the group.
15707 If a group does not contain a barrier, then we forcibly create one
15708 by inserting a jump instruction into the flow. Once the table has
15709 been inserted, the insns are then modified to reference the
15710 relevant entry in the pool.
15712 Possible enhancements to the algorithm (not implemented) are:
15714 1) For some processors and object formats, there may be benefit in
15715 aligning the pools to the start of cache lines; this alignment
15716 would need to be taken into account when calculating addressability
15717 of a pool. */
15719 /* These typedefs are located at the start of this file, so that
15720 they can be used in the prototypes there. This comment is to
15721 remind readers of that fact so that the following structures
15722 can be understood more easily.
15724 typedef struct minipool_node Mnode;
15725 typedef struct minipool_fixup Mfix; */
15727 struct minipool_node
15729 /* Doubly linked chain of entries. */
15730 Mnode * next;
15731 Mnode * prev;
15732 /* The maximum offset into the code at which this entry can be placed. While
15733 pushing fixes for forward references, all entries are sorted in order
15734 of increasing max_address. */
15735 HOST_WIDE_INT max_address;
15736 /* Similarly for an entry inserted for a backwards ref. */
15737 HOST_WIDE_INT min_address;
15738 /* The number of fixes referencing this entry. This can become zero
15739 if we "unpush" an entry. In this case we ignore the entry when we
15740 come to emit the code. */
15741 int refcount;
15742 /* The offset from the start of the minipool. */
15743 HOST_WIDE_INT offset;
15744 /* The value in the table. */
15745 rtx value;
15746 /* The mode of value. */
15747 machine_mode mode;
15748 /* The size of the value. With iWMMXt enabled
15749 sizes > 4 also imply an alignment of 8 bytes. */
15750 int fix_size;
15753 struct minipool_fixup
15755 Mfix * next;
15756 rtx_insn * insn;
15757 HOST_WIDE_INT address;
15758 rtx * loc;
15759 machine_mode mode;
15760 int fix_size;
15761 rtx value;
15762 Mnode * minipool;
15763 HOST_WIDE_INT forwards;
15764 HOST_WIDE_INT backwards;
15767 /* Fixes less than a word need padding out to a word boundary. */
15768 #define MINIPOOL_FIX_SIZE(mode) \
15769 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
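/* For example, an HImode fix still occupies a full 4-byte slot in the pool.  */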
15771 static Mnode * minipool_vector_head;
15772 static Mnode * minipool_vector_tail;
15773 static rtx_code_label *minipool_vector_label;
15774 static int minipool_pad;
15776 /* The linked list of all minipool fixes required for this function. */
15777 Mfix * minipool_fix_head;
15778 Mfix * minipool_fix_tail;
15779 /* The fix entry for the current minipool, once it has been placed. */
15780 Mfix * minipool_barrier;
15782 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15783 #define JUMP_TABLES_IN_TEXT_SECTION 0
15784 #endif
15786 static HOST_WIDE_INT
15787 get_jump_table_size (rtx_jump_table_data *insn)
15789 /* ADDR_VECs only take room if read-only data goes into the text
15790 section. */
15791 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15793 rtx body = PATTERN (insn);
15794 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15795 HOST_WIDE_INT size;
15796 HOST_WIDE_INT modesize;
15798 modesize = GET_MODE_SIZE (GET_MODE (body));
15799 size = modesize * XVECLEN (body, elt);
15800 switch (modesize)
15802 case 1:
15803 /* Round up size of TBB table to a halfword boundary. */
15804 size = (size + 1) & ~HOST_WIDE_INT_1;
15805 break;
15806 case 2:
15807 /* No padding necessary for TBH. */
15808 break;
15809 case 4:
15810 /* Add two bytes for alignment on Thumb. */
15811 if (TARGET_THUMB)
15812 size += 2;
15813 break;
15814 default:
15815 gcc_unreachable ();
15817 return size;
15820 return 0;
15823 /* Return the maximum amount of padding that will be inserted before
15824 label LABEL. */
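/* For example, a label aligned to 8 bytes in Thumb code may be preceded by up
   to 6 bytes of padding: insn addresses are already multiples of the 2-byte
   minimum insn size, so at most align - 2 bytes of padding are needed.  */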
15826 static HOST_WIDE_INT
15827 get_label_padding (rtx label)
15829 HOST_WIDE_INT align, min_insn_size;
15831 align = 1 << label_to_alignment (label);
15832 min_insn_size = TARGET_THUMB ? 2 : 4;
15833 return align > min_insn_size ? align - min_insn_size : 0;
15836 /* Move a minipool fix MP from its current location to before MAX_MP.
15837 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15838 constraints may need updating. */
15839 static Mnode *
15840 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15841 HOST_WIDE_INT max_address)
15843 /* The code below assumes these are different. */
15844 gcc_assert (mp != max_mp);
15846 if (max_mp == NULL)
15848 if (max_address < mp->max_address)
15849 mp->max_address = max_address;
15851 else
15853 if (max_address > max_mp->max_address - mp->fix_size)
15854 mp->max_address = max_mp->max_address - mp->fix_size;
15855 else
15856 mp->max_address = max_address;
15858 /* Unlink MP from its current position. Since max_mp is non-null,
15859 mp->prev must be non-null. */
15860 mp->prev->next = mp->next;
15861 if (mp->next != NULL)
15862 mp->next->prev = mp->prev;
15863 else
15864 minipool_vector_tail = mp->prev;
15866 /* Re-insert it before MAX_MP. */
15867 mp->next = max_mp;
15868 mp->prev = max_mp->prev;
15869 max_mp->prev = mp;
15871 if (mp->prev != NULL)
15872 mp->prev->next = mp;
15873 else
15874 minipool_vector_head = mp;
15877 /* Save the new entry. */
15878 max_mp = mp;
15880 /* Scan over the preceding entries and adjust their addresses as
15881 required. */
15882 while (mp->prev != NULL
15883 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15885 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15886 mp = mp->prev;
15889 return max_mp;
15892 /* Add a constant to the minipool for a forward reference. Returns the
15893 node added or NULL if the constant will not fit in this pool. */
15894 static Mnode *
15895 add_minipool_forward_ref (Mfix *fix)
15897 /* If set, max_mp is the first pool_entry that has a lower
15898 constraint than the one we are trying to add. */
15899 Mnode * max_mp = NULL;
15900 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15901 Mnode * mp;
15903 /* If the minipool starts before the end of FIX->INSN then this FIX
15904 cannot be placed into the current pool. Furthermore, adding the
15905 new constant pool entry may cause the pool to start FIX_SIZE bytes
15906 earlier. */
15907 if (minipool_vector_head &&
15908 (fix->address + get_attr_length (fix->insn)
15909 >= minipool_vector_head->max_address - fix->fix_size))
15910 return NULL;
15912 /* Scan the pool to see if a constant with the same value has
15913 already been added. While we are doing this, also note the
15914 location where we must insert the constant if it doesn't already
15915 exist. */
15916 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15918 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15919 && fix->mode == mp->mode
15920 && (!LABEL_P (fix->value)
15921 || (CODE_LABEL_NUMBER (fix->value)
15922 == CODE_LABEL_NUMBER (mp->value)))
15923 && rtx_equal_p (fix->value, mp->value))
15925 /* More than one fix references this entry. */
15926 mp->refcount++;
15927 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15930 /* Note the insertion point if necessary. */
15931 if (max_mp == NULL
15932 && mp->max_address > max_address)
15933 max_mp = mp;
15935 /* If we are inserting an 8-byte aligned quantity and
15936 we have not already found an insertion point, then
15937 make sure that all such 8-byte aligned quantities are
15938 placed at the start of the pool. */
15939 if (ARM_DOUBLEWORD_ALIGN
15940 && max_mp == NULL
15941 && fix->fix_size >= 8
15942 && mp->fix_size < 8)
15944 max_mp = mp;
15945 max_address = mp->max_address;
15949 /* The value is not currently in the minipool, so we need to create
15950 a new entry for it. If MAX_MP is NULL, the entry will be put on
15951 the end of the list since the placement is less constrained than
15952 any existing entry. Otherwise, we insert the new fix before
15953 MAX_MP and, if necessary, adjust the constraints on the other
15954 entries. */
15955 mp = XNEW (Mnode);
15956 mp->fix_size = fix->fix_size;
15957 mp->mode = fix->mode;
15958 mp->value = fix->value;
15959 mp->refcount = 1;
15960 /* Not yet required for a backwards ref. */
15961 mp->min_address = -65536;
15963 if (max_mp == NULL)
15965 mp->max_address = max_address;
15966 mp->next = NULL;
15967 mp->prev = minipool_vector_tail;
15969 if (mp->prev == NULL)
15971 minipool_vector_head = mp;
15972 minipool_vector_label = gen_label_rtx ();
15974 else
15975 mp->prev->next = mp;
15977 minipool_vector_tail = mp;
15979 else
15981 if (max_address > max_mp->max_address - mp->fix_size)
15982 mp->max_address = max_mp->max_address - mp->fix_size;
15983 else
15984 mp->max_address = max_address;
15986 mp->next = max_mp;
15987 mp->prev = max_mp->prev;
15988 max_mp->prev = mp;
15989 if (mp->prev != NULL)
15990 mp->prev->next = mp;
15991 else
15992 minipool_vector_head = mp;
15995 /* Save the new entry. */
15996 max_mp = mp;
15998 /* Scan over the preceding entries and adjust their addresses as
15999 required. */
16000 while (mp->prev != NULL
16001 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16003 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16004 mp = mp->prev;
16007 return max_mp;
16010 static Mnode *
16011 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16012 HOST_WIDE_INT min_address)
16014 HOST_WIDE_INT offset;
16016 /* The code below assumes these are different. */
16017 gcc_assert (mp != min_mp);
16019 if (min_mp == NULL)
16021 if (min_address > mp->min_address)
16022 mp->min_address = min_address;
16024 else
16026 /* We will adjust this below if it is too loose. */
16027 mp->min_address = min_address;
16029 /* Unlink MP from its current position. Since min_mp is non-null,
16030 mp->next must be non-null. */
16031 mp->next->prev = mp->prev;
16032 if (mp->prev != NULL)
16033 mp->prev->next = mp->next;
16034 else
16035 minipool_vector_head = mp->next;
16037 /* Reinsert it after MIN_MP. */
16038 mp->prev = min_mp;
16039 mp->next = min_mp->next;
16040 min_mp->next = mp;
16041 if (mp->next != NULL)
16042 mp->next->prev = mp;
16043 else
16044 minipool_vector_tail = mp;
16047 min_mp = mp;
16049 offset = 0;
16050 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16052 mp->offset = offset;
16053 if (mp->refcount > 0)
16054 offset += mp->fix_size;
16056 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16057 mp->next->min_address = mp->min_address + mp->fix_size;
16060 return min_mp;
16063 /* Add a constant to the minipool for a backward reference. Returns the
16064 node added or NULL if the constant will not fit in this pool.
16066 Note that the code for insertion for a backwards reference can be
16067 somewhat confusing because the calculated offsets for each fix do
16068 not take into account the size of the pool (which is still under
16069 construction). */
16070 static Mnode *
16071 add_minipool_backward_ref (Mfix *fix)
16073 /* If set, min_mp is the last pool_entry that has a lower constraint
16074 than the one we are trying to add. */
16075 Mnode *min_mp = NULL;
16076 /* This can be negative, since it is only a constraint. */
16077 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16078 Mnode *mp;
16080 /* If we can't reach the current pool from this insn, or if we can't
16081 insert this entry at the end of the pool without pushing other
16082 fixes out of range, then we don't try. This ensures that we
16083 can't fail later on. */
16084 if (min_address >= minipool_barrier->address
16085 || (minipool_vector_tail->min_address + fix->fix_size
16086 >= minipool_barrier->address))
16087 return NULL;
16089 /* Scan the pool to see if a constant with the same value has
16090 already been added. While we are doing this, also note the
16091 location where we must insert the constant if it doesn't already
16092 exist. */
16093 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16095 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16096 && fix->mode == mp->mode
16097 && (!LABEL_P (fix->value)
16098 || (CODE_LABEL_NUMBER (fix->value)
16099 == CODE_LABEL_NUMBER (mp->value)))
16100 && rtx_equal_p (fix->value, mp->value)
16101 /* Check that there is enough slack to move this entry to the
16102 end of the table (this is conservative). */
16103 && (mp->max_address
16104 > (minipool_barrier->address
16105 + minipool_vector_tail->offset
16106 + minipool_vector_tail->fix_size)))
16108 mp->refcount++;
16109 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16112 if (min_mp != NULL)
16113 mp->min_address += fix->fix_size;
16114 else
16116 /* Note the insertion point if necessary. */
16117 if (mp->min_address < min_address)
16119 /* For now, we do not allow the insertion of nodes requiring 8-byte
16120 alignment anywhere except at the start of the pool. */
16121 if (ARM_DOUBLEWORD_ALIGN
16122 && fix->fix_size >= 8 && mp->fix_size < 8)
16123 return NULL;
16124 else
16125 min_mp = mp;
16127 else if (mp->max_address
16128 < minipool_barrier->address + mp->offset + fix->fix_size)
16130 /* Inserting before this entry would push the fix beyond
16131 its maximum address (which can happen if we have
16132 re-located a forwards fix); force the new fix to come
16133 after it. */
16134 if (ARM_DOUBLEWORD_ALIGN
16135 && fix->fix_size >= 8 && mp->fix_size < 8)
16136 return NULL;
16137 else
16139 min_mp = mp;
16140 min_address = mp->min_address + fix->fix_size;
16143 /* Do not insert a non-8-byte aligned quantity before 8-byte
16144 aligned quantities. */
16145 else if (ARM_DOUBLEWORD_ALIGN
16146 && fix->fix_size < 8
16147 && mp->fix_size >= 8)
16149 min_mp = mp;
16150 min_address = mp->min_address + fix->fix_size;
16155 /* We need to create a new entry. */
16156 mp = XNEW (Mnode);
16157 mp->fix_size = fix->fix_size;
16158 mp->mode = fix->mode;
16159 mp->value = fix->value;
16160 mp->refcount = 1;
16161 mp->max_address = minipool_barrier->address + 65536;
16163 mp->min_address = min_address;
16165 if (min_mp == NULL)
16167 mp->prev = NULL;
16168 mp->next = minipool_vector_head;
16170 if (mp->next == NULL)
16172 minipool_vector_tail = mp;
16173 minipool_vector_label = gen_label_rtx ();
16175 else
16176 mp->next->prev = mp;
16178 minipool_vector_head = mp;
16180 else
16182 mp->next = min_mp->next;
16183 mp->prev = min_mp;
16184 min_mp->next = mp;
16186 if (mp->next != NULL)
16187 mp->next->prev = mp;
16188 else
16189 minipool_vector_tail = mp;
16192 /* Save the new entry. */
16193 min_mp = mp;
16195 if (mp->prev)
16196 mp = mp->prev;
16197 else
16198 mp->offset = 0;
16200 /* Scan over the following entries and adjust their offsets. */
16201 while (mp->next != NULL)
16203 if (mp->next->min_address < mp->min_address + mp->fix_size)
16204 mp->next->min_address = mp->min_address + mp->fix_size;
16206 if (mp->refcount)
16207 mp->next->offset = mp->offset + mp->fix_size;
16208 else
16209 mp->next->offset = mp->offset;
16211 mp = mp->next;
16214 return min_mp;
16217 static void
16218 assign_minipool_offsets (Mfix *barrier)
16220 HOST_WIDE_INT offset = 0;
16221 Mnode *mp;
16223 minipool_barrier = barrier;
16225 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16227 mp->offset = offset;
16229 if (mp->refcount > 0)
16230 offset += mp->fix_size;
16234 /* Output the literal table. */
16235 static void
16236 dump_minipool (rtx_insn *scan)
16238 Mnode * mp;
16239 Mnode * nmp;
16240 int align64 = 0;
16242 if (ARM_DOUBLEWORD_ALIGN)
16243 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16244 if (mp->refcount > 0 && mp->fix_size >= 8)
16246 align64 = 1;
16247 break;
16250 if (dump_file)
16251 fprintf (dump_file,
16252 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16253 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16255 scan = emit_label_after (gen_label_rtx (), scan);
16256 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16257 scan = emit_label_after (minipool_vector_label, scan);
16259 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16261 if (mp->refcount > 0)
16263 if (dump_file)
16265 fprintf (dump_file,
16266 ";; Offset %u, min %ld, max %ld ",
16267 (unsigned) mp->offset, (unsigned long) mp->min_address,
16268 (unsigned long) mp->max_address);
16269 arm_print_value (dump_file, mp->value);
16270 fputc ('\n', dump_file);
16273 rtx val = copy_rtx (mp->value);
16275 switch (GET_MODE_SIZE (mp->mode))
16277 #ifdef HAVE_consttable_1
16278 case 1:
16279 scan = emit_insn_after (gen_consttable_1 (val), scan);
16280 break;
16282 #endif
16283 #ifdef HAVE_consttable_2
16284 case 2:
16285 scan = emit_insn_after (gen_consttable_2 (val), scan);
16286 break;
16288 #endif
16289 #ifdef HAVE_consttable_4
16290 case 4:
16291 scan = emit_insn_after (gen_consttable_4 (val), scan);
16292 break;
16294 #endif
16295 #ifdef HAVE_consttable_8
16296 case 8:
16297 scan = emit_insn_after (gen_consttable_8 (val), scan);
16298 break;
16300 #endif
16301 #ifdef HAVE_consttable_16
16302 case 16:
16303 scan = emit_insn_after (gen_consttable_16 (val), scan);
16304 break;
16306 #endif
16307 default:
16308 gcc_unreachable ();
16312 nmp = mp->next;
16313 free (mp);
16316 minipool_vector_head = minipool_vector_tail = NULL;
16317 scan = emit_insn_after (gen_consttable_end (), scan);
16318 scan = emit_barrier_after (scan);
16321 /* Return the cost of forcibly inserting a barrier after INSN. */
16322 static int
16323 arm_barrier_cost (rtx_insn *insn)
16325 /* Basing the location of the pool on the loop depth is preferable,
16326 but at the moment, the basic block information seems to be
16327 corrupted by this stage of the compilation. */
16328 int base_cost = 50;
16329 rtx_insn *next = next_nonnote_insn (insn);
16331 if (next != NULL && LABEL_P (next))
16332 base_cost -= 20;
16334 switch (GET_CODE (insn))
16336 case CODE_LABEL:
16337 /* It will always be better to place the table before the label, rather
16338 than after it. */
16339 return 50;
16341 case INSN:
16342 case CALL_INSN:
16343 return base_cost;
16345 case JUMP_INSN:
16346 return base_cost - 10;
16348 default:
16349 return base_cost + 10;
16353 /* Find the best place in the insn stream in the range
16354 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16355 Create the barrier by inserting a jump and add a new fix entry for
16356 it. */
16357 static Mfix *
16358 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16360 HOST_WIDE_INT count = 0;
16361 rtx_barrier *barrier;
16362 rtx_insn *from = fix->insn;
16363 /* The instruction after which we will insert the jump. */
16364 rtx_insn *selected = NULL;
16365 int selected_cost;
16366 /* The address at which the jump instruction will be placed. */
16367 HOST_WIDE_INT selected_address;
16368 Mfix * new_fix;
16369 HOST_WIDE_INT max_count = max_address - fix->address;
16370 rtx_code_label *label = gen_label_rtx ();
16372 selected_cost = arm_barrier_cost (from);
16373 selected_address = fix->address;
16375 while (from && count < max_count)
16377 rtx_jump_table_data *tmp;
16378 int new_cost;
16380 /* This code shouldn't have been called if there was a natural barrier
16381 within range. */
16382 gcc_assert (!BARRIER_P (from));
16384 /* Count the length of this insn. This must stay in sync with the
16385 code that pushes minipool fixes. */
16386 if (LABEL_P (from))
16387 count += get_label_padding (from);
16388 else
16389 count += get_attr_length (from);
16391 /* If there is a jump table, add its length. */
16392 if (tablejump_p (from, NULL, &tmp))
16394 count += get_jump_table_size (tmp);
16396 /* Jump tables aren't in a basic block, so base the cost on
16397 the dispatch insn. If we select this location, we will
16398 still put the pool after the table. */
16399 new_cost = arm_barrier_cost (from);
16401 if (count < max_count
16402 && (!selected || new_cost <= selected_cost))
16404 selected = tmp;
16405 selected_cost = new_cost;
16406 selected_address = fix->address + count;
16409 /* Continue after the dispatch table. */
16410 from = NEXT_INSN (tmp);
16411 continue;
16414 new_cost = arm_barrier_cost (from);
16416 if (count < max_count
16417 && (!selected || new_cost <= selected_cost))
16419 selected = from;
16420 selected_cost = new_cost;
16421 selected_address = fix->address + count;
16424 from = NEXT_INSN (from);
16427 /* Make sure that we found a place to insert the jump. */
16428 gcc_assert (selected);
16430 /* Make sure we do not split a call and its corresponding
16431 CALL_ARG_LOCATION note. */
16432 if (CALL_P (selected))
16434 rtx_insn *next = NEXT_INSN (selected);
16435 if (next && NOTE_P (next)
16436 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16437 selected = next;
16440 /* Create a new JUMP_INSN that branches around a barrier. */
16441 from = emit_jump_insn_after (gen_jump (label), selected);
16442 JUMP_LABEL (from) = label;
16443 barrier = emit_barrier_after (from);
16444 emit_label_after (label, barrier);
16446 /* Create a minipool barrier entry for the new barrier. */
16447 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16448 new_fix->insn = barrier;
16449 new_fix->address = selected_address;
16450 new_fix->next = fix->next;
16451 fix->next = new_fix;
16453 return new_fix;
16456 /* Record that there is a natural barrier in the insn stream at
16457 ADDRESS. */
16458 static void
16459 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16461 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16463 fix->insn = insn;
16464 fix->address = address;
16466 fix->next = NULL;
16467 if (minipool_fix_head != NULL)
16468 minipool_fix_tail->next = fix;
16469 else
16470 minipool_fix_head = fix;
16472 minipool_fix_tail = fix;
16475 /* Record INSN, which will need fixing up to load a value from the
16476 minipool. ADDRESS is the offset of the insn from the start of the
16477 function; LOC is a pointer to the part of the insn which requires
16478 fixing; VALUE is the constant that must be loaded, which is of type
16479 MODE. */
16480 static void
16481 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16482 machine_mode mode, rtx value)
16484 gcc_assert (!arm_disable_literal_pool);
16485 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16487 fix->insn = insn;
16488 fix->address = address;
16489 fix->loc = loc;
16490 fix->mode = mode;
16491 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16492 fix->value = value;
16493 fix->forwards = get_attr_pool_range (insn);
16494 fix->backwards = get_attr_neg_pool_range (insn);
16495 fix->minipool = NULL;
16497 /* If an insn doesn't have a range defined for it, then it isn't
16498 expecting to be reworked by this code. Better to stop now than
16499 to generate duff assembly code. */
16500 gcc_assert (fix->forwards || fix->backwards);
16502 /* If an entry requires 8-byte alignment then assume all constant pools
16503 require 4 bytes of padding. Trying to do this later on a per-pool
16504 basis is awkward because existing pool entries have to be modified. */
16505 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16506 minipool_pad = 4;
16508 if (dump_file)
16510 fprintf (dump_file,
16511 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16512 GET_MODE_NAME (mode),
16513 INSN_UID (insn), (unsigned long) address,
16514 -1 * (long)fix->backwards, (long)fix->forwards);
16515 arm_print_value (dump_file, fix->value);
16516 fprintf (dump_file, "\n");
16519 /* Add it to the chain of fixes. */
16520 fix->next = NULL;
16522 if (minipool_fix_head != NULL)
16523 minipool_fix_tail->next = fix;
16524 else
16525 minipool_fix_head = fix;
16527 minipool_fix_tail = fix;
16530 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16531 constant inline; constants whose synthesis would cost more than this are
16532 better loaded from the literal pool. */
16534 arm_max_const_double_inline_cost ()
16536 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16539 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16540 Returns the number of insns needed, or 99 if we don't know how to
16541 do it. */
16543 arm_const_double_inline_cost (rtx val)
16545 rtx lowpart, highpart;
16546 machine_mode mode;
16548 mode = GET_MODE (val);
16550 if (mode == VOIDmode)
16551 mode = DImode;
16553 gcc_assert (GET_MODE_SIZE (mode) == 8);
16555 lowpart = gen_lowpart (SImode, val);
16556 highpart = gen_highpart_mode (SImode, mode, val);
16558 gcc_assert (CONST_INT_P (lowpart));
16559 gcc_assert (CONST_INT_P (highpart));
16561 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16562 NULL_RTX, NULL_RTX, 0, 0)
16563 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16564 NULL_RTX, NULL_RTX, 0, 0));
16567 /* Cost of loading a SImode constant. */
16568 static inline int
16569 arm_const_inline_cost (enum rtx_code code, rtx val)
16571 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16572 NULL_RTX, NULL_RTX, 1, 0);
16575 /* Return true if it is worthwhile to split a 64-bit constant into two
16576 32-bit operations. This is the case if optimizing for size, or
16577 if we have load delay slots, or if one 32-bit part can be done with
16578 a single data operation. */
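/* For example, 0x0000000100000001: each 32-bit half is the valid immediate 1,
   so building the value with two MOVs is preferable to a literal pool load.  */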
16579 bool
16580 arm_const_double_by_parts (rtx val)
16582 machine_mode mode = GET_MODE (val);
16583 rtx part;
16585 if (optimize_size || arm_ld_sched)
16586 return true;
16588 if (mode == VOIDmode)
16589 mode = DImode;
16591 part = gen_highpart_mode (SImode, mode, val);
16593 gcc_assert (CONST_INT_P (part));
16595 if (const_ok_for_arm (INTVAL (part))
16596 || const_ok_for_arm (~INTVAL (part)))
16597 return true;
16599 part = gen_lowpart (SImode, val);
16601 gcc_assert (CONST_INT_P (part));
16603 if (const_ok_for_arm (INTVAL (part))
16604 || const_ok_for_arm (~INTVAL (part)))
16605 return true;
16607 return false;
16610 /* Return true if it is possible to inline both the high and low parts
16611 of a 64-bit constant into 32-bit data processing instructions. */
16612 bool
16613 arm_const_double_by_immediates (rtx val)
16615 machine_mode mode = GET_MODE (val);
16616 rtx part;
16618 if (mode == VOIDmode)
16619 mode = DImode;
16621 part = gen_highpart_mode (SImode, mode, val);
16623 gcc_assert (CONST_INT_P (part));
16625 if (!const_ok_for_arm (INTVAL (part)))
16626 return false;
16628 part = gen_lowpart (SImode, val);
16630 gcc_assert (CONST_INT_P (part));
16632 if (!const_ok_for_arm (INTVAL (part)))
16633 return false;
16635 return true;
16638 /* Scan INSN and note any of its operands that need fixing.
16639 If DO_PUSHES is false we do not actually push any of the fixups
16640 needed. */
16641 static void
16642 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16644 int opno;
16646 extract_constrain_insn (insn);
16648 if (recog_data.n_alternatives == 0)
16649 return;
16651 /* Fill in recog_op_alt with information about the constraints of
16652 this insn. */
16653 preprocess_constraints (insn);
16655 const operand_alternative *op_alt = which_op_alt ();
16656 for (opno = 0; opno < recog_data.n_operands; opno++)
16658 /* Things we need to fix can only occur in inputs. */
16659 if (recog_data.operand_type[opno] != OP_IN)
16660 continue;
16662 /* If this alternative is a memory reference, then any mention
16663 of constants in this alternative is really to fool reload
16664 into allowing us to accept one there. We need to fix them up
16665 now so that we output the right code. */
16666 if (op_alt[opno].memory_ok)
16668 rtx op = recog_data.operand[opno];
16670 if (CONSTANT_P (op))
16672 if (do_pushes)
16673 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16674 recog_data.operand_mode[opno], op);
16676 else if (MEM_P (op)
16677 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16678 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16680 if (do_pushes)
16682 rtx cop = avoid_constant_pool_reference (op);
16684 /* Casting the address of something to a mode narrower
16685 than a word can cause avoid_constant_pool_reference()
16686 to return the pool reference itself. That's no good to
16687 us here. Let's just hope that we can use the
16688 constant pool value directly. */
16689 if (op == cop)
16690 cop = get_pool_constant (XEXP (op, 0));
16692 push_minipool_fix (insn, address,
16693 recog_data.operand_loc[opno],
16694 recog_data.operand_mode[opno], cop);
16701 return;
16704 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16705 and unions in the context of ARMv8-M Security Extensions. It is used as a
16706 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16707 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16708 or four masks, depending on whether it is being computed for a
16709 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16710 respectively. The tree for the type of the argument or a field within an
16711 argument is passed in ARG_TYPE, the current register this argument or field
16712 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16713 argument or field starts at is passed in STARTING_BIT and the last used bit
16714 is kept in LAST_USED_BIT which is also updated accordingly. */
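/* For example, for a struct { char c; short s; } starting in r0, the padding
   byte between C and S gives padding_bits_to_clear[0] |= 0x0000ff00 below.  */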
16716 static unsigned HOST_WIDE_INT
16717 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16718 uint32_t * padding_bits_to_clear,
16719 unsigned starting_bit, int * last_used_bit)
16722 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16724 if (TREE_CODE (arg_type) == RECORD_TYPE)
16726 unsigned current_bit = starting_bit;
16727 tree field;
16728 long int offset, size;
16731 field = TYPE_FIELDS (arg_type);
16732 while (field)
16734 /* The offset within a structure is always an offset from
16735 the start of that structure. Make sure we take that into account in the
16736 calculation of the register-based offset that we use here. */
16737 offset = starting_bit;
16738 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16739 offset %= 32;
16741 /* This is the actual size of the field; for bitfields it is the
16742 bitfield width and not the container size. */
16743 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16745 if (*last_used_bit != offset)
16747 if (offset < *last_used_bit)
16749 /* This field's offset is before the 'last_used_bit'; that
16750 means this field goes in the next register. So we need to
16751 pad the rest of the current register and increase the
16752 register number. */
16753 uint32_t mask;
16754 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16755 mask++;
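/* MASK now covers every bit from *last_used_bit upwards, e.g. 0xff000000
   when *last_used_bit == 24.  */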
16757 padding_bits_to_clear[*regno] |= mask;
16758 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16759 (*regno)++;
16761 else
16763 /* Otherwise we pad the bits between the last field's end and
16764 the start of the new field. */
16765 uint32_t mask;
16767 mask = ((uint32_t)-1) >> (32 - offset);
16768 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
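/* MASK covers the bits from *last_used_bit up to, but not including, the new
   field's OFFSET, e.g. 0x0000ff00 for *last_used_bit == 8 and offset == 16.  */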
16769 padding_bits_to_clear[*regno] |= mask;
16771 current_bit = offset;
16774 /* Calculate further padding bits for inner structs/unions too. */
16775 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16777 *last_used_bit = current_bit;
16778 not_to_clear_reg_mask
16779 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16780 padding_bits_to_clear, offset,
16781 last_used_bit);
16783 else
16785 /* Update 'current_bit' with this field's size. If the
16786 'current_bit' lies in a subsequent register, update 'regno' and
16787 reset 'current_bit' to point to the current bit in that new
16788 register. */
16789 current_bit += size;
16790 while (current_bit >= 32)
16792 current_bit-=32;
16793 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16794 (*regno)++;
16796 *last_used_bit = current_bit;
16799 field = TREE_CHAIN (field);
16801 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16803 else if (TREE_CODE (arg_type) == UNION_TYPE)
16805 tree field, field_t;
16806 int i, regno_t, field_size;
16807 int max_reg = -1;
16808 int max_bit = -1;
16809 uint32_t mask;
16810 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16811 = {-1, -1, -1, -1};
16813 /* To compute the padding bits in a union we only consider bits as
16814 padding bits if they are always either a padding bit or fall outside a
16815 field's size for all fields in the union. */
16816 field = TYPE_FIELDS (arg_type);
16817 while (field)
16819 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16820 = {0U, 0U, 0U, 0U};
16821 int last_used_bit_t = *last_used_bit;
16822 regno_t = *regno;
16823 field_t = TREE_TYPE (field);
16825 /* If the field's type is either a record or a union make sure to
16826 compute its padding bits too. */
16827 if (RECORD_OR_UNION_TYPE_P (field_t))
16828 not_to_clear_reg_mask
16829 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16830 &padding_bits_to_clear_t[0],
16831 starting_bit, &last_used_bit_t);
16832 else
16834 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16835 regno_t = (field_size / 32) + *regno;
16836 last_used_bit_t = (starting_bit + field_size) % 32;
16839 for (i = *regno; i < regno_t; i++)
16841 /* For all but the last register used by this field only keep the
16842 padding bits that were padding bits in this field. */
16843 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16846 /* For the last register, keep all padding bits that were padding
16847 bits in this field and any padding bits that are still valid
16848 as padding bits but fall outside of this field's size. */
16849 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16850 padding_bits_to_clear_res[regno_t]
16851 &= padding_bits_to_clear_t[regno_t] | mask;
16853 /* Update the maximum size of the fields in terms of registers used
16854 ('max_reg') and the 'last_used_bit' in said register. */
16855 if (max_reg < regno_t)
16857 max_reg = regno_t;
16858 max_bit = last_used_bit_t;
16860 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16861 max_bit = last_used_bit_t;
16863 field = TREE_CHAIN (field);
16866 /* Update the current padding_bits_to_clear using the intersection of the
16867 padding bits of all the fields. */
16868 for (i=*regno; i < max_reg; i++)
16869 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16871 /* Do not keep trailing padding bits; we do not know yet whether this
16872 is the end of the argument. */
16873 mask = ((uint32_t) 1 << max_bit) - 1;
16874 padding_bits_to_clear[max_reg]
16875 |= padding_bits_to_clear_res[max_reg] & mask;
16877 *regno = max_reg;
16878 *last_used_bit = max_bit;
16880 else
16881 /* This function should only be used for structs and unions. */
16882 gcc_unreachable ();
16884 return not_to_clear_reg_mask;
16887 /* In the context of ARMv8-M Security Extensions, this function is used for both
16888 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute which
16889 registers are used when returning or passing arguments; this is then
16890 returned as a mask. It will also compute a mask to indicate padding/unused
16891 bits for each of these registers, and passes this through the
16892 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16893 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16894 the starting register used to pass this argument or return value is passed
16895 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16896 for struct and union types. */
16898 static unsigned HOST_WIDE_INT
16899 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16900 uint32_t * padding_bits_to_clear)
16903 int last_used_bit = 0;
16904 unsigned HOST_WIDE_INT not_to_clear_mask;
16906 if (RECORD_OR_UNION_TYPE_P (arg_type))
16908 not_to_clear_mask
16909 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16910 padding_bits_to_clear, 0,
16911 &last_used_bit);
16914 /* If the 'last_used_bit' is not zero, that means we are still using a
16915 part of the last 'regno'. In such cases we must clear the trailing
16916 bits. Otherwise we are not using regno and should mark it to be
16917 cleared. */
16918 if (last_used_bit != 0)
16919 padding_bits_to_clear[regno]
16920 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16921 else
16922 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16924 else
16926 not_to_clear_mask = 0;
16927 /* We are not dealing with structs or unions, so these arguments may be
16928 passed in floating point registers too. In some cases a BLKmode is
16929 used when returning or passing arguments in multiple VFP registers. */
16930 if (GET_MODE (arg_rtx) == BLKmode)
16932 int i, arg_regs;
16933 rtx reg;
16935 /* This should really only occur when dealing with the hard-float
16936 ABI. */
16937 gcc_assert (TARGET_HARD_FLOAT_ABI);
16939 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16941 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16942 gcc_assert (REG_P (reg));
16944 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16946 /* If we are dealing with DF mode, make sure we don't
16947 clear either of the registers it addresses. */
16948 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16949 if (arg_regs > 1)
16951 unsigned HOST_WIDE_INT mask;
16952 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16953 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16954 not_to_clear_mask |= mask;
16958 else
16960 /* Otherwise we can rely on the MODE to determine how many registers
16961 are being used by this argument. */
16962 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16963 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16964 if (arg_regs > 1)
16966 unsigned HOST_WIDE_INT
16967 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16968 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16969 not_to_clear_mask |= mask;
16974 return not_to_clear_mask;
16977 /* Clears caller saved registers not used to pass arguments before a
16978 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16979 registers is done in the __gnu_cmse_nonsecure_call libcall.
16980 See libgcc/config/arm/cmse_nonsecure_call.S. */
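/* Sketch of the transformation (added comment): before each call through a
   cmse_nonsecure_call function pointer, insns are emitted so that argument
   registers not carrying data (r0-r3 and, for -mfloat-abi=hard, the VFP
   argument registers up to d7), together with any padding bits inside used
   argument registers, are scrubbed and cannot leak secure-state values to
   the non-secure callee; the register holding the call address is left
   intact.  */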
16982 static void
16983 cmse_nonsecure_call_clear_caller_saved (void)
16985 basic_block bb;
16987 FOR_EACH_BB_FN (bb, cfun)
16989 rtx_insn *insn;
16991 FOR_BB_INSNS (bb, insn)
16993 uint64_t to_clear_mask, float_mask;
16994 rtx_insn *seq;
16995 rtx pat, call, unspec, reg, cleared_reg, tmp;
16996 unsigned int regno, maxregno;
16997 rtx address;
16998 CUMULATIVE_ARGS args_so_far_v;
16999 cumulative_args_t args_so_far;
17000 tree arg_type, fntype;
17001 bool using_r4, first_param = true;
17002 function_args_iterator args_iter;
17003 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17004 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
17006 if (!NONDEBUG_INSN_P (insn))
17007 continue;
17009 if (!CALL_P (insn))
17010 continue;
17012 pat = PATTERN (insn);
17013 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17014 call = XVECEXP (pat, 0, 0);
17016 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17017 if (GET_CODE (call) == SET)
17018 call = SET_SRC (call);
17020 /* Check if it is a cmse_nonsecure_call. */
17021 unspec = XEXP (call, 0);
17022 if (GET_CODE (unspec) != UNSPEC
17023 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17024 continue;
17026 /* Determine the caller-saved registers we need to clear. */
17027 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17028 maxregno = NUM_ARG_REGS - 1;
17029 /* Only look at the caller-saved floating point registers in case of
17030 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17031 lazy store and loads which clear both caller- and callee-saved
17032 registers. */
17033 if (TARGET_HARD_FLOAT_ABI)
17035 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17036 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17037 to_clear_mask |= float_mask;
17038 maxregno = D7_VFP_REGNUM;
17041 /* Make sure the register used to hold the function address is not
17042 cleared. */
17043 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17044 gcc_assert (MEM_P (address));
17045 gcc_assert (REG_P (XEXP (address, 0)));
17046 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17048 /* Set basic block of call insn so that df rescan is performed on
17049 insns inserted here. */
17050 set_block_for_insn (insn, bb);
17051 df_set_flags (DF_DEFER_INSN_RESCAN);
17052 start_sequence ();
17054 /* Make sure the scheduler doesn't schedule other insns beyond
17055 here. */
17056 emit_insn (gen_blockage ());
17058 /* Walk through all arguments and clear registers appropriately. */
17060 fntype = TREE_TYPE (MEM_EXPR (address));
17061 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17062 NULL_TREE);
17063 args_so_far = pack_cumulative_args (&args_so_far_v);
17064 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17066 rtx arg_rtx;
17067 machine_mode arg_mode = TYPE_MODE (arg_type);
17069 if (VOID_TYPE_P (arg_type))
17070 continue;
17072 if (!first_param)
17073 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17074 true);
17076 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17077 true);
17078 gcc_assert (REG_P (arg_rtx));
17079 to_clear_mask
17080 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17081 REGNO (arg_rtx),
17082 padding_bits_to_clear_ptr);
17084 first_param = false;
17087 /* Clear padding bits where needed. */
17088 cleared_reg = XEXP (address, 0);
17089 reg = gen_rtx_REG (SImode, IP_REGNUM);
17090 using_r4 = false;
17091 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17093 if (padding_bits_to_clear[regno] == 0)
17094 continue;
17096 /* If this is a Thumb-1 target, copy the address of the function
17097 we are calling from 'r4' into 'ip' such that we can use r4 to
17098 clear the unused bits in the arguments. */
17099 if (TARGET_THUMB1 && !using_r4)
17101 using_r4 = true;
17102 reg = cleared_reg;
17103 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17104 reg);
17107 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17108 emit_move_insn (reg, tmp);
17109 /* Also fill the top half of the negated
17110 padding_bits_to_clear. */
17111 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17113 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17114 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17115 GEN_INT (16),
17116 GEN_INT (16)),
17117 tmp));
17120 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17121 gen_rtx_REG (SImode, regno),
17122 reg));
17125 if (using_r4)
17126 emit_move_insn (cleared_reg,
17127 gen_rtx_REG (SImode, IP_REGNUM));
17129 /* We use right shift and left shift to clear the LSB of the address
17130 we jump to instead of using bic, to avoid having to use an extra
17131 register on Thumb-1. */
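/* For instance (illustrative), with the target address in r4 this emits
   roughly:
       lsrs    r4, r4, #1
       lsls    r4, r4, #1
   which clears bit 0 without needing a scratch register for a mask.  */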
17132 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17133 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17134 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17135 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17137 /* Clear all registers that could leak information before doing a
17138 non-secure call. */
17139 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17141 if (!(to_clear_mask & (1LL << regno)))
17142 continue;
17144 /* If regno is an even vfp register and its successor is also to
17145 be cleared, use vmov. */
17146 if (IS_VFP_REGNUM (regno))
17148 if (TARGET_VFP_DOUBLE
17149 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17150 && to_clear_mask & (1LL << (regno + 1)))
17151 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17152 CONST0_RTX (DFmode));
17153 else
17154 emit_move_insn (gen_rtx_REG (SFmode, regno),
17155 CONST0_RTX (SFmode));
17157 else
17158 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17161 seq = get_insns ();
17162 end_sequence ();
17163 emit_insn_before (seq, insn);
17169 /* Rewrite move insn into subtract of 0 if the condition codes will
17170 be useful in the next conditional jump insn. */
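/* Illustrative example (added comment): in a sequence such as
       movs    r3, r2
       ...
       cmp     r2, #0
       beq     .L1
   the move can be rewritten as "subs r3, r2, #0" and the compare retargeted
   to r3, so that the flags set by the subtract can satisfy the following
   conditional branch without a separate compare.  */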
17172 static void
17173 thumb1_reorg (void)
17175 basic_block bb;
17177 FOR_EACH_BB_FN (bb, cfun)
17179 rtx dest, src;
17180 rtx cmp, op0, op1, set = NULL;
17181 rtx_insn *prev, *insn = BB_END (bb);
17182 bool insn_clobbered = false;
17184 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17185 insn = PREV_INSN (insn);
17187 /* Find the last cbranchsi4_insn in basic block BB. */
17188 if (insn == BB_HEAD (bb)
17189 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17190 continue;
17192 /* Get the register with which we are comparing. */
17193 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17194 op0 = XEXP (cmp, 0);
17195 op1 = XEXP (cmp, 1);
17197 /* Check that comparison is against ZERO. */
17198 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17199 continue;
17201 /* Find the first flag setting insn before INSN in basic block BB. */
17202 gcc_assert (insn != BB_HEAD (bb));
17203 for (prev = PREV_INSN (insn);
17204 (!insn_clobbered
17205 && prev != BB_HEAD (bb)
17206 && (NOTE_P (prev)
17207 || DEBUG_INSN_P (prev)
17208 || ((set = single_set (prev)) != NULL
17209 && get_attr_conds (prev) == CONDS_NOCOND)));
17210 prev = PREV_INSN (prev))
17212 if (reg_set_p (op0, prev))
17213 insn_clobbered = true;
17216 /* Skip if op0 is clobbered by insn other than prev. */
17217 if (insn_clobbered)
17218 continue;
17220 if (!set)
17221 continue;
17223 dest = SET_DEST (set);
17224 src = SET_SRC (set);
17225 if (!low_register_operand (dest, SImode)
17226 || !low_register_operand (src, SImode))
17227 continue;
17229 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17230 in INSN. Both src and dest of the move insn are checked. */
17231 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17233 dest = copy_rtx (dest);
17234 src = copy_rtx (src);
17235 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17236 PATTERN (prev) = gen_rtx_SET (dest, src);
17237 INSN_CODE (prev) = -1;
17238 /* Set test register in INSN to dest. */
17239 XEXP (cmp, 0) = copy_rtx (dest);
17240 INSN_CODE (insn) = -1;
17245 /* Convert instructions to their cc-clobbering variant if possible, since
17246 that allows us to use smaller encodings. */
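/* Illustrative example (added comment): when the condition codes are dead
   at that point, a 32-bit "add r0, r1, r2" can be re-emitted as the
   flag-setting "adds r0, r1, r2", which has a 16-bit encoding and so saves
   two bytes.  */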
17248 static void
17249 thumb2_reorg (void)
17251 basic_block bb;
17252 regset_head live;
17254 INIT_REG_SET (&live);
17256 /* We are freeing block_for_insn in the toplev to keep compatibility
17257 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17258 compute_bb_for_insn ();
17259 df_analyze ();
17261 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17263 FOR_EACH_BB_FN (bb, cfun)
17265 if ((current_tune->disparage_flag_setting_t16_encodings
17266 == tune_params::DISPARAGE_FLAGS_ALL)
17267 && optimize_bb_for_speed_p (bb))
17268 continue;
17270 rtx_insn *insn;
17271 Convert_Action action = SKIP;
17272 Convert_Action action_for_partial_flag_setting
17273 = ((current_tune->disparage_flag_setting_t16_encodings
17274 != tune_params::DISPARAGE_FLAGS_NEITHER)
17275 && optimize_bb_for_speed_p (bb))
17276 ? SKIP : CONV;
17278 COPY_REG_SET (&live, DF_LR_OUT (bb));
17279 df_simulate_initialize_backwards (bb, &live);
17280 FOR_BB_INSNS_REVERSE (bb, insn)
17282 if (NONJUMP_INSN_P (insn)
17283 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17284 && GET_CODE (PATTERN (insn)) == SET)
17286 action = SKIP;
17287 rtx pat = PATTERN (insn);
17288 rtx dst = XEXP (pat, 0);
17289 rtx src = XEXP (pat, 1);
17290 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17292 if (UNARY_P (src) || BINARY_P (src))
17293 op0 = XEXP (src, 0);
17295 if (BINARY_P (src))
17296 op1 = XEXP (src, 1);
17298 if (low_register_operand (dst, SImode))
17300 switch (GET_CODE (src))
17302 case PLUS:
17303 /* Adding two registers and storing the result
17304 in the first source is already a 16-bit
17305 operation. */
17306 if (rtx_equal_p (dst, op0)
17307 && register_operand (op1, SImode))
17308 break;
17310 if (low_register_operand (op0, SImode))
17312 /* ADDS <Rd>,<Rn>,<Rm> */
17313 if (low_register_operand (op1, SImode))
17314 action = CONV;
17315 /* ADDS <Rdn>,#<imm8> */
17316 /* SUBS <Rdn>,#<imm8> */
17317 else if (rtx_equal_p (dst, op0)
17318 && CONST_INT_P (op1)
17319 && IN_RANGE (INTVAL (op1), -255, 255))
17320 action = CONV;
17321 /* ADDS <Rd>,<Rn>,#<imm3> */
17322 /* SUBS <Rd>,<Rn>,#<imm3> */
17323 else if (CONST_INT_P (op1)
17324 && IN_RANGE (INTVAL (op1), -7, 7))
17325 action = CONV;
17327 /* ADCS <Rd>, <Rn> */
17328 else if (GET_CODE (XEXP (src, 0)) == PLUS
17329 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17330 && low_register_operand (XEXP (XEXP (src, 0), 1),
17331 SImode)
17332 && COMPARISON_P (op1)
17333 && cc_register (XEXP (op1, 0), VOIDmode)
17334 && maybe_get_arm_condition_code (op1) == ARM_CS
17335 && XEXP (op1, 1) == const0_rtx)
17336 action = CONV;
17337 break;
17339 case MINUS:
17340 /* RSBS <Rd>,<Rn>,#0
17341 Not handled here: see NEG below. */
17342 /* SUBS <Rd>,<Rn>,#<imm3>
17343 SUBS <Rdn>,#<imm8>
17344 Not handled here: see PLUS above. */
17345 /* SUBS <Rd>,<Rn>,<Rm> */
17346 if (low_register_operand (op0, SImode)
17347 && low_register_operand (op1, SImode))
17348 action = CONV;
17349 break;
17351 case MULT:
17352 /* MULS <Rdm>,<Rn>,<Rdm>
17353 As an exception to the rule, this is only used
17354 when optimizing for size since MULS is slow on all
17355 known implementations. We do not even want to use
17356 MULS in cold code, if optimizing for speed, so we
17357 test the global flag here. */
17358 if (!optimize_size)
17359 break;
17360 /* Fall through. */
17361 case AND:
17362 case IOR:
17363 case XOR:
17364 /* ANDS <Rdn>,<Rm> */
17365 if (rtx_equal_p (dst, op0)
17366 && low_register_operand (op1, SImode))
17367 action = action_for_partial_flag_setting;
17368 else if (rtx_equal_p (dst, op1)
17369 && low_register_operand (op0, SImode))
17370 action = action_for_partial_flag_setting == SKIP
17371 ? SKIP : SWAP_CONV;
17372 break;
17374 case ASHIFTRT:
17375 case ASHIFT:
17376 case LSHIFTRT:
17377 /* ASRS <Rdn>,<Rm> */
17378 /* LSRS <Rdn>,<Rm> */
17379 /* LSLS <Rdn>,<Rm> */
17380 if (rtx_equal_p (dst, op0)
17381 && low_register_operand (op1, SImode))
17382 action = action_for_partial_flag_setting;
17383 /* ASRS <Rd>,<Rm>,#<imm5> */
17384 /* LSRS <Rd>,<Rm>,#<imm5> */
17385 /* LSLS <Rd>,<Rm>,#<imm5> */
17386 else if (low_register_operand (op0, SImode)
17387 && CONST_INT_P (op1)
17388 && IN_RANGE (INTVAL (op1), 0, 31))
17389 action = action_for_partial_flag_setting;
17390 break;
17392 case ROTATERT:
17393 /* RORS <Rdn>,<Rm> */
17394 if (rtx_equal_p (dst, op0)
17395 && low_register_operand (op1, SImode))
17396 action = action_for_partial_flag_setting;
17397 break;
17399 case NOT:
17400 /* MVNS <Rd>,<Rm> */
17401 if (low_register_operand (op0, SImode))
17402 action = action_for_partial_flag_setting;
17403 break;
17405 case NEG:
17406 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17407 if (low_register_operand (op0, SImode))
17408 action = CONV;
17409 break;
17411 case CONST_INT:
17412 /* MOVS <Rd>,#<imm8> */
17413 if (CONST_INT_P (src)
17414 && IN_RANGE (INTVAL (src), 0, 255))
17415 action = action_for_partial_flag_setting;
17416 break;
17418 case REG:
17419 /* MOVS and MOV<c> with registers have different
17420 encodings, so are not relevant here. */
17421 break;
17423 default:
17424 break;
17428 if (action != SKIP)
17430 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17431 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17432 rtvec vec;
17434 if (action == SWAP_CONV)
17436 src = copy_rtx (src);
17437 XEXP (src, 0) = op1;
17438 XEXP (src, 1) = op0;
17439 pat = gen_rtx_SET (dst, src);
17440 vec = gen_rtvec (2, pat, clobber);
17442 else /* action == CONV */
17443 vec = gen_rtvec (2, pat, clobber);
17445 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17446 INSN_CODE (insn) = -1;
17450 if (NONDEBUG_INSN_P (insn))
17451 df_simulate_one_insn_backwards (bb, insn, &live);
17455 CLEAR_REG_SET (&live);
17458 /* GCC puts the pool in the wrong place for ARM, since we can only
17459 load addresses a limited distance from the PC. We do some
17460 special munging to move the constant pool values to the correct
17461 point in the code. */
17462 static void
17463 arm_reorg (void)
17465 rtx_insn *insn;
17466 HOST_WIDE_INT address = 0;
17467 Mfix * fix;
17469 if (use_cmse)
17470 cmse_nonsecure_call_clear_caller_saved ();
17471 if (TARGET_THUMB1)
17472 thumb1_reorg ();
17473 else if (TARGET_THUMB2)
17474 thumb2_reorg ();
17476 /* Ensure all insns that must be split have been split at this point.
17477 Otherwise, the pool placement code below may compute incorrect
17478 insn lengths. Note that when optimizing, all insns have already
17479 been split at this point. */
17480 if (!optimize)
17481 split_all_insns_noflow ();
17483 /* Make sure we do not attempt to create a literal pool even though it should
17484 no longer be necessary to create any. */
17485 if (arm_disable_literal_pool)
17486 return;
17488 minipool_fix_head = minipool_fix_tail = NULL;
17490 /* The first insn must always be a note, or the code below won't
17491 scan it properly. */
17492 insn = get_insns ();
17493 gcc_assert (NOTE_P (insn));
17494 minipool_pad = 0;
17496 /* Scan all the insns and record the operands that will need fixing. */
17497 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17499 if (BARRIER_P (insn))
17500 push_minipool_barrier (insn, address);
17501 else if (INSN_P (insn))
17503 rtx_jump_table_data *table;
17505 note_invalid_constants (insn, address, true);
17506 address += get_attr_length (insn);
17508 /* If the insn is a vector jump, add the size of the table
17509 and skip the table. */
17510 if (tablejump_p (insn, NULL, &table))
17512 address += get_jump_table_size (table);
17513 insn = table;
17516 else if (LABEL_P (insn))
17517 /* Add the worst-case padding due to alignment. We don't add
17518 the _current_ padding because the minipool insertions
17519 themselves might change it. */
17520 address += get_label_padding (insn);
17523 fix = minipool_fix_head;
17525 /* Now scan the fixups and perform the required changes. */
17526 while (fix)
17528 Mfix * ftmp;
17529 Mfix * fdel;
17530 Mfix * last_added_fix;
17531 Mfix * last_barrier = NULL;
17532 Mfix * this_fix;
17534 /* Skip any further barriers before the next fix. */
17535 while (fix && BARRIER_P (fix->insn))
17536 fix = fix->next;
17538 /* No more fixes. */
17539 if (fix == NULL)
17540 break;
17542 last_added_fix = NULL;
17544 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17546 if (BARRIER_P (ftmp->insn))
17548 if (ftmp->address >= minipool_vector_head->max_address)
17549 break;
17551 last_barrier = ftmp;
17553 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17554 break;
17556 last_added_fix = ftmp; /* Keep track of the last fix added. */
17559 /* If we found a barrier, drop back to that; any fixes that we
17560 could have reached but come after the barrier will now go in
17561 the next mini-pool. */
17562 if (last_barrier != NULL)
17564 /* Reduce the refcount for those fixes that won't go into this
17565 pool after all. */
17566 for (fdel = last_barrier->next;
17567 fdel && fdel != ftmp;
17568 fdel = fdel->next)
17570 fdel->minipool->refcount--;
17571 fdel->minipool = NULL;
17574 ftmp = last_barrier;
17576 else
17578 /* ftmp is the first fix that we can't fit into this pool and
17579 there are no natural barriers that we could use. Insert a
17580 new barrier in the code somewhere between the previous
17581 fix and this one, and arrange to jump around it. */
17582 HOST_WIDE_INT max_address;
17584 /* The last item on the list of fixes must be a barrier, so
17585 we can never run off the end of the list of fixes without
17586 last_barrier being set. */
17587 gcc_assert (ftmp);
17589 max_address = minipool_vector_head->max_address;
17590 /* Check that there isn't another fix that is in range that
17591 we couldn't fit into this pool because the pool was
17592 already too large: we need to put the pool before such an
17593 instruction. The pool itself may come just after the
17594 fix because create_fix_barrier also allows space for a
17595 jump instruction. */
17596 if (ftmp->address < max_address)
17597 max_address = ftmp->address + 1;
17599 last_barrier = create_fix_barrier (last_added_fix, max_address);
17602 assign_minipool_offsets (last_barrier);
17604 while (ftmp)
17606 if (!BARRIER_P (ftmp->insn)
17607 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17608 == NULL))
17609 break;
17611 ftmp = ftmp->next;
17614 /* Scan over the fixes we have identified for this pool, fixing them
17615 up and adding the constants to the pool itself. */
17616 for (this_fix = fix; this_fix && ftmp != this_fix;
17617 this_fix = this_fix->next)
17618 if (!BARRIER_P (this_fix->insn))
17620 rtx addr
17621 = plus_constant (Pmode,
17622 gen_rtx_LABEL_REF (VOIDmode,
17623 minipool_vector_label),
17624 this_fix->minipool->offset);
17625 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17628 dump_minipool (last_barrier->insn);
17629 fix = ftmp;
17632 /* From now on we must synthesize any constants that we can't handle
17633 directly. This can happen if the RTL gets split during final
17634 instruction generation. */
17635 cfun->machine->after_arm_reorg = 1;
17637 /* Free the minipool memory. */
17638 obstack_free (&minipool_obstack, minipool_startobj);
17641 /* Routines to output assembly language. */
17643 /* Return string representation of passed in real value. */
17644 static const char *
17645 fp_const_from_val (REAL_VALUE_TYPE *r)
17647 if (!fp_consts_inited)
17648 init_fp_table ();
17650 gcc_assert (real_equal (r, &value_fp0));
17651 return "0";
17654 /* OPERANDS[0] is the entire list of insns that constitute pop,
17655 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17656 is in the list, UPDATE is true iff the list contains explicit
17657 update of base register. */
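/* For example (illustrative), a pop of {r4, r5, pc} with SP as the base
   register and writeback is printed as "pop\t{r4, r5, pc}" (plus any
   condition suffix), whereas a non-SP base without writeback uses the
   corresponding "ldm" form.  */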
17658 void
17659 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17660 bool update)
17662 int i;
17663 char pattern[100];
17664 int offset;
17665 const char *conditional;
17666 int num_saves = XVECLEN (operands[0], 0);
17667 unsigned int regno;
17668 unsigned int regno_base = REGNO (operands[1]);
17669 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17671 offset = 0;
17672 offset += update ? 1 : 0;
17673 offset += return_pc ? 1 : 0;
17675 /* Is the base register in the list? */
17676 for (i = offset; i < num_saves; i++)
17678 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17679 /* If SP is in the list, then the base register must be SP. */
17680 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17681 /* If base register is in the list, there must be no explicit update. */
17682 if (regno == regno_base)
17683 gcc_assert (!update);
17686 conditional = reverse ? "%?%D0" : "%?%d0";
17687 /* Can't use POP if returning from an interrupt. */
17688 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17689 sprintf (pattern, "pop%s\t{", conditional);
17690 else
17692 /* Output ldmfd when the base register is SP; otherwise output ldmia.
17693 It's just a convention; their semantics are identical. */
17694 if (regno_base == SP_REGNUM)
17695 sprintf (pattern, "ldmfd%s\t", conditional);
17696 else if (update)
17697 sprintf (pattern, "ldmia%s\t", conditional);
17698 else
17699 sprintf (pattern, "ldm%s\t", conditional);
17701 strcat (pattern, reg_names[regno_base]);
17702 if (update)
17703 strcat (pattern, "!, {");
17704 else
17705 strcat (pattern, ", {");
17708 /* Output the first destination register. */
17709 strcat (pattern,
17710 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17712 /* Output the rest of the destination registers. */
17713 for (i = offset + 1; i < num_saves; i++)
17715 strcat (pattern, ", ");
17716 strcat (pattern,
17717 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17720 strcat (pattern, "}");
17722 if (interrupt_p && return_pc)
17723 strcat (pattern, "^");
17725 output_asm_insn (pattern, &cond);
17729 /* Output the assembly for a store multiple. */
17731 const char *
17732 vfp_output_vstmd (rtx * operands)
17734 char pattern[100];
17735 int p;
17736 int base;
17737 int i;
17738 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17739 ? XEXP (operands[0], 0)
17740 : XEXP (XEXP (operands[0], 0), 0);
17741 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17743 if (push_p)
17744 strcpy (pattern, "vpush%?.64\t{%P1");
17745 else
17746 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17748 p = strlen (pattern);
17750 gcc_assert (REG_P (operands[1]));
17752 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17753 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17755 p += sprintf (&pattern[p], ", d%d", base + i);
17757 strcpy (&pattern[p], "}");
17759 output_asm_insn (pattern, operands);
17760 return "";
17764 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17765 number of bytes pushed. */
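/* For example (illustrative comment): a call with COUNT == 8 emits a single
   push-multiple of eight D registers, i.e. 64 bytes of stack, and returns
   64; larger blocks are split so that no single store touches more than 16
   doubleword registers.  */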
17767 static int
17768 vfp_emit_fstmd (int base_reg, int count)
17770 rtx par;
17771 rtx dwarf;
17772 rtx tmp, reg;
17773 int i;
17775 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17776 register pairs are stored by a store multiple insn. We avoid this
17777 by pushing an extra pair. */
17778 if (count == 2 && !arm_arch6)
17780 if (base_reg == LAST_VFP_REGNUM - 3)
17781 base_reg -= 2;
17782 count++;
17785 /* FSTMD may not store more than 16 doubleword registers at once. Split
17786 larger stores into multiple parts (up to a maximum of two, in
17787 practice). */
17788 if (count > 16)
17790 int saved;
17791 /* NOTE: base_reg is an internal register number, so each D register
17792 counts as 2. */
17793 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17794 saved += vfp_emit_fstmd (base_reg, 16);
17795 return saved;
17798 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17799 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17801 reg = gen_rtx_REG (DFmode, base_reg);
17802 base_reg += 2;
17804 XVECEXP (par, 0, 0)
17805 = gen_rtx_SET (gen_frame_mem
17806 (BLKmode,
17807 gen_rtx_PRE_MODIFY (Pmode,
17808 stack_pointer_rtx,
17809 plus_constant
17810 (Pmode, stack_pointer_rtx,
17811 - (count * 8)))
17813 gen_rtx_UNSPEC (BLKmode,
17814 gen_rtvec (1, reg),
17815 UNSPEC_PUSH_MULT));
17817 tmp = gen_rtx_SET (stack_pointer_rtx,
17818 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17819 RTX_FRAME_RELATED_P (tmp) = 1;
17820 XVECEXP (dwarf, 0, 0) = tmp;
17822 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17823 RTX_FRAME_RELATED_P (tmp) = 1;
17824 XVECEXP (dwarf, 0, 1) = tmp;
17826 for (i = 1; i < count; i++)
17828 reg = gen_rtx_REG (DFmode, base_reg);
17829 base_reg += 2;
17830 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17832 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17833 plus_constant (Pmode,
17834 stack_pointer_rtx,
17835 i * 8)),
17836 reg);
17837 RTX_FRAME_RELATED_P (tmp) = 1;
17838 XVECEXP (dwarf, 0, i + 1) = tmp;
17841 par = emit_insn (par);
17842 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17843 RTX_FRAME_RELATED_P (par) = 1;
17845 return count * 8;
17848 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17849 has the cmse_nonsecure_call attribute, and false otherwise. */
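/* For reference (added comment), such calls are normally made through a
   function-pointer type that carries the attribute, e.g.:
       typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
   where 'ns_fn_t' is only an illustrative name.  */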
17851 bool
17852 detect_cmse_nonsecure_call (tree addr)
17854 if (!addr)
17855 return FALSE;
17857 tree fntype = TREE_TYPE (addr);
17858 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17859 TYPE_ATTRIBUTES (fntype)))
17860 return TRUE;
17861 return FALSE;
17865 /* Emit a call instruction with pattern PAT. ADDR is the address of
17866 the call target. */
17868 void
17869 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17871 rtx insn;
17873 insn = emit_call_insn (pat);
17875 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17876 If the call might use such an entry, add a use of the PIC register
17877 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17878 if (TARGET_VXWORKS_RTP
17879 && flag_pic
17880 && !sibcall
17881 && GET_CODE (addr) == SYMBOL_REF
17882 && (SYMBOL_REF_DECL (addr)
17883 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17884 : !SYMBOL_REF_LOCAL_P (addr)))
17886 require_pic_register ();
17887 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17890 if (TARGET_AAPCS_BASED)
17892 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17893 linker. We need to add an IP clobber to allow setting
17894 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17895 is not needed since it's a fixed register. */
17896 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17897 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17901 /* Output a 'call' insn. */
17902 const char *
17903 output_call (rtx *operands)
17905 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17907 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17908 if (REGNO (operands[0]) == LR_REGNUM)
17910 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17911 output_asm_insn ("mov%?\t%0, %|lr", operands);
17914 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17916 if (TARGET_INTERWORK || arm_arch4t)
17917 output_asm_insn ("bx%?\t%0", operands);
17918 else
17919 output_asm_insn ("mov%?\t%|pc, %0", operands);
17921 return "";
17924 /* Output a move from arm registers to arm registers of a long double.
17925 OPERANDS[0] is the destination.
17926 OPERANDS[1] is the source. */
17927 const char *
17928 output_mov_long_double_arm_from_arm (rtx *operands)
17930 /* We have to be careful here because the two might overlap. */
17931 int dest_start = REGNO (operands[0]);
17932 int src_start = REGNO (operands[1]);
17933 rtx ops[2];
17934 int i;
17936 if (dest_start < src_start)
17938 for (i = 0; i < 3; i++)
17940 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17941 ops[1] = gen_rtx_REG (SImode, src_start + i);
17942 output_asm_insn ("mov%?\t%0, %1", ops);
17945 else
17947 for (i = 2; i >= 0; i--)
17949 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17950 ops[1] = gen_rtx_REG (SImode, src_start + i);
17951 output_asm_insn ("mov%?\t%0, %1", ops);
17955 return "";
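/* Emit SRC into DEST as a pair of 16-bit sets: for a CONST_INT the low 16
   bits are written first and, if needed, the high 16 bits are then inserted
   with a ZERO_EXTRACT (typically corresponding to a movw/movt pair);
   otherwise a HIGH/LO_SUM pair is emitted.  A REG_EQUAL note records the
   original value.  (Descriptive comment added; see the code below.)  */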
17958 void
17959 arm_emit_movpair (rtx dest, rtx src)
17961 /* If the src is an immediate, simplify it. */
17962 if (CONST_INT_P (src))
17964 HOST_WIDE_INT val = INTVAL (src);
17965 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17966 if ((val >> 16) & 0x0000ffff)
17968 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17969 GEN_INT (16)),
17970 GEN_INT ((val >> 16) & 0x0000ffff));
17971 rtx_insn *insn = get_last_insn ();
17972 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17974 return;
17976 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17977 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17978 rtx_insn *insn = get_last_insn ();
17979 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17982 /* Output a move between double words. It must be REG<-MEM
17983 or MEM<-REG. */
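/* Illustrative examples (added comment): a DImode load from a plain
   register address is normally printed as "ldrd r0, [r2]" when LDRD is
   available, or "ldmia r2, {r0, r1}" otherwise; stores use the matching
   "strd"/"stm" forms.  */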
17984 const char *
17985 output_move_double (rtx *operands, bool emit, int *count)
17987 enum rtx_code code0 = GET_CODE (operands[0]);
17988 enum rtx_code code1 = GET_CODE (operands[1]);
17989 rtx otherops[3];
17990 if (count)
17991 *count = 1;
17993 /* The only case when this might happen is when
17994 you are looking at the length of a DImode instruction
17995 that has an invalid constant in it. */
17996 if (code0 == REG && code1 != MEM)
17998 gcc_assert (!emit);
17999 *count = 2;
18000 return "";
18003 if (code0 == REG)
18005 unsigned int reg0 = REGNO (operands[0]);
18007 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18009 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18011 switch (GET_CODE (XEXP (operands[1], 0)))
18013 case REG:
18015 if (emit)
18017 if (TARGET_LDRD
18018 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18019 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18020 else
18021 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18023 break;
18025 case PRE_INC:
18026 gcc_assert (TARGET_LDRD);
18027 if (emit)
18028 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18029 break;
18031 case PRE_DEC:
18032 if (emit)
18034 if (TARGET_LDRD)
18035 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18036 else
18037 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18039 break;
18041 case POST_INC:
18042 if (emit)
18044 if (TARGET_LDRD)
18045 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18046 else
18047 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18049 break;
18051 case POST_DEC:
18052 gcc_assert (TARGET_LDRD);
18053 if (emit)
18054 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18055 break;
18057 case PRE_MODIFY:
18058 case POST_MODIFY:
18059 /* Auto-increment addressing modes should never have overlapping
18060 base and destination registers, and overlapping index registers
18061 are already prohibited, so this doesn't need to worry about
18062 fix_cm3_ldrd. */
18063 otherops[0] = operands[0];
18064 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18065 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18067 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18069 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18071 /* Registers overlap so split out the increment. */
18072 if (emit)
18074 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18075 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18077 if (count)
18078 *count = 2;
18080 else
18082 /* Use a single insn if we can.
18083 FIXME: IWMMXT allows offsets larger than ldrd can
18084 handle; fix these up with a pair of ldr. */
18085 if (TARGET_THUMB2
18086 || !CONST_INT_P (otherops[2])
18087 || (INTVAL (otherops[2]) > -256
18088 && INTVAL (otherops[2]) < 256))
18090 if (emit)
18091 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18093 else
18095 if (emit)
18097 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18098 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18100 if (count)
18101 *count = 2;
18106 else
18108 /* Use a single insn if we can.
18109 FIXME: IWMMXT allows offsets larger than ldrd can handle;
18110 fix these up with a pair of ldr. */
18111 if (TARGET_THUMB2
18112 || !CONST_INT_P (otherops[2])
18113 || (INTVAL (otherops[2]) > -256
18114 && INTVAL (otherops[2]) < 256))
18116 if (emit)
18117 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18119 else
18121 if (emit)
18123 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18124 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18126 if (count)
18127 *count = 2;
18130 break;
18132 case LABEL_REF:
18133 case CONST:
18134 /* We might be able to use ldrd %0, %1 here. However, the range is
18135 different from that of ldr/adr, and it is broken on some ARMv7-M
18136 implementations. */
18137 /* Use the second register of the pair to avoid problematic
18138 overlap. */
18139 otherops[1] = operands[1];
18140 if (emit)
18141 output_asm_insn ("adr%?\t%0, %1", otherops);
18142 operands[1] = otherops[0];
18143 if (emit)
18145 if (TARGET_LDRD)
18146 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18147 else
18148 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18151 if (count)
18152 *count = 2;
18153 break;
18155 /* ??? This needs checking for thumb2. */
18156 default:
18157 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18158 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18160 otherops[0] = operands[0];
18161 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18162 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18164 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18166 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18168 switch ((int) INTVAL (otherops[2]))
18170 case -8:
18171 if (emit)
18172 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18173 return "";
18174 case -4:
18175 if (TARGET_THUMB2)
18176 break;
18177 if (emit)
18178 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18179 return "";
18180 case 4:
18181 if (TARGET_THUMB2)
18182 break;
18183 if (emit)
18184 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18185 return "";
18188 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18189 operands[1] = otherops[0];
18190 if (TARGET_LDRD
18191 && (REG_P (otherops[2])
18192 || TARGET_THUMB2
18193 || (CONST_INT_P (otherops[2])
18194 && INTVAL (otherops[2]) > -256
18195 && INTVAL (otherops[2]) < 256)))
18197 if (reg_overlap_mentioned_p (operands[0],
18198 otherops[2]))
18200 /* Swap base and index registers over to
18201 avoid a conflict. */
18202 std::swap (otherops[1], otherops[2]);
18204 /* If both registers conflict, it will usually
18205 have been fixed by a splitter. */
18206 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18207 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18209 if (emit)
18211 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18212 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18214 if (count)
18215 *count = 2;
18217 else
18219 otherops[0] = operands[0];
18220 if (emit)
18221 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18223 return "";
18226 if (CONST_INT_P (otherops[2]))
18228 if (emit)
18230 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18231 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18232 else
18233 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18236 else
18238 if (emit)
18239 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18242 else
18244 if (emit)
18245 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18248 if (count)
18249 *count = 2;
18251 if (TARGET_LDRD)
18252 return "ldrd%?\t%0, [%1]";
18254 return "ldmia%?\t%1, %M0";
18256 else
18258 otherops[1] = adjust_address (operands[1], SImode, 4);
18259 /* Take care of overlapping base/data reg. */
18260 if (reg_mentioned_p (operands[0], operands[1]))
18262 if (emit)
18264 output_asm_insn ("ldr%?\t%0, %1", otherops);
18265 output_asm_insn ("ldr%?\t%0, %1", operands);
18267 if (count)
18268 *count = 2;
18271 else
18273 if (emit)
18275 output_asm_insn ("ldr%?\t%0, %1", operands);
18276 output_asm_insn ("ldr%?\t%0, %1", otherops);
18278 if (count)
18279 *count = 2;
18284 else
18286 /* Constraints should ensure this. */
18287 gcc_assert (code0 == MEM && code1 == REG);
18288 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18289 || (TARGET_ARM && TARGET_LDRD));
18291 switch (GET_CODE (XEXP (operands[0], 0)))
18293 case REG:
18294 if (emit)
18296 if (TARGET_LDRD)
18297 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18298 else
18299 output_asm_insn ("stm%?\t%m0, %M1", operands);
18301 break;
18303 case PRE_INC:
18304 gcc_assert (TARGET_LDRD);
18305 if (emit)
18306 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18307 break;
18309 case PRE_DEC:
18310 if (emit)
18312 if (TARGET_LDRD)
18313 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18314 else
18315 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18317 break;
18319 case POST_INC:
18320 if (emit)
18322 if (TARGET_LDRD)
18323 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18324 else
18325 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18327 break;
18329 case POST_DEC:
18330 gcc_assert (TARGET_LDRD);
18331 if (emit)
18332 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18333 break;
18335 case PRE_MODIFY:
18336 case POST_MODIFY:
18337 otherops[0] = operands[1];
18338 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18339 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18341 /* IWMMXT allows offsets larger than ldrd can handle;
18342 fix these up with a pair of ldr. */
18343 if (!TARGET_THUMB2
18344 && CONST_INT_P (otherops[2])
18345 && (INTVAL(otherops[2]) <= -256
18346 || INTVAL(otherops[2]) >= 256))
18348 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18350 if (emit)
18352 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18353 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18355 if (count)
18356 *count = 2;
18358 else
18360 if (emit)
18362 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18363 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18365 if (count)
18366 *count = 2;
18369 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18371 if (emit)
18372 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18374 else
18376 if (emit)
18377 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18379 break;
18381 case PLUS:
18382 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18383 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18385 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18387 case -8:
18388 if (emit)
18389 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18390 return "";
18392 case -4:
18393 if (TARGET_THUMB2)
18394 break;
18395 if (emit)
18396 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18397 return "";
18399 case 4:
18400 if (TARGET_THUMB2)
18401 break;
18402 if (emit)
18403 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18404 return "";
18407 if (TARGET_LDRD
18408 && (REG_P (otherops[2])
18409 || TARGET_THUMB2
18410 || (CONST_INT_P (otherops[2])
18411 && INTVAL (otherops[2]) > -256
18412 && INTVAL (otherops[2]) < 256)))
18414 otherops[0] = operands[1];
18415 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18416 if (emit)
18417 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18418 return "";
18420 /* Fall through */
18422 default:
18423 otherops[0] = adjust_address (operands[0], SImode, 4);
18424 otherops[1] = operands[1];
18425 if (emit)
18427 output_asm_insn ("str%?\t%1, %0", operands);
18428 output_asm_insn ("str%?\t%H1, %0", otherops);
18430 if (count)
18431 *count = 2;
18435 return "";
18438 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18439 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18441 const char *
18442 output_move_quad (rtx *operands)
18444 if (REG_P (operands[0]))
18446 /* Load, or reg->reg move. */
18448 if (MEM_P (operands[1]))
18450 switch (GET_CODE (XEXP (operands[1], 0)))
18452 case REG:
18453 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18454 break;
18456 case LABEL_REF:
18457 case CONST:
18458 output_asm_insn ("adr%?\t%0, %1", operands);
18459 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18460 break;
18462 default:
18463 gcc_unreachable ();
18466 else
18468 rtx ops[2];
18469 int dest, src, i;
18471 gcc_assert (REG_P (operands[1]));
18473 dest = REGNO (operands[0]);
18474 src = REGNO (operands[1]);
18476 /* This seems pretty dumb, but hopefully GCC won't try to do it
18477 very often. */
18478 if (dest < src)
18479 for (i = 0; i < 4; i++)
18481 ops[0] = gen_rtx_REG (SImode, dest + i);
18482 ops[1] = gen_rtx_REG (SImode, src + i);
18483 output_asm_insn ("mov%?\t%0, %1", ops);
18485 else
18486 for (i = 3; i >= 0; i--)
18488 ops[0] = gen_rtx_REG (SImode, dest + i);
18489 ops[1] = gen_rtx_REG (SImode, src + i);
18490 output_asm_insn ("mov%?\t%0, %1", ops);
18494 else
18496 gcc_assert (MEM_P (operands[0]));
18497 gcc_assert (REG_P (operands[1]));
18498 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18500 switch (GET_CODE (XEXP (operands[0], 0)))
18502 case REG:
18503 output_asm_insn ("stm%?\t%m0, %M1", operands);
18504 break;
18506 default:
18507 gcc_unreachable ();
18511 return "";
18514 /* Output a VFP load or store instruction. */
18516 const char *
18517 output_move_vfp (rtx *operands)
18519 rtx reg, mem, addr, ops[2];
18520 int load = REG_P (operands[0]);
18521 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18522 int sp = (!TARGET_VFP_FP16INST
18523 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18524 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18525 const char *templ;
18526 char buff[50];
18527 machine_mode mode;
18529 reg = operands[!load];
18530 mem = operands[load];
18532 mode = GET_MODE (reg);
18534 gcc_assert (REG_P (reg));
18535 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18536 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18537 || mode == SFmode
18538 || mode == DFmode
18539 || mode == HImode
18540 || mode == SImode
18541 || mode == DImode
18542 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18543 gcc_assert (MEM_P (mem));
18545 addr = XEXP (mem, 0);
18547 switch (GET_CODE (addr))
18549 case PRE_DEC:
18550 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18551 ops[0] = XEXP (addr, 0);
18552 ops[1] = reg;
18553 break;
18555 case POST_INC:
18556 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18557 ops[0] = XEXP (addr, 0);
18558 ops[1] = reg;
18559 break;
18561 default:
18562 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18563 ops[0] = reg;
18564 ops[1] = mem;
18565 break;
18568 sprintf (buff, templ,
18569 load ? "ld" : "st",
18570 dp ? "64" : sp ? "32" : "16",
18571 dp ? "P" : "",
18572 integer_p ? "\t%@ int" : "");
18573 output_asm_insn (buff, ops);
18575 return "";
18578 /* Output a Neon double-word or quad-word load or store, or a load
18579 or store for larger structure modes.
18581 WARNING: The ordering of elements is weird in big-endian mode,
18582 because the EABI requires that vectors stored in memory appear
18583 as though they were stored by a VSTM instruction.
18584 GCC RTL defines element ordering based on in-memory order.
18585 This can be different from the architectural ordering of elements
18586 within a NEON register. The intrinsics defined in arm_neon.h use the
18587 NEON register element ordering, not the GCC RTL element ordering.
18589 For example, the in-memory ordering of a big-endian quadword
18590 vector with 16-bit elements when stored from register pair {d0,d1}
18591 will be (lowest address first, d0[N] is NEON register element N):
18593 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18595 When necessary, quadword registers (dN, dN+1) are moved to ARM
18596 registers from rN in the order:
18598 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18600 So that STM/LDM can be used on vectors in ARM registers, and the
18601 same memory layout will result as if VSTM/VLDM were used.
18603 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18604 possible, which allows use of appropriate alignment tags.
18605 Note that the choice of "64" is independent of the actual vector
18606 element size; this size simply ensures that the behavior is
18607 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18609 Due to limitations of those instructions, use of VST1.64/VLD1.64
18610 is not possible if:
18611 - the address contains PRE_DEC, or
18612 - the mode refers to more than 4 double-word registers
18614 In those cases, it would be possible to replace VSTM/VLDM by a
18615 sequence of instructions; this is not currently implemented since
18616 this is not certain to actually improve performance. */
18618 const char *
18619 output_move_neon (rtx *operands)
18621 rtx reg, mem, addr, ops[2];
18622 int regno, nregs, load = REG_P (operands[0]);
18623 const char *templ;
18624 char buff[50];
18625 machine_mode mode;
18627 reg = operands[!load];
18628 mem = operands[load];
18630 mode = GET_MODE (reg);
18632 gcc_assert (REG_P (reg));
18633 regno = REGNO (reg);
18634 nregs = REG_NREGS (reg) / 2;
18635 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18636 || NEON_REGNO_OK_FOR_QUAD (regno));
18637 gcc_assert (VALID_NEON_DREG_MODE (mode)
18638 || VALID_NEON_QREG_MODE (mode)
18639 || VALID_NEON_STRUCT_MODE (mode));
18640 gcc_assert (MEM_P (mem));
18642 addr = XEXP (mem, 0);
18644 /* Strip off const from addresses like (const (plus (...))). */
18645 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18646 addr = XEXP (addr, 0);
18648 switch (GET_CODE (addr))
18650 case POST_INC:
18651 /* We have to use vldm / vstm for too-large modes. */
18652 if (nregs > 4)
18654 templ = "v%smia%%?\t%%0!, %%h1";
18655 ops[0] = XEXP (addr, 0);
18657 else
18659 templ = "v%s1.64\t%%h1, %%A0";
18660 ops[0] = mem;
18662 ops[1] = reg;
18663 break;
18665 case PRE_DEC:
18666 /* We have to use vldm / vstm in this case, since there is no
18667 pre-decrement form of the vld1 / vst1 instructions. */
18668 templ = "v%smdb%%?\t%%0!, %%h1";
18669 ops[0] = XEXP (addr, 0);
18670 ops[1] = reg;
18671 break;
18673 case POST_MODIFY:
18674 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18675 gcc_unreachable ();
18677 case REG:
18678 /* We have to use vldm / vstm for too-large modes. */
18679 if (nregs > 1)
18681 if (nregs > 4)
18682 templ = "v%smia%%?\t%%m0, %%h1";
18683 else
18684 templ = "v%s1.64\t%%h1, %%A0";
18686 ops[0] = mem;
18687 ops[1] = reg;
18688 break;
18690 /* Fall through. */
18691 case LABEL_REF:
18692 case PLUS:
18694 int i;
18695 int overlap = -1;
18696 for (i = 0; i < nregs; i++)
18698 /* We're only using DImode here because it's a convenient size. */
18699 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18700 ops[1] = adjust_address (mem, DImode, 8 * i);
18701 if (reg_overlap_mentioned_p (ops[0], mem))
18703 gcc_assert (overlap == -1);
18704 overlap = i;
18706 else
18708 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18709 output_asm_insn (buff, ops);
18712 if (overlap != -1)
18714 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18715 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18716 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18717 output_asm_insn (buff, ops);
18720 return "";
18723 default:
18724 gcc_unreachable ();
18727 sprintf (buff, templ, load ? "ld" : "st");
18728 output_asm_insn (buff, ops);
18730 return "";
18733 /* Compute and return the length of neon_mov<mode>, where <mode> is
18734 one of VSTRUCT modes: EI, OI, CI or XI. */
18736 arm_attr_length_move_neon (rtx_insn *insn)
18738 rtx reg, mem, addr;
18739 int load;
18740 machine_mode mode;
18742 extract_insn_cached (insn);
18744 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18746 mode = GET_MODE (recog_data.operand[0]);
18747 switch (mode)
18749 case E_EImode:
18750 case E_OImode:
18751 return 8;
18752 case E_CImode:
18753 return 12;
18754 case E_XImode:
18755 return 16;
18756 default:
18757 gcc_unreachable ();
18761 load = REG_P (recog_data.operand[0]);
18762 reg = recog_data.operand[!load];
18763 mem = recog_data.operand[load];
18765 gcc_assert (MEM_P (mem));
18767 addr = XEXP (mem, 0);
18769 /* Strip off const from addresses like (const (plus (...))). */
18770 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18771 addr = XEXP (addr, 0);
18773 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18775 int insns = REG_NREGS (reg) / 2;
18776 return insns * 4;
18778 else
18779 return 4;
18782 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18783 return zero. */
18786 arm_address_offset_is_imm (rtx_insn *insn)
18788 rtx mem, addr;
18790 extract_insn_cached (insn);
18792 if (REG_P (recog_data.operand[0]))
18793 return 0;
18795 mem = recog_data.operand[0];
18797 gcc_assert (MEM_P (mem));
18799 addr = XEXP (mem, 0);
18801 if (REG_P (addr)
18802 || (GET_CODE (addr) == PLUS
18803 && REG_P (XEXP (addr, 0))
18804 && CONST_INT_P (XEXP (addr, 1))))
18805 return 1;
18806 else
18807 return 0;
18810 /* Output an ADD r, s, #n where n may be too big for one instruction.
18811 If adding zero to one register, output nothing. */
18812 const char *
18813 output_add_immediate (rtx *operands)
18815 HOST_WIDE_INT n = INTVAL (operands[2]);
18817 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18819 if (n < 0)
18820 output_multi_immediate (operands,
18821 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18822 -n);
18823 else
18824 output_multi_immediate (operands,
18825 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18829 return "";
18832 /* Output a multiple immediate operation.
18833 OPERANDS is the vector of operands referred to in the output patterns.
18834 INSTR1 is the output pattern to use for the first constant.
18835 INSTR2 is the output pattern to use for subsequent constants.
18836 IMMED_OP is the index of the constant slot in OPERANDS.
18837 N is the constant value. */
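/* Worked example (added comment, register names illustrative): for
   N == 0x00ff00ff and an "add" pair of patterns, the scan finds the
   byte-sized chunks 0xff and 0xff0000, so two instructions are printed:
       add     rD, rS, #255
       add     rD, rD, #16711680
   i.e. the constant is applied eight bits at a time.  */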
18838 static const char *
18839 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18840 int immed_op, HOST_WIDE_INT n)
18842 #if HOST_BITS_PER_WIDE_INT > 32
18843 n &= 0xffffffff;
18844 #endif
18846 if (n == 0)
18848 /* Quick and easy output. */
18849 operands[immed_op] = const0_rtx;
18850 output_asm_insn (instr1, operands);
18852 else
18854 int i;
18855 const char * instr = instr1;
18857 /* Note that n is never zero here (which would give no output). */
18858 for (i = 0; i < 32; i += 2)
18860 if (n & (3 << i))
18862 operands[immed_op] = GEN_INT (n & (255 << i));
18863 output_asm_insn (instr, operands);
18864 instr = instr2;
18865 i += 6;
18870 return "";
18873 /* Return the name of a shifter operation. */
18874 static const char *
18875 arm_shift_nmem(enum rtx_code code)
18877 switch (code)
18879 case ASHIFT:
18880 return ARM_LSL_NAME;
18882 case ASHIFTRT:
18883 return "asr";
18885 case LSHIFTRT:
18886 return "lsr";
18888 case ROTATERT:
18889 return "ror";
18891 default:
18892 abort();
18896 /* Return the appropriate ARM instruction for the operation code.
18897 The returned result should not be overwritten. OP is the rtx of the
18898 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18899 was shifted. */
18900 const char *
18901 arithmetic_instr (rtx op, int shift_first_arg)
18903 switch (GET_CODE (op))
18905 case PLUS:
18906 return "add";
18908 case MINUS:
18909 return shift_first_arg ? "rsb" : "sub";
18911 case IOR:
18912 return "orr";
18914 case XOR:
18915 return "eor";
18917 case AND:
18918 return "and";
18920 case ASHIFT:
18921 case ASHIFTRT:
18922 case LSHIFTRT:
18923 case ROTATERT:
18924 return arm_shift_nmem(GET_CODE(op));
18926 default:
18927 gcc_unreachable ();
18931 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18932 for the operation code. The returned result should not be overwritten.
18933 OP is the rtx code of the shift.
18934 On exit, *AMOUNTP will be -1 if the shift is by a register, or the shift
18935 amount if the shift is by a constant. */
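/* For example (added comment), an operand such as (mult (reg) (const_int 8))
   is printed as "lsl" with *AMOUNTP set to 3, while (ashiftrt (reg) (reg))
   yields "asr" with *AMOUNTP set to -1 to signal a register-specified
   amount.  */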
18936 static const char *
18937 shift_op (rtx op, HOST_WIDE_INT *amountp)
18939 const char * mnem;
18940 enum rtx_code code = GET_CODE (op);
18942 switch (code)
18944 case ROTATE:
18945 if (!CONST_INT_P (XEXP (op, 1)))
18947 output_operand_lossage ("invalid shift operand");
18948 return NULL;
18951 code = ROTATERT;
18952 *amountp = 32 - INTVAL (XEXP (op, 1));
18953 mnem = "ror";
18954 break;
18956 case ASHIFT:
18957 case ASHIFTRT:
18958 case LSHIFTRT:
18959 case ROTATERT:
18960 mnem = arm_shift_nmem(code);
18961 if (CONST_INT_P (XEXP (op, 1)))
18963 *amountp = INTVAL (XEXP (op, 1));
18965 else if (REG_P (XEXP (op, 1)))
18967 *amountp = -1;
18968 return mnem;
18970 else
18972 output_operand_lossage ("invalid shift operand");
18973 return NULL;
18975 break;
18977 case MULT:
18978 /* We never have to worry about the amount being other than a
18979 power of 2, since this case can never be reloaded from a reg. */
18980 if (!CONST_INT_P (XEXP (op, 1)))
18982 output_operand_lossage ("invalid shift operand");
18983 return NULL;
18986 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18988 /* Amount must be a power of two. */
18989 if (*amountp & (*amountp - 1))
18991 output_operand_lossage ("invalid shift operand");
18992 return NULL;
18995 *amountp = exact_log2 (*amountp);
18996 gcc_assert (IN_RANGE (*amountp, 0, 31));
18997 return ARM_LSL_NAME;
18999 default:
19000 output_operand_lossage ("invalid shift operand");
19001 return NULL;
19004 /* This is not 100% correct, but follows from the desire to merge
19005 multiplication by a power of 2 with the recognizer for a
19006 shift. >=32 is not a valid shift for "lsl", so we must try to
19007 output a shift that produces the correct arithmetical result.
19008 Using lsr #32 is identical except for the fact that the carry bit
19009 is not set correctly if we set the flags; but we never use the
19010 carry bit from such an operation, so we can ignore that. */
19011 if (code == ROTATERT)
19012 /* Rotate is just modulo 32. */
19013 *amountp &= 31;
19014 else if (*amountp != (*amountp & 31))
19016 if (code == ASHIFT)
19017 mnem = "lsr";
19018 *amountp = 32;
19021 /* Shifts of 0 are no-ops. */
19022 if (*amountp == 0)
19023 return NULL;
19025 return mnem;
19028 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19029 because /bin/as is horribly restrictive. The judgement about
19030 whether or not each character is 'printable' (and can be output as
19031 is) or not (and must be printed with an octal escape) must be made
19032 with reference to the *host* character set -- the situation is
19033 similar to that discussed in the comments above pp_c_char in
19034 c-pretty-print.c. */
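/* For example (added comment), the input bytes 'a', '"', 'b', '\n' are
   emitted as:
       .ascii  "a\"b\012"
   with the directive restarted once MAX_ASCII_LEN output characters have
   been written.  */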
19036 #define MAX_ASCII_LEN 51
19038 void
19039 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19041 int i;
19042 int len_so_far = 0;
19044 fputs ("\t.ascii\t\"", stream);
19046 for (i = 0; i < len; i++)
19048 int c = p[i];
19050 if (len_so_far >= MAX_ASCII_LEN)
19052 fputs ("\"\n\t.ascii\t\"", stream);
19053 len_so_far = 0;
19056 if (ISPRINT (c))
19058 if (c == '\\' || c == '\"')
19060 putc ('\\', stream);
19061 len_so_far++;
19063 putc (c, stream);
19064 len_so_far++;
19066 else
19068 fprintf (stream, "\\%03o", c);
19069 len_so_far += 4;
19073 fputs ("\"\n", stream);
19076 /* Whether a register is callee saved or not. This is necessary because high
19077 registers are marked as caller saved when optimizing for size on Thumb-1
19078 targets, despite being callee saved, in order to avoid using them. */
19079 #define callee_saved_reg_p(reg) \
19080 (!call_used_regs[reg] \
19081 || (TARGET_THUMB1 && optimize_size \
19082 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19084 /* Compute the register save mask for registers 0 through 12
19085 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19087 static unsigned long
19088 arm_compute_save_reg0_reg12_mask (void)
19090 unsigned long func_type = arm_current_func_type ();
19091 unsigned long save_reg_mask = 0;
19092 unsigned int reg;
19094 if (IS_INTERRUPT (func_type))
19096 unsigned int max_reg;
19097 /* Interrupt functions must not corrupt any registers,
19098 even call clobbered ones. If this is a leaf function
19099 we can just examine the registers used by the RTL, but
19100 otherwise we have to assume that whatever function is
19101 called might clobber anything, and so we have to save
19102 all the call-clobbered registers as well. */
19103 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19104 /* FIQ handlers have registers r8 - r12 banked, so
19105 we only need to check r0 - r7.  Normal ISRs only
19106 bank r14 and r15, so we must check up to r12.
19107 r13 is the stack pointer which is always preserved,
19108 so we do not need to consider it here. */
19109 max_reg = 7;
19110 else
19111 max_reg = 12;
19113 for (reg = 0; reg <= max_reg; reg++)
19114 if (df_regs_ever_live_p (reg)
19115 || (! crtl->is_leaf && call_used_regs[reg]))
19116 save_reg_mask |= (1 << reg);
19118 /* Also save the pic base register if necessary. */
19119 if (flag_pic
19120 && !TARGET_SINGLE_PIC_BASE
19121 && arm_pic_register != INVALID_REGNUM
19122 && crtl->uses_pic_offset_table)
19123 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19125 else if (IS_VOLATILE(func_type))
19127 /* For noreturn functions we historically omitted register saves
19128 altogether. However this really messes up debugging. As a
19129 compromise save just the frame pointers. Combined with the link
19130 register saved elsewhere this should be sufficient to get
19131 a backtrace. */
19132 if (frame_pointer_needed)
19133 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19134 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19135 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19136 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19137 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19139 else
19141 /* In the normal case we only need to save those registers
19142 which are call saved and which are used by this function. */
19143 for (reg = 0; reg <= 11; reg++)
19144 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19145 save_reg_mask |= (1 << reg);
19147 /* Handle the frame pointer as a special case. */
19148 if (frame_pointer_needed)
19149 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19151 /* If we aren't loading the PIC register,
19152 don't stack it even though it may be live. */
19153 if (flag_pic
19154 && !TARGET_SINGLE_PIC_BASE
19155 && arm_pic_register != INVALID_REGNUM
19156 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19157 || crtl->uses_pic_offset_table))
19158 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19160 /* The prologue will copy SP into R0, so save it. */
19161 if (IS_STACKALIGN (func_type))
19162 save_reg_mask |= 1;
19165 /* Save registers so the exception handler can modify them. */
19166 if (crtl->calls_eh_return)
19168 unsigned int i;
19170 for (i = 0; ; i++)
19172 reg = EH_RETURN_DATA_REGNO (i);
19173 if (reg == INVALID_REGNUM)
19174 break;
19175 save_reg_mask |= 1 << reg;
19179 return save_reg_mask;
19182 /* Return true if r3 is live at the start of the function. */
19184 static bool
19185 arm_r3_live_at_start_p (void)
19187 /* Just look at cfg info, which is still close enough to correct at this
19188 point. This gives false positives for broken functions that might use
19189 uninitialized data that happens to be allocated in r3, but who cares? */
19190 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19193 /* Compute the number of bytes used to store the static chain register on the
19194 stack, above the stack frame. We need to know this accurately to get the
19195 alignment of the rest of the stack frame correct. */
19197 static int
19198 arm_compute_static_chain_stack_bytes (void)
19200 /* See the defining assertion in arm_expand_prologue. */
19201 if (IS_NESTED (arm_current_func_type ())
19202 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19203 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19204 || flag_stack_clash_protection)
19205 && !df_regs_ever_live_p (LR_REGNUM)))
19206 && arm_r3_live_at_start_p ()
19207 && crtl->args.pretend_args_size == 0)
19208 return 4;
19210 return 0;
19213 /* Compute a bit mask of which core registers need to be
19214 saved on the stack for the current function.
19215 This is used by arm_compute_frame_layout, which may add extra registers. */
19217 static unsigned long
19218 arm_compute_save_core_reg_mask (void)
19220 unsigned int save_reg_mask = 0;
19221 unsigned long func_type = arm_current_func_type ();
19222 unsigned int reg;
19224 if (IS_NAKED (func_type))
19225 /* This should never really happen. */
19226 return 0;
19228 /* If we are creating a stack frame, then we must save the frame pointer,
19229 IP (which will hold the old stack pointer), LR and the PC. */
19230 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19231 save_reg_mask |=
19232 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19233 | (1 << IP_REGNUM)
19234 | (1 << LR_REGNUM)
19235 | (1 << PC_REGNUM);
19237 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19239 /* Decide if we need to save the link register.
19240 Interrupt routines have their own banked link register,
19241 so they never need to save it.
19242 Otherwise if we do not use the link register we do not need to save
19243 it. If we are pushing other registers onto the stack however, we
19244 can save an instruction in the epilogue by pushing the link register
19245 now and then popping it back into the PC. This incurs extra memory
19246 accesses though, so we only do it when optimizing for size, and only
19247 if we know that we will not need a fancy return sequence. */
19248 if (df_regs_ever_live_p (LR_REGNUM)
19249 || (save_reg_mask
19250 && optimize_size
19251 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19252 && !crtl->tail_call_emit
19253 && !crtl->calls_eh_return))
19254 save_reg_mask |= 1 << LR_REGNUM;
19256 if (cfun->machine->lr_save_eliminated)
19257 save_reg_mask &= ~ (1 << LR_REGNUM);
19259 if (TARGET_REALLY_IWMMXT
19260 && ((bit_count (save_reg_mask)
19261 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19262 arm_compute_static_chain_stack_bytes())
19263 ) % 2) != 0)
19265 /* The total number of registers that are going to be pushed
19266 onto the stack is odd. We need to ensure that the stack
19267 is 64-bit aligned before we start to save iWMMXt registers,
19268 and also before we start to create locals. (A local variable
19269 might be a double or long long which we will load/store using
19270 an iWMMXt instruction). Therefore we need to push another
19271 ARM register, so that the stack will be 64-bit aligned. We
19272 try to avoid using the arg registers (r0 - r3) as they might be
19273 used to pass values in a tail call. */
19274 for (reg = 4; reg <= 12; reg++)
19275 if ((save_reg_mask & (1 << reg)) == 0)
19276 break;
19278 if (reg <= 12)
19279 save_reg_mask |= (1 << reg);
19280 else
19282 cfun->machine->sibcall_blocked = 1;
19283 save_reg_mask |= (1 << 3);
19287 /* We may need to push an additional register for use initializing the
19288 PIC base register. */
19289 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19290 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19292 reg = thumb_find_work_register (1 << 4);
19293 if (!call_used_regs[reg])
19294 save_reg_mask |= (1 << reg);
19297 return save_reg_mask;
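/* A worked example (illustrative only): an ARM-mode function that uses r4
   and r7, makes further calls, and needs no frame pointer would typically
   get (1 << 4) | (1 << 7) | (1 << LR_REGNUM) = 0x4090 from the code above,
   assuming none of the interrupt, PIC or iWMMXt adjustments apply.  */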
19300 /* Compute a bit mask of which core registers need to be
19301 saved on the stack for the current function. */
19302 static unsigned long
19303 thumb1_compute_save_core_reg_mask (void)
19305 unsigned long mask;
19306 unsigned reg;
19308 mask = 0;
19309 for (reg = 0; reg < 12; reg ++)
19310 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19311 mask |= 1 << reg;
19313 /* Handle the frame pointer as a special case. */
19314 if (frame_pointer_needed)
19315 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19317 if (flag_pic
19318 && !TARGET_SINGLE_PIC_BASE
19319 && arm_pic_register != INVALID_REGNUM
19320 && crtl->uses_pic_offset_table)
19321 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19323 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19324 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19325 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19327 /* LR will also be pushed if any lo regs are pushed. */
19328 if (mask & 0xff || thumb_force_lr_save ())
19329 mask |= (1 << LR_REGNUM);
19331 /* Make sure we have a low work register if we need one.
19332 We will need one if we are going to push a high register,
19333 but we are not currently intending to push a low register. */
19334 if ((mask & 0xff) == 0
19335 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19337 /* Use thumb_find_work_register to choose which register
19338 we will use. If the register is live then we will
19339 have to push it. Use LAST_LO_REGNUM as our fallback
19340 choice for the register to select. */
19341 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19342 /* Make sure the register returned by thumb_find_work_register is
19343 not part of the return value. */
19344 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19345 reg = LAST_LO_REGNUM;
19347 if (callee_saved_reg_p (reg))
19348 mask |= 1 << reg;
19351 /* The 504 below is 8 bytes less than 512 because there are two possible
19352 alignment words. We can't tell here if they will be present or not so we
19353 have to play it safe and assume that they are. */
19354 if ((CALLER_INTERWORKING_SLOT_SIZE +
19355 ROUND_UP_WORD (get_frame_size ()) +
19356 crtl->outgoing_args_size) >= 504)
19358 /* This is the same as the code in thumb1_expand_prologue() which
19359 determines which register to use for stack decrement. */
19360 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19361 if (mask & (1 << reg))
19362 break;
19364 if (reg > LAST_LO_REGNUM)
19366 /* Make sure we have a register available for stack decrement. */
19367 mask |= 1 << LAST_LO_REGNUM;
19371 return mask;
19375 /* Return the number of bytes required to save VFP registers. */
19376 static int
19377 arm_get_vfp_saved_size (void)
19379 unsigned int regno;
19380 int count;
19381 int saved;
19383 saved = 0;
19384 /* Space for saved VFP registers. */
19385 if (TARGET_HARD_FLOAT)
19387 count = 0;
19388 for (regno = FIRST_VFP_REGNUM;
19389 regno < LAST_VFP_REGNUM;
19390 regno += 2)
19392 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19393 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19395 if (count > 0)
19397 /* Workaround ARM10 VFPr1 bug. */
19398 if (count == 2 && !arm_arch6)
19399 count++;
19400 saved += count * 8;
19402 count = 0;
19404 else
19405 count++;
19407 if (count > 0)
19409 if (count == 2 && !arm_arch6)
19410 count++;
19411 saved += count * 8;
19414 return saved;
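/* Worked example (illustrative): if only d8 and d9 need saving, the loop
   above finds a run with count == 2, so this returns 16 bytes on arm_arch6
   and later, or 24 bytes on older cores because the ARM10 VFPr1 workaround
   pads the run to three registers.  */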
19418 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19419 everything bar the final return instruction. If simple_return is true,
19420 then do not output the epilogue, because it has already been emitted in RTL.
19422 Note: do not forget to update the length attribute of the corresponding insn pattern
19423 when changing assembly output (e.g. the length attribute of
19424 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19425 register clearing sequences). */
19426 const char *
19427 output_return_instruction (rtx operand, bool really_return, bool reverse,
19428 bool simple_return)
19430 char conditional[10];
19431 char instr[100];
19432 unsigned reg;
19433 unsigned long live_regs_mask;
19434 unsigned long func_type;
19435 arm_stack_offsets *offsets;
19437 func_type = arm_current_func_type ();
19439 if (IS_NAKED (func_type))
19440 return "";
19442 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19444 /* If this function was declared non-returning, and we have
19445 found a tail call, then we have to trust that the called
19446 function won't return. */
19447 if (really_return)
19449 rtx ops[2];
19451 /* Otherwise, trap an attempted return by aborting. */
19452 ops[0] = operand;
19453 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19454 : "abort");
19455 assemble_external_libcall (ops[1]);
19456 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19459 return "";
19462 gcc_assert (!cfun->calls_alloca || really_return);
19464 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19466 cfun->machine->return_used_this_function = 1;
19468 offsets = arm_get_frame_offsets ();
19469 live_regs_mask = offsets->saved_regs_mask;
19471 if (!simple_return && live_regs_mask)
19473 const char * return_reg;
19475 /* If we do not have any special requirements for function exit
19476 (e.g. interworking) then we can load the return address
19477 directly into the PC. Otherwise we must load it into LR. */
19478 if (really_return
19479 && !IS_CMSE_ENTRY (func_type)
19480 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19481 return_reg = reg_names[PC_REGNUM];
19482 else
19483 return_reg = reg_names[LR_REGNUM];
19485 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19487 /* There are three possible reasons for the IP register
19488 being saved: 1) a stack frame was created, in which case
19489 IP contains the old stack pointer, or 2) an ISR routine
19490 corrupted it, or 3) it was saved to align the stack on
19491 iWMMXt. In case 1, restore IP into SP, otherwise just
19492 restore IP. */
19493 if (frame_pointer_needed)
19495 live_regs_mask &= ~ (1 << IP_REGNUM);
19496 live_regs_mask |= (1 << SP_REGNUM);
19498 else
19499 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19502 /* On some ARM architectures it is faster to use LDR rather than
19503 LDM to load a single register. On other architectures, the
19504 cost is the same. In 26 bit mode, or for exception handlers,
19505 we have to use LDM to load the PC so that the CPSR is also
19506 restored. */
19507 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19508 if (live_regs_mask == (1U << reg))
19509 break;
19511 if (reg <= LAST_ARM_REGNUM
19512 && (reg != LR_REGNUM
19513 || ! really_return
19514 || ! IS_INTERRUPT (func_type)))
19516 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19517 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19519 else
19521 char *p;
19522 int first = 1;
19524 /* Generate the load multiple instruction to restore the
19525 registers. Note we can get here, even if
19526 frame_pointer_needed is true, but only if sp already
19527 points to the base of the saved core registers. */
19528 if (live_regs_mask & (1 << SP_REGNUM))
19530 unsigned HOST_WIDE_INT stack_adjust;
19532 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19533 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19535 if (stack_adjust && arm_arch5 && TARGET_ARM)
19536 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19537 else
19539 /* If we can't use ldmib (SA110 bug),
19540 then try to pop r3 instead. */
19541 if (stack_adjust)
19542 live_regs_mask |= 1 << 3;
19544 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19547 /* For interrupt returns we have to use an LDM rather than
19548 a POP so that we can use the exception return variant. */
19549 else if (IS_INTERRUPT (func_type))
19550 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19551 else
19552 sprintf (instr, "pop%s\t{", conditional);
19554 p = instr + strlen (instr);
19556 for (reg = 0; reg <= SP_REGNUM; reg++)
19557 if (live_regs_mask & (1 << reg))
19559 int l = strlen (reg_names[reg]);
19561 if (first)
19562 first = 0;
19563 else
19565 memcpy (p, ", ", 2);
19566 p += 2;
19569 memcpy (p, "%|", 2);
19570 memcpy (p + 2, reg_names[reg], l);
19571 p += l + 2;
19574 if (live_regs_mask & (1 << LR_REGNUM))
19576 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19577 /* If returning from an interrupt, restore the CPSR. */
19578 if (IS_INTERRUPT (func_type))
19579 strcat (p, "^");
19581 else
19582 strcpy (p, "}");
19585 output_asm_insn (instr, & operand);
19587 /* See if we need to generate an extra instruction to
19588 perform the actual function return. */
19589 if (really_return
19590 && func_type != ARM_FT_INTERWORKED
19591 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19593 /* The return has already been handled
19594 by loading the LR into the PC. */
19595 return "";
19599 if (really_return)
19601 switch ((int) ARM_FUNC_TYPE (func_type))
19603 case ARM_FT_ISR:
19604 case ARM_FT_FIQ:
19605 /* ??? This is wrong for unified assembly syntax. */
19606 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19607 break;
19609 case ARM_FT_INTERWORKED:
19610 gcc_assert (arm_arch5 || arm_arch4t);
19611 sprintf (instr, "bx%s\t%%|lr", conditional);
19612 break;
19614 case ARM_FT_EXCEPTION:
19615 /* ??? This is wrong for unified assembly syntax. */
19616 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19617 break;
19619 default:
19620 if (IS_CMSE_ENTRY (func_type))
19622 /* Check if we have to clear the 'GE bits' which is only used if
19623 parallel add and subtraction instructions are available. */
19624 if (TARGET_INT_SIMD)
19625 snprintf (instr, sizeof (instr),
19626 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19627 else
19628 snprintf (instr, sizeof (instr),
19629 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19631 output_asm_insn (instr, & operand);
19632 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19634 /* Clear the cumulative exception-status bits (0-4,7) and the
19635 condition code bits (28-31) of the FPSCR. We need to
19636 remember to clear the first scratch register used (IP) and
19637 save and restore the second (r4). */
19638 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19639 output_asm_insn (instr, & operand);
19640 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19641 output_asm_insn (instr, & operand);
19642 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19643 output_asm_insn (instr, & operand);
19644 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19645 output_asm_insn (instr, & operand);
19646 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19647 output_asm_insn (instr, & operand);
19648 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19649 output_asm_insn (instr, & operand);
19650 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19651 output_asm_insn (instr, & operand);
19652 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19653 output_asm_insn (instr, & operand);
19655 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19657 /* Use bx if it's available. */
19658 else if (arm_arch5 || arm_arch4t)
19659 sprintf (instr, "bx%s\t%%|lr", conditional);
19660 else
19661 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19662 break;
19665 output_asm_insn (instr, & operand);
19668 return "";
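/* Illustrative examples of sequences the code above can produce (the exact
   output depends on the target options and function type, so treat these as
   sketches rather than a specification):
     plain return on ARMv5 and later:           bx      lr
     return popping saved registers:            pop     {r4, r5, pc}
     ISR return restoring the CPSR:             ldmfd   sp!, {r4, pc}^
     cmse_nonsecure_entry return (soft float):  msr     APSR_nzcvq, lr
                                                bxns    lr  */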
19671 /* Output in FILE asm statements needed to declare the NAME of the function
19672 defined by its DECL node. */
19674 void
19675 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19677 size_t cmse_name_len;
19678 char *cmse_name = 0;
19679 char cmse_prefix[] = "__acle_se_";
19681 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19682 extra function label for each function with the 'cmse_nonsecure_entry'
19683 attribute. This extra function label should be prepended with
19684 '__acle_se_', telling the linker that it needs to create secure gateway
19685 veneers for this function. */
19686 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19687 DECL_ATTRIBUTES (decl)))
19689 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19690 cmse_name = XALLOCAVEC (char, cmse_name_len);
19691 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19692 targetm.asm_out.globalize_label (file, cmse_name);
19694 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19695 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19698 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19699 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19700 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19701 ASM_OUTPUT_LABEL (file, name);
19703 if (cmse_name)
19704 ASM_OUTPUT_LABEL (file, cmse_name);
19706 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19709 /* Write the function name into the code section, directly preceding
19710 the function prologue.
19712 Code will be output similar to this:
19714 .ascii "arm_poke_function_name", 0
19715 .align
19717 .word 0xff000000 + (t1 - t0)
19718 arm_poke_function_name
19719 mov ip, sp
19720 stmfd sp!, {fp, ip, lr, pc}
19721 sub fp, ip, #4
19723 When performing a stack backtrace, code can inspect the value
19724 of 'pc' stored at 'fp' + 0. If the trace function then looks
19725 at location pc - 12 and the top 8 bits are set, then we know
19726 that there is a function name embedded immediately preceding this
19727 location and has length ((pc[-3]) & 0xff000000).
19729 We assume that pc is declared as a pointer to an unsigned long.
19731 It is of no benefit to output the function name if we are assembling
19732 a leaf function. These function types will not contain a stack
19733 backtrace structure, therefore it is not possible to determine the
19734 function name. */
19735 void
19736 arm_poke_function_name (FILE *stream, const char *name)
19738 unsigned long alignlength;
19739 unsigned long length;
19740 rtx x;
19742 length = strlen (name) + 1;
19743 alignlength = ROUND_UP_WORD (length);
19745 ASM_OUTPUT_ASCII (stream, name, length);
19746 ASM_OUTPUT_ALIGN (stream, 2);
19747 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19748 assemble_aligned_integer (UNITS_PER_WORD, x);
19751 /* Place some comments into the assembler stream
19752 describing the current function. */
19753 static void
19754 arm_output_function_prologue (FILE *f)
19756 unsigned long func_type;
19758 /* Sanity check. */
19759 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19761 func_type = arm_current_func_type ();
19763 switch ((int) ARM_FUNC_TYPE (func_type))
19765 default:
19766 case ARM_FT_NORMAL:
19767 break;
19768 case ARM_FT_INTERWORKED:
19769 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19770 break;
19771 case ARM_FT_ISR:
19772 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19773 break;
19774 case ARM_FT_FIQ:
19775 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19776 break;
19777 case ARM_FT_EXCEPTION:
19778 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19779 break;
19782 if (IS_NAKED (func_type))
19783 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19785 if (IS_VOLATILE (func_type))
19786 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19788 if (IS_NESTED (func_type))
19789 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19790 if (IS_STACKALIGN (func_type))
19791 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19792 if (IS_CMSE_ENTRY (func_type))
19793 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19795 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19796 crtl->args.size,
19797 crtl->args.pretend_args_size,
19798 (HOST_WIDE_INT) get_frame_size ());
19800 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19801 frame_pointer_needed,
19802 cfun->machine->uses_anonymous_args);
19804 if (cfun->machine->lr_save_eliminated)
19805 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19807 if (crtl->calls_eh_return)
19808 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19812 static void
19813 arm_output_function_epilogue (FILE *)
19815 arm_stack_offsets *offsets;
19817 if (TARGET_THUMB1)
19819 int regno;
19821 /* Emit any call-via-reg trampolines that are needed for v4t support
19822 of call_reg and call_value_reg type insns. */
19823 for (regno = 0; regno < LR_REGNUM; regno++)
19825 rtx label = cfun->machine->call_via[regno];
19827 if (label != NULL)
19829 switch_to_section (function_section (current_function_decl));
19830 targetm.asm_out.internal_label (asm_out_file, "L",
19831 CODE_LABEL_NUMBER (label));
19832 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19836 /* ??? Probably not safe to set this here, since it assumes that a
19837 function will be emitted as assembly immediately after we generate
19838 RTL for it. This does not happen for inline functions. */
19839 cfun->machine->return_used_this_function = 0;
19841 else /* TARGET_32BIT */
19843 /* We need to take into account any stack-frame rounding. */
19844 offsets = arm_get_frame_offsets ();
19846 gcc_assert (!use_return_insn (FALSE, NULL)
19847 || (cfun->machine->return_used_this_function != 0)
19848 || offsets->saved_regs == offsets->outgoing_args
19849 || frame_pointer_needed);
19853 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19854 STR and STRD.  If an even number of registers is being pushed, an
19855 STRD pattern is created for each register pair.  If an
19856 odd number of registers is pushed, an initial STR is emitted, followed by
19857 as many STRD instructions as are needed.  This works best when the
19858 stack is initially 64-bit aligned (the normal case), since it
19859 ensures that each STRD is also 64-bit aligned. */
19860 static void
19861 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19863 int num_regs = 0;
19864 int i;
19865 int regno;
19866 rtx par = NULL_RTX;
19867 rtx dwarf = NULL_RTX;
19868 rtx tmp;
19869 bool first = true;
19871 num_regs = bit_count (saved_regs_mask);
19873 /* Must be at least one register to save, and can't save SP or PC. */
19874 gcc_assert (num_regs > 0 && num_regs <= 14);
19875 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19876 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19878 /* Create sequence for DWARF info. All the frame-related data for
19879 debugging is held in this wrapper. */
19880 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19882 /* Describe the stack adjustment. */
19883 tmp = gen_rtx_SET (stack_pointer_rtx,
19884 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19885 RTX_FRAME_RELATED_P (tmp) = 1;
19886 XVECEXP (dwarf, 0, 0) = tmp;
19888 /* Find the first register. */
19889 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19892 i = 0;
19894 /* If there's an odd number of registers to push, start off by
19895 pushing a single register. This ensures that subsequent strd
19896 operations are dword aligned (assuming that SP was originally
19897 64-bit aligned). */
19898 if ((num_regs & 1) != 0)
19900 rtx reg, mem, insn;
19902 reg = gen_rtx_REG (SImode, regno);
19903 if (num_regs == 1)
19904 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19905 stack_pointer_rtx));
19906 else
19907 mem = gen_frame_mem (Pmode,
19908 gen_rtx_PRE_MODIFY
19909 (Pmode, stack_pointer_rtx,
19910 plus_constant (Pmode, stack_pointer_rtx,
19911 -4 * num_regs)));
19913 tmp = gen_rtx_SET (mem, reg);
19914 RTX_FRAME_RELATED_P (tmp) = 1;
19915 insn = emit_insn (tmp);
19916 RTX_FRAME_RELATED_P (insn) = 1;
19917 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19918 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19919 RTX_FRAME_RELATED_P (tmp) = 1;
19920 i++;
19921 regno++;
19922 XVECEXP (dwarf, 0, i) = tmp;
19923 first = false;
19926 while (i < num_regs)
19927 if (saved_regs_mask & (1 << regno))
19929 rtx reg1, reg2, mem1, mem2;
19930 rtx tmp0, tmp1, tmp2;
19931 int regno2;
19933 /* Find the register to pair with this one. */
19934 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19935 regno2++)
19938 reg1 = gen_rtx_REG (SImode, regno);
19939 reg2 = gen_rtx_REG (SImode, regno2);
19941 if (first)
19943 rtx insn;
19945 first = false;
19946 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19947 stack_pointer_rtx,
19948 -4 * num_regs));
19949 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19950 stack_pointer_rtx,
19951 -4 * (num_regs - 1)));
19952 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19953 plus_constant (Pmode, stack_pointer_rtx,
19954 -4 * (num_regs)));
19955 tmp1 = gen_rtx_SET (mem1, reg1);
19956 tmp2 = gen_rtx_SET (mem2, reg2);
19957 RTX_FRAME_RELATED_P (tmp0) = 1;
19958 RTX_FRAME_RELATED_P (tmp1) = 1;
19959 RTX_FRAME_RELATED_P (tmp2) = 1;
19960 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19961 XVECEXP (par, 0, 0) = tmp0;
19962 XVECEXP (par, 0, 1) = tmp1;
19963 XVECEXP (par, 0, 2) = tmp2;
19964 insn = emit_insn (par);
19965 RTX_FRAME_RELATED_P (insn) = 1;
19966 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19968 else
19970 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19971 stack_pointer_rtx,
19972 4 * i));
19973 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19974 stack_pointer_rtx,
19975 4 * (i + 1)));
19976 tmp1 = gen_rtx_SET (mem1, reg1);
19977 tmp2 = gen_rtx_SET (mem2, reg2);
19978 RTX_FRAME_RELATED_P (tmp1) = 1;
19979 RTX_FRAME_RELATED_P (tmp2) = 1;
19980 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19981 XVECEXP (par, 0, 0) = tmp1;
19982 XVECEXP (par, 0, 1) = tmp2;
19983 emit_insn (par);
19986 /* Create unwind information. This is an approximation. */
19987 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19988 plus_constant (Pmode,
19989 stack_pointer_rtx,
19990 4 * i)),
19991 reg1);
19992 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19993 plus_constant (Pmode,
19994 stack_pointer_rtx,
19995 4 * (i + 1))),
19996 reg2);
19998 RTX_FRAME_RELATED_P (tmp1) = 1;
19999 RTX_FRAME_RELATED_P (tmp2) = 1;
20000 XVECEXP (dwarf, 0, i + 1) = tmp1;
20001 XVECEXP (dwarf, 0, i + 2) = tmp2;
20002 i += 2;
20003 regno = regno2 + 1;
20005 else
20006 regno++;
20008 return;
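/* Illustrative example (assumed output, not lifted from a testcase):
   pushing the five registers {r4, r5, r6, r7, lr} would typically come
   out as
       str     r4, [sp, #-20]!
       strd    r5, r6, [sp, #4]
       strd    r7, lr, [sp, #12]
   i.e. one STR with writeback to allocate the whole area, followed by
   64-bit-aligned STRDs for the remaining pairs.  */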
20011 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20012 whenever possible, otherwise it emits single-word stores. The first store
20013 also allocates stack space for all saved registers, using pre-indexed
20014 addressing with writeback.  All other stores use offset addressing.  If no STRD
20015 can be emitted, this function emits a sequence of single-word stores,
20016 and not an STM as before, because single-word stores provide more
20017 scheduling freedom and can be turned into an STM by peephole optimizations. */
20018 static void
20019 arm_emit_strd_push (unsigned long saved_regs_mask)
20021 int num_regs = 0;
20022 int i, j, dwarf_index = 0;
20023 int offset = 0;
20024 rtx dwarf = NULL_RTX;
20025 rtx insn = NULL_RTX;
20026 rtx tmp, mem;
20028 /* TODO: More efficient code can be emitted by changing the
20029 layout, e.g., first push all pairs that can use STRD to keep the
20030 stack aligned, and then push all other registers. */
20031 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20032 if (saved_regs_mask & (1 << i))
20033 num_regs++;
20035 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20036 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20037 gcc_assert (num_regs > 0);
20039 /* Create sequence for DWARF info. */
20040 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20042 /* For dwarf info, we generate explicit stack update. */
20043 tmp = gen_rtx_SET (stack_pointer_rtx,
20044 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20045 RTX_FRAME_RELATED_P (tmp) = 1;
20046 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20048 /* Save registers. */
20049 offset = - 4 * num_regs;
20050 j = 0;
20051 while (j <= LAST_ARM_REGNUM)
20052 if (saved_regs_mask & (1 << j))
20054 if ((j % 2 == 0)
20055 && (saved_regs_mask & (1 << (j + 1))))
20057 /* Current register and previous register form register pair for
20058 which STRD can be generated. */
20059 if (offset < 0)
20061 /* Allocate stack space for all saved registers. */
20062 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20063 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20064 mem = gen_frame_mem (DImode, tmp);
20065 offset = 0;
20067 else if (offset > 0)
20068 mem = gen_frame_mem (DImode,
20069 plus_constant (Pmode,
20070 stack_pointer_rtx,
20071 offset));
20072 else
20073 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20075 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20076 RTX_FRAME_RELATED_P (tmp) = 1;
20077 tmp = emit_insn (tmp);
20079 /* Record the first store insn. */
20080 if (dwarf_index == 1)
20081 insn = tmp;
20083 /* Generate dwarf info. */
20084 mem = gen_frame_mem (SImode,
20085 plus_constant (Pmode,
20086 stack_pointer_rtx,
20087 offset));
20088 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20092 mem = gen_frame_mem (SImode,
20093 plus_constant (Pmode,
20094 stack_pointer_rtx,
20095 offset + 4));
20096 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20097 RTX_FRAME_RELATED_P (tmp) = 1;
20098 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20100 offset += 8;
20101 j += 2;
20103 else
20105 /* Emit a single word store. */
20106 if (offset < 0)
20108 /* Allocate stack space for all saved registers. */
20109 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20110 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20111 mem = gen_frame_mem (SImode, tmp);
20112 offset = 0;
20114 else if (offset > 0)
20115 mem = gen_frame_mem (SImode,
20116 plus_constant (Pmode,
20117 stack_pointer_rtx,
20118 offset));
20119 else
20120 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20122 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20123 RTX_FRAME_RELATED_P (tmp) = 1;
20124 tmp = emit_insn (tmp);
20126 /* Record the first store insn. */
20127 if (dwarf_index == 1)
20128 insn = tmp;
20130 /* Generate dwarf info. */
20131 mem = gen_frame_mem (SImode,
20132 plus_constant(Pmode,
20133 stack_pointer_rtx,
20134 offset));
20135 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20136 RTX_FRAME_RELATED_P (tmp) = 1;
20137 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20139 offset += 4;
20140 j += 1;
20143 else
20144 j++;
20146 /* Attach dwarf info to the first insn we generate. */
20147 gcc_assert (insn != NULL_RTX);
20148 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20149 RTX_FRAME_RELATED_P (insn) = 1;
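/* Illustrative example (an assumption, not taken from compiler output):
   pushing {r4, r5, r6, r7} in ARM mode would normally become
       strd    r4, r5, [sp, #-16]!
       strd    r6, r7, [sp, #8]
   while an unpaired trailing register, as in {r4, r5, r6}, falls back to
   an STRD followed by a single STR.  */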
20152 /* Generate and emit an insn that we will recognize as a push_multi.
20153 Unfortunately, since this insn does not reflect very well the actual
20154 semantics of the operation, we need to annotate the insn for the benefit
20155 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20156 MASK for registers that should be annotated for DWARF2 frame unwind
20157 information. */
20158 static rtx
20159 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20161 int num_regs = 0;
20162 int num_dwarf_regs = 0;
20163 int i, j;
20164 rtx par;
20165 rtx dwarf;
20166 int dwarf_par_index;
20167 rtx tmp, reg;
20169 /* We don't record the PC in the dwarf frame information. */
20170 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20172 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20174 if (mask & (1 << i))
20175 num_regs++;
20176 if (dwarf_regs_mask & (1 << i))
20177 num_dwarf_regs++;
20180 gcc_assert (num_regs && num_regs <= 16);
20181 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20183 /* For the body of the insn we are going to generate an UNSPEC in
20184 parallel with several USEs. This allows the insn to be recognized
20185 by the push_multi pattern in the arm.md file.
20187 The body of the insn looks something like this:
20189 (parallel [
20190 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20191 (const_int:SI <num>)))
20192 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20193 (use (reg:SI XX))
20194 (use (reg:SI YY))
20198 For the frame note however, we try to be more explicit and actually
20199 show each register being stored into the stack frame, plus a (single)
20200 decrement of the stack pointer. We do it this way in order to be
20201 friendly to the stack unwinding code, which only wants to see a single
20202 stack decrement per instruction. The RTL we generate for the note looks
20203 something like this:
20205 (sequence [
20206 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20207 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20208 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20209 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20213 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20214 instead we'd have a parallel expression detailing all
20215 the stores to the various memory addresses so that debug
20216 information is more up-to-date. Remember however while writing
20217 this to take care of the constraints with the push instruction.
20219 Note also that this has to be taken care of for the VFP registers.
20221 For more see PR43399. */
20223 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20224 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20225 dwarf_par_index = 1;
20227 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20229 if (mask & (1 << i))
20231 reg = gen_rtx_REG (SImode, i);
20233 XVECEXP (par, 0, 0)
20234 = gen_rtx_SET (gen_frame_mem
20235 (BLKmode,
20236 gen_rtx_PRE_MODIFY (Pmode,
20237 stack_pointer_rtx,
20238 plus_constant
20239 (Pmode, stack_pointer_rtx,
20240 -4 * num_regs))
20242 gen_rtx_UNSPEC (BLKmode,
20243 gen_rtvec (1, reg),
20244 UNSPEC_PUSH_MULT));
20246 if (dwarf_regs_mask & (1 << i))
20248 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20249 reg);
20250 RTX_FRAME_RELATED_P (tmp) = 1;
20251 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20254 break;
20258 for (j = 1, i++; j < num_regs; i++)
20260 if (mask & (1 << i))
20262 reg = gen_rtx_REG (SImode, i);
20264 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20266 if (dwarf_regs_mask & (1 << i))
20269 = gen_rtx_SET (gen_frame_mem
20270 (SImode,
20271 plus_constant (Pmode, stack_pointer_rtx,
20272 4 * j)),
20273 reg);
20274 RTX_FRAME_RELATED_P (tmp) = 1;
20275 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20278 j++;
20282 par = emit_insn (par);
20284 tmp = gen_rtx_SET (stack_pointer_rtx,
20285 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20286 RTX_FRAME_RELATED_P (tmp) = 1;
20287 XVECEXP (dwarf, 0, 0) = tmp;
20289 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20291 return par;
20294 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20295 SIZE is the offset to be adjusted.
20296 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20297 static void
20298 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20300 rtx dwarf;
20302 RTX_FRAME_RELATED_P (insn) = 1;
20303 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20304 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20307 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20308 SAVED_REGS_MASK shows which registers need to be restored.
20310 Unfortunately, since this insn does not reflect very well the actual
20311 semantics of the operation, we need to annotate the insn for the benefit
20312 of DWARF2 frame unwind information. */
20313 static void
20314 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20316 int num_regs = 0;
20317 int i, j;
20318 rtx par;
20319 rtx dwarf = NULL_RTX;
20320 rtx tmp, reg;
20321 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20322 int offset_adj;
20323 int emit_update;
20325 offset_adj = return_in_pc ? 1 : 0;
20326 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20327 if (saved_regs_mask & (1 << i))
20328 num_regs++;
20330 gcc_assert (num_regs && num_regs <= 16);
20332 /* If SP is in reglist, then we don't emit SP update insn. */
20333 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20335 /* The parallel needs to hold num_regs SETs
20336 and one SET for the stack update. */
20337 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20339 if (return_in_pc)
20340 XVECEXP (par, 0, 0) = ret_rtx;
20342 if (emit_update)
20344 /* Increment the stack pointer, based on there being
20345 num_regs 4-byte registers to restore. */
20346 tmp = gen_rtx_SET (stack_pointer_rtx,
20347 plus_constant (Pmode,
20348 stack_pointer_rtx,
20349 4 * num_regs));
20350 RTX_FRAME_RELATED_P (tmp) = 1;
20351 XVECEXP (par, 0, offset_adj) = tmp;
20354 /* Now restore every reg, which may include PC. */
20355 for (j = 0, i = 0; j < num_regs; i++)
20356 if (saved_regs_mask & (1 << i))
20358 reg = gen_rtx_REG (SImode, i);
20359 if ((num_regs == 1) && emit_update && !return_in_pc)
20361 /* Emit single load with writeback. */
20362 tmp = gen_frame_mem (SImode,
20363 gen_rtx_POST_INC (Pmode,
20364 stack_pointer_rtx));
20365 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20366 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20367 return;
20370 tmp = gen_rtx_SET (reg,
20371 gen_frame_mem
20372 (SImode,
20373 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20374 RTX_FRAME_RELATED_P (tmp) = 1;
20375 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20377 /* We need to maintain a sequence for DWARF info too. As dwarf info
20378 should not have PC, skip PC. */
20379 if (i != PC_REGNUM)
20380 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20382 j++;
20385 if (return_in_pc)
20386 par = emit_jump_insn (par);
20387 else
20388 par = emit_insn (par);
20390 REG_NOTES (par) = dwarf;
20391 if (!return_in_pc)
20392 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20393 stack_pointer_rtx, stack_pointer_rtx);
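/* A rough illustration (assumed output, not taken from a testcase): with
   SAVED_REGS_MASK covering {r4, r5, r6} the parallel emitted above is
   normally printed as  pop {r4, r5, r6},  while a mask that also contains
   PC becomes a returning pop such as  pop {r4, r5, pc}.  */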
20396 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20397 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20399 Unfortunately, since this insn does not reflect very well the actual
20400 semantics of the operation, we need to annotate the insn for the benefit
20401 of DWARF2 frame unwind information. */
20402 static void
20403 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20405 int i, j;
20406 rtx par;
20407 rtx dwarf = NULL_RTX;
20408 rtx tmp, reg;
20410 gcc_assert (num_regs && num_regs <= 32);
20412 /* Workaround ARM10 VFPr1 bug. */
20413 if (num_regs == 2 && !arm_arch6)
20415 if (first_reg == 15)
20416 first_reg--;
20418 num_regs++;
20421 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20422 there could be up to 32 D-registers to restore.
20423 If there are more than 16 D-registers, make two recursive calls,
20424 each of which emits one pop_multi instruction. */
20425 if (num_regs > 16)
20427 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20428 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20429 return;
20432 /* The parallel needs to hold num_regs SETs
20433 and one SET for the stack update. */
20434 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20436 /* Increment the stack pointer, based on there being
20437 num_regs 8-byte registers to restore. */
20438 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20439 RTX_FRAME_RELATED_P (tmp) = 1;
20440 XVECEXP (par, 0, 0) = tmp;
20442 /* Now show every reg that will be restored, using a SET for each. */
20443 for (j = 0, i=first_reg; j < num_regs; i += 2)
20445 reg = gen_rtx_REG (DFmode, i);
20447 tmp = gen_rtx_SET (reg,
20448 gen_frame_mem
20449 (DFmode,
20450 plus_constant (Pmode, base_reg, 8 * j)));
20451 RTX_FRAME_RELATED_P (tmp) = 1;
20452 XVECEXP (par, 0, j + 1) = tmp;
20454 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20456 j++;
20459 par = emit_insn (par);
20460 REG_NOTES (par) = dwarf;
20462 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP. */
20463 if (REGNO (base_reg) == IP_REGNUM)
20465 RTX_FRAME_RELATED_P (par) = 1;
20466 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20468 else
20469 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20470 base_reg, base_reg);
20473 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
20474 even number of registers is being popped, an LDRD pattern is created for
20475 each register pair.  If an odd number of registers is popped, the last register
20476 is loaded using an LDR pattern. */
20477 static void
20478 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20480 int num_regs = 0;
20481 int i, j;
20482 rtx par = NULL_RTX;
20483 rtx dwarf = NULL_RTX;
20484 rtx tmp, reg, tmp1;
20485 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20487 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20488 if (saved_regs_mask & (1 << i))
20489 num_regs++;
20491 gcc_assert (num_regs && num_regs <= 16);
20493 /* We cannot generate an LDRD for PC, so reduce the count if PC is
20494 to be popped.  If num_regs was even it now becomes odd,
20495 and we can generate a pop with PC.  If num_regs was odd, it is now
20496 even, and an LDR with return can be generated for PC. */
20497 if (return_in_pc)
20498 num_regs--;
20500 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20502 /* Var j iterates over all the registers in saved_regs_mask, while
20503 var i gives the index of each saved register in the stack frame.
20504 A PARALLEL RTX for a register pair is created here, so that the pattern for
20505 LDRD can be matched.  As PC is always the last register to be popped, and
20506 we have already decremented num_regs if PC is present, we don't have to worry
20507 about PC in this loop. */
20508 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20509 if (saved_regs_mask & (1 << j))
20511 /* Create RTX for memory load. */
20512 reg = gen_rtx_REG (SImode, j);
20513 tmp = gen_rtx_SET (reg,
20514 gen_frame_mem (SImode,
20515 plus_constant (Pmode,
20516 stack_pointer_rtx, 4 * i)));
20517 RTX_FRAME_RELATED_P (tmp) = 1;
20519 if (i % 2 == 0)
20521 /* When saved-register index (i) is even, the RTX to be emitted is
20522 yet to be created. Hence create it first. The LDRD pattern we
20523 are generating is :
20524 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20525 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20526 where target registers need not be consecutive. */
20527 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20528 dwarf = NULL_RTX;
20531 /* The i-th register is added to the PARALLEL RTX.  If i is even, reg_i is
20532 added as the 0th element; if i is odd, reg_i is added as the 1st element
20533 of the LDRD pattern shown above. */
20534 XVECEXP (par, 0, (i % 2)) = tmp;
20535 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20537 if ((i % 2) == 1)
20539 /* When saved-register index (i) is odd, RTXs for both the registers
20540 to be loaded are generated in above given LDRD pattern, and the
20541 pattern can be emitted now. */
20542 par = emit_insn (par);
20543 REG_NOTES (par) = dwarf;
20544 RTX_FRAME_RELATED_P (par) = 1;
20547 i++;
20550 /* If the number of registers popped is odd and return_in_pc is false, or
20551 the number of registers is even and return_in_pc is true, the last register
20552 is popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
20553 then use LDR with post increment. */
20555 /* Increment the stack pointer, based on there being
20556 num_regs 4-byte registers to restore. */
20557 tmp = gen_rtx_SET (stack_pointer_rtx,
20558 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20559 RTX_FRAME_RELATED_P (tmp) = 1;
20560 tmp = emit_insn (tmp);
20561 if (!return_in_pc)
20563 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20564 stack_pointer_rtx, stack_pointer_rtx);
20567 dwarf = NULL_RTX;
20569 if (((num_regs % 2) == 1 && !return_in_pc)
20570 || ((num_regs % 2) == 0 && return_in_pc))
20572 /* Scan for the single register to be popped. Skip until the saved
20573 register is found. */
20574 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20576 /* Gen LDR with post increment here. */
20577 tmp1 = gen_rtx_MEM (SImode,
20578 gen_rtx_POST_INC (SImode,
20579 stack_pointer_rtx));
20580 set_mem_alias_set (tmp1, get_frame_alias_set ());
20582 reg = gen_rtx_REG (SImode, j);
20583 tmp = gen_rtx_SET (reg, tmp1);
20584 RTX_FRAME_RELATED_P (tmp) = 1;
20585 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20587 if (return_in_pc)
20589 /* If return_in_pc, j must be PC_REGNUM. */
20590 gcc_assert (j == PC_REGNUM);
20591 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20592 XVECEXP (par, 0, 0) = ret_rtx;
20593 XVECEXP (par, 0, 1) = tmp;
20594 par = emit_jump_insn (par);
20596 else
20598 par = emit_insn (tmp);
20599 REG_NOTES (par) = dwarf;
20600 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20601 stack_pointer_rtx, stack_pointer_rtx);
20605 else if ((num_regs % 2) == 1 && return_in_pc)
20607 /* There are 2 registers to be popped. So, generate the pattern
20608 pop_multiple_with_stack_update_and_return to pop into PC. */
20609 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20612 return;
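/* Illustrative example: popping {r4, r5, r6, lr} with this function would
   typically produce
       ldrd    r4, r5, [sp]
       ldrd    r6, lr, [sp, #8]
       add     sp, sp, #16
   where, unlike ARM-mode LDRD, the two destination registers need not be
   consecutive.  */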
20615 /* LDRD in ARM mode needs consecutive registers as operands. This function
20616 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20617 offset addressing and then generates one separate stack update. This provides
20618 more scheduling freedom, compared to writeback on every load. However,
20619 if the function returns using load into PC directly
20620 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20621 before the last load. TODO: Add a peephole optimization to recognize
20622 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20623 peephole optimization to merge the load at stack-offset zero
20624 with the stack update instruction using load with writeback
20625 in post-index addressing mode. */
20626 static void
20627 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20629 int j = 0;
20630 int offset = 0;
20631 rtx par = NULL_RTX;
20632 rtx dwarf = NULL_RTX;
20633 rtx tmp, mem;
20635 /* Restore saved registers. */
20636 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20637 j = 0;
20638 while (j <= LAST_ARM_REGNUM)
20639 if (saved_regs_mask & (1 << j))
20641 if ((j % 2) == 0
20642 && (saved_regs_mask & (1 << (j + 1)))
20643 && (j + 1) != PC_REGNUM)
20645 /* Current register and next register form register pair for which
20646 LDRD can be generated. PC is always the last register popped, and
20647 we handle it separately. */
20648 if (offset > 0)
20649 mem = gen_frame_mem (DImode,
20650 plus_constant (Pmode,
20651 stack_pointer_rtx,
20652 offset));
20653 else
20654 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20656 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20657 tmp = emit_insn (tmp);
20658 RTX_FRAME_RELATED_P (tmp) = 1;
20660 /* Generate dwarf info. */
20662 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20663 gen_rtx_REG (SImode, j),
20664 NULL_RTX);
20665 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20666 gen_rtx_REG (SImode, j + 1),
20667 dwarf);
20669 REG_NOTES (tmp) = dwarf;
20671 offset += 8;
20672 j += 2;
20674 else if (j != PC_REGNUM)
20676 /* Emit a single word load. */
20677 if (offset > 0)
20678 mem = gen_frame_mem (SImode,
20679 plus_constant (Pmode,
20680 stack_pointer_rtx,
20681 offset));
20682 else
20683 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20685 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20686 tmp = emit_insn (tmp);
20687 RTX_FRAME_RELATED_P (tmp) = 1;
20689 /* Generate dwarf info. */
20690 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20691 gen_rtx_REG (SImode, j),
20692 NULL_RTX);
20694 offset += 4;
20695 j += 1;
20697 else /* j == PC_REGNUM */
20698 j++;
20700 else
20701 j++;
20703 /* Update the stack. */
20704 if (offset > 0)
20706 tmp = gen_rtx_SET (stack_pointer_rtx,
20707 plus_constant (Pmode,
20708 stack_pointer_rtx,
20709 offset));
20710 tmp = emit_insn (tmp);
20711 arm_add_cfa_adjust_cfa_note (tmp, offset,
20712 stack_pointer_rtx, stack_pointer_rtx);
20713 offset = 0;
20716 if (saved_regs_mask & (1 << PC_REGNUM))
20718 /* Only PC is to be popped. */
20719 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20720 XVECEXP (par, 0, 0) = ret_rtx;
20721 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20722 gen_frame_mem (SImode,
20723 gen_rtx_POST_INC (SImode,
20724 stack_pointer_rtx)));
20725 RTX_FRAME_RELATED_P (tmp) = 1;
20726 XVECEXP (par, 0, 1) = tmp;
20727 par = emit_jump_insn (par);
20729 /* Generate dwarf info. */
20730 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20731 gen_rtx_REG (SImode, PC_REGNUM),
20732 NULL_RTX);
20733 REG_NOTES (par) = dwarf;
20734 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20735 stack_pointer_rtx, stack_pointer_rtx);
20739 /* Calculate the size of the return value that is passed in registers. */
20740 static unsigned
20741 arm_size_return_regs (void)
20743 machine_mode mode;
20745 if (crtl->return_rtx != 0)
20746 mode = GET_MODE (crtl->return_rtx);
20747 else
20748 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20750 return GET_MODE_SIZE (mode);
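/* For example, a function returning int gives 4 here, while one returning
   long long (or double under a soft-float ABI) gives 8, since DImode and
   DFmode are both 8 bytes wide.  */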
20753 /* Return true if the current function needs to save/restore LR. */
20754 static bool
20755 thumb_force_lr_save (void)
20757 return !cfun->machine->lr_save_eliminated
20758 && (!crtl->is_leaf
20759 || thumb_far_jump_used_p ()
20760 || df_regs_ever_live_p (LR_REGNUM));
20763 /* We do not know whether r3 will be available, because
20764 there is an indirect tail call happening in this
20765 particular case. */
20766 static bool
20767 is_indirect_tailcall_p (rtx call)
20769 rtx pat = PATTERN (call);
20771 /* Indirect tail call. */
20772 pat = XVECEXP (pat, 0, 0);
20773 if (GET_CODE (pat) == SET)
20774 pat = SET_SRC (pat);
20776 pat = XEXP (XEXP (pat, 0), 0);
20777 return REG_P (pat);
20780 /* Return true if r3 is used by any of the tail call insns in the
20781 current function. */
20782 static bool
20783 any_sibcall_could_use_r3 (void)
20785 edge_iterator ei;
20786 edge e;
20788 if (!crtl->tail_call_emit)
20789 return false;
20790 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20791 if (e->flags & EDGE_SIBCALL)
20793 rtx_insn *call = BB_END (e->src);
20794 if (!CALL_P (call))
20795 call = prev_nonnote_nondebug_insn (call);
20796 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20797 if (find_regno_fusage (call, USE, 3)
20798 || is_indirect_tailcall_p (call))
20799 return true;
20801 return false;
20805 /* Compute the distance from register FROM to register TO.
20806 These can be the arg pointer (26), the soft frame pointer (25),
20807 the stack pointer (13) or the hard frame pointer (11).
20808 In thumb mode r7 is used as the soft frame pointer, if needed.
20809 Typical stack layout looks like this:
20811 old stack pointer -> | |
20812 ----
20813 | | \
20814 | | saved arguments for
20815 | | vararg functions
20816 | | /
20818 hard FP & arg pointer -> | | \
20819 | | stack
20820 | | frame
20821 | | /
20823 | | \
20824 | | call saved
20825 | | registers
20826 soft frame pointer -> | | /
20828 | | \
20829 | | local
20830 | | variables
20831 locals base pointer -> | | /
20833 | | \
20834 | | outgoing
20835 | | arguments
20836 current stack pointer -> | | /
20839 For a given function some or all of these stack components
20840 may not be needed, giving rise to the possibility of
20841 eliminating some of the registers.
20843 The values returned by this function must reflect the behavior
20844 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20846 The sign of the number returned reflects the direction of stack
20847 growth, so the values are positive for all eliminations except
20848 from the soft frame pointer to the hard frame pointer.
20850 SFP may point just inside the local variables block to ensure correct
20851 alignment. */
20854 /* Return cached stack offsets. */
20856 static arm_stack_offsets *
20857 arm_get_frame_offsets (void)
20859 struct arm_stack_offsets *offsets;
20861 offsets = &cfun->machine->stack_offsets;
20863 return offsets;
20867 /* Calculate stack offsets. These are used to calculate register elimination
20868 offsets and in prologue/epilogue code. Also calculates which registers
20869 should be saved. */
20871 static void
20872 arm_compute_frame_layout (void)
20874 struct arm_stack_offsets *offsets;
20875 unsigned long func_type;
20876 int saved;
20877 int core_saved;
20878 HOST_WIDE_INT frame_size;
20879 int i;
20881 offsets = &cfun->machine->stack_offsets;
20883 /* Initially this is the size of the local variables. It will be translated
20884 into an offset once we have determined the size of preceding data. */
20885 frame_size = ROUND_UP_WORD (get_frame_size ());
20887 /* Space for variadic functions. */
20888 offsets->saved_args = crtl->args.pretend_args_size;
20890 /* In Thumb mode this is incorrect, but never used. */
20891 offsets->frame
20892 = (offsets->saved_args
20893 + arm_compute_static_chain_stack_bytes ()
20894 + (frame_pointer_needed ? 4 : 0));
20896 if (TARGET_32BIT)
20898 unsigned int regno;
20900 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20901 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20902 saved = core_saved;
20904 /* We know that SP will be doubleword aligned on entry, and we must
20905 preserve that condition at any subroutine call. We also require the
20906 soft frame pointer to be doubleword aligned. */
20908 if (TARGET_REALLY_IWMMXT)
20910 /* Check for the call-saved iWMMXt registers. */
20911 for (regno = FIRST_IWMMXT_REGNUM;
20912 regno <= LAST_IWMMXT_REGNUM;
20913 regno++)
20914 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20915 saved += 8;
20918 func_type = arm_current_func_type ();
20919 /* Space for saved VFP registers. */
20920 if (! IS_VOLATILE (func_type)
20921 && TARGET_HARD_FLOAT)
20922 saved += arm_get_vfp_saved_size ();
20924 else /* TARGET_THUMB1 */
20926 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20927 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20928 saved = core_saved;
20929 if (TARGET_BACKTRACE)
20930 saved += 16;
20933 /* Saved registers include the stack frame. */
20934 offsets->saved_regs
20935 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20936 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20938 /* A leaf function does not need any stack alignment if it has nothing
20939 on the stack. */
20940 if (crtl->is_leaf && frame_size == 0
20941 /* However if it calls alloca(), we have a dynamically allocated
20942 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20943 && ! cfun->calls_alloca)
20945 offsets->outgoing_args = offsets->soft_frame;
20946 offsets->locals_base = offsets->soft_frame;
20947 return;
20950 /* Ensure SFP has the correct alignment. */
20951 if (ARM_DOUBLEWORD_ALIGN
20952 && (offsets->soft_frame & 7))
20954 offsets->soft_frame += 4;
20955 /* Try to align stack by pushing an extra reg. Don't bother doing this
20956 when there is a stack frame as the alignment will be rolled into
20957 the normal stack adjustment. */
20958 if (frame_size + crtl->outgoing_args_size == 0)
20960 int reg = -1;
20962 /* Register r3 is caller-saved. Normally it does not need to be
20963 saved on entry by the prologue. However if we choose to save
20964 it for padding then we may confuse the compiler into thinking
20965 a prologue sequence is required when in fact it is not. This
20966 will occur when shrink-wrapping if r3 is used as a scratch
20967 register and there are no other callee-saved writes.
20969 This situation can be avoided when other callee-saved registers
20970 are available: r3 is then not mandatory, since a callee-saved
20971 register can be chosen for the padding instead. */
20972 bool prefer_callee_reg_p = false;
20974 /* If it is safe to use r3, then do so. This sometimes
20975 generates better code on Thumb-2 by avoiding the need to
20976 use 32-bit push/pop instructions. */
20977 if (! any_sibcall_could_use_r3 ()
20978 && arm_size_return_regs () <= 12
20979 && (offsets->saved_regs_mask & (1 << 3)) == 0
20980 && (TARGET_THUMB2
20981 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20983 reg = 3;
20984 if (!TARGET_THUMB2)
20985 prefer_callee_reg_p = true;
20987 if (reg == -1
20988 || prefer_callee_reg_p)
20990 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20992 /* Avoid fixed registers; they may be changed at
20993 arbitrary times so it's unsafe to restore them
20994 during the epilogue. */
20995 if (!fixed_regs[i]
20996 && (offsets->saved_regs_mask & (1 << i)) == 0)
20998 reg = i;
20999 break;
21004 if (reg != -1)
21006 offsets->saved_regs += 4;
21007 offsets->saved_regs_mask |= (1 << reg);
21012 offsets->locals_base = offsets->soft_frame + frame_size;
21013 offsets->outgoing_args = (offsets->locals_base
21014 + crtl->outgoing_args_size);
21016 if (ARM_DOUBLEWORD_ALIGN)
21018 /* Ensure SP remains doubleword aligned. */
21019 if (offsets->outgoing_args & 7)
21020 offsets->outgoing_args += 4;
21021 gcc_assert (!(offsets->outgoing_args & 7));
21026 /* Calculate the relative offsets for the different stack pointers. Positive
21027 offsets are in the direction of stack growth. */
21029 HOST_WIDE_INT
21030 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21032 arm_stack_offsets *offsets;
21034 offsets = arm_get_frame_offsets ();
21036 /* OK, now we have enough information to compute the distances.
21037 There must be an entry in these switch tables for each pair
21038 of registers in ELIMINABLE_REGS, even if some of the entries
21039 seem to be redundant or useless. */
21040 switch (from)
21042 case ARG_POINTER_REGNUM:
21043 switch (to)
21045 case THUMB_HARD_FRAME_POINTER_REGNUM:
21046 return 0;
21048 case FRAME_POINTER_REGNUM:
21049 /* This is the reverse of the soft frame pointer
21050 to hard frame pointer elimination below. */
21051 return offsets->soft_frame - offsets->saved_args;
21053 case ARM_HARD_FRAME_POINTER_REGNUM:
21054 /* This is only non-zero in the case where the static chain register
21055 is stored above the frame. */
21056 return offsets->frame - offsets->saved_args - 4;
21058 case STACK_POINTER_REGNUM:
21059 /* If nothing has been pushed on the stack at all
21060 then this will return -4. This *is* correct! */
21061 return offsets->outgoing_args - (offsets->saved_args + 4);
21063 default:
21064 gcc_unreachable ();
21066 gcc_unreachable ();
21068 case FRAME_POINTER_REGNUM:
21069 switch (to)
21071 case THUMB_HARD_FRAME_POINTER_REGNUM:
21072 return 0;
21074 case ARM_HARD_FRAME_POINTER_REGNUM:
21075 /* The hard frame pointer points to the top entry in the
21076 stack frame. The soft frame pointer points to the bottom entry
21077 in the stack frame. If there is no stack frame at all,
21078 then they are identical. */
21080 return offsets->frame - offsets->soft_frame;
21082 case STACK_POINTER_REGNUM:
21083 return offsets->outgoing_args - offsets->soft_frame;
21085 default:
21086 gcc_unreachable ();
21088 gcc_unreachable ();
21090 default:
21091 /* You cannot eliminate from the stack pointer.
21092 In theory you could eliminate from the hard frame
21093 pointer to the stack pointer, but this will never
21094 happen, since if a stack frame is not needed the
21095 hard frame pointer will never be used. */
21096 gcc_unreachable ();
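/* Continuing the illustrative layout sketched before arm_get_frame_offsets
   (saved_args = 0, frame = 4, soft_frame = 16, outgoing_args = 24), the
   eliminations above would return:

     ARG_POINTER   -> FRAME_POINTER          : 16
     ARG_POINTER   -> ARM_HARD_FRAME_POINTER : 0
     ARG_POINTER   -> STACK_POINTER          : 20
     FRAME_POINTER -> STACK_POINTER          : 8

   These numbers are hypothetical; they are given only to show how the
   returned offsets relate to the stack components.  */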
21100 /* Given FROM and TO register numbers, say whether this elimination is
21101 allowed. Frame pointer elimination is automatically handled.
21103 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21104 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21105 pointer, we must eliminate FRAME_POINTER_REGNUM into
21106 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21107 ARG_POINTER_REGNUM. */
21109 bool
21110 arm_can_eliminate (const int from, const int to)
21112 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21113 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21114 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21115 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21116 true);
21119 /* Emit RTL to save coprocessor registers on function entry. Returns the
21120 number of bytes pushed. */
21122 static int
21123 arm_save_coproc_regs(void)
21125 int saved_size = 0;
21126 unsigned reg;
21127 unsigned start_reg;
21128 rtx insn;
21130 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21131 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21133 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21134 insn = gen_rtx_MEM (V2SImode, insn);
21135 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21136 RTX_FRAME_RELATED_P (insn) = 1;
21137 saved_size += 8;
21140 if (TARGET_HARD_FLOAT)
21142 start_reg = FIRST_VFP_REGNUM;
21144 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21146 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21147 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21149 if (start_reg != reg)
21150 saved_size += vfp_emit_fstmd (start_reg,
21151 (reg - start_reg) / 2);
21152 start_reg = reg + 2;
21155 if (start_reg != reg)
21156 saved_size += vfp_emit_fstmd (start_reg,
21157 (reg - start_reg) / 2);
21159 return saved_size;
21163 /* Set the Thumb frame pointer from the stack pointer. */
21165 static void
21166 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21168 HOST_WIDE_INT amount;
21169 rtx insn, dwarf;
21171 amount = offsets->outgoing_args - offsets->locals_base;
21172 if (amount < 1024)
21173 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21174 stack_pointer_rtx, GEN_INT (amount)));
21175 else
21177 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21178 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21179 expects the first two operands to be the same. */
21180 if (TARGET_THUMB2)
21182 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21183 stack_pointer_rtx,
21184 hard_frame_pointer_rtx));
21186 else
21188 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21189 hard_frame_pointer_rtx,
21190 stack_pointer_rtx));
21192 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21193 plus_constant (Pmode, stack_pointer_rtx, amount));
21194 RTX_FRAME_RELATED_P (dwarf) = 1;
21195 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21198 RTX_FRAME_RELATED_P (insn) = 1;
21201 struct scratch_reg {
21202 rtx reg;
21203 bool saved;
21206 /* Return a short-lived scratch register for use as a 2nd scratch register on
21207 function entry after the registers are saved in the prologue. This register
21208 must be released by means of release_scratch_register_on_entry. IP is not
21209 considered since it is always used as the 1st scratch register if available.
21211 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21212 mask of live registers. */
21214 static void
21215 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21216 unsigned long live_regs)
21218 int regno = -1;
21220 sr->saved = false;
21222 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21223 regno = LR_REGNUM;
21224 else
21226 unsigned int i;
21228 for (i = 4; i < 11; i++)
21229 if (regno1 != i && (live_regs & (1 << i)) != 0)
21231 regno = i;
21232 break;
21235 if (regno < 0)
21237 /* If IP is used as the 1st scratch register for a nested function,
21238 then either r3 wasn't available or is used to preserve IP. */
21239 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21240 regno1 = 3;
21241 regno = (regno1 == 3 ? 2 : 3);
21242 sr->saved
21243 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21244 regno);
21248 sr->reg = gen_rtx_REG (SImode, regno);
21249 if (sr->saved)
21251 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21252 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21253 rtx x = gen_rtx_SET (stack_pointer_rtx,
21254 plus_constant (Pmode, stack_pointer_rtx, -4));
21255 RTX_FRAME_RELATED_P (insn) = 1;
21256 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21260 /* Release a scratch register obtained from the preceding function. */
21262 static void
21263 release_scratch_register_on_entry (struct scratch_reg *sr)
21265 if (sr->saved)
21267 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21268 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21269 rtx x = gen_rtx_SET (stack_pointer_rtx,
21270 plus_constant (Pmode, stack_pointer_rtx, 4));
21271 RTX_FRAME_RELATED_P (insn) = 1;
21272 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21276 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21278 #if PROBE_INTERVAL > 4096
21279 #error Cannot use indexed addressing mode for stack probing
21280 #endif
21282 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21283 inclusive. These are offsets from the current stack pointer. REGNO1
21284 is the index number of the 1st scratch register and LIVE_REGS is the
21285 mask of live registers. */
21287 static void
21288 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21289 unsigned int regno1, unsigned long live_regs)
21291 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21293 /* See if we have a constant small number of probes to generate. If so,
21294 that's the easy case. */
21295 if (size <= PROBE_INTERVAL)
21297 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21298 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21299 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21302 /* The run-time loop is made up of 10 insns in the generic case while the
21303 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
21304 else if (size <= 5 * PROBE_INTERVAL)
21306 HOST_WIDE_INT i, rem;
21308 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21309 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21310 emit_stack_probe (reg1);
21312 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21313 it exceeds SIZE. If only two probes are needed, this will not
21314 generate any code. Then probe at FIRST + SIZE. */
21315 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21317 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21318 emit_stack_probe (reg1);
21321 rem = size - (i - PROBE_INTERVAL);
21322 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21324 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21325 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21327 else
21328 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21331 /* Otherwise, do the same as above, but in a loop. Note that we must be
21332 extra careful with variables wrapping around because we might be at
21333 the very top (or the very bottom) of the address space and we have
21334 to be able to handle this case properly; in particular, we use an
21335 equality test for the loop condition. */
21336 else
21338 HOST_WIDE_INT rounded_size;
21339 struct scratch_reg sr;
21341 get_scratch_register_on_entry (&sr, regno1, live_regs);
21343 emit_move_insn (reg1, GEN_INT (first));
21346 /* Step 1: round SIZE to the previous multiple of the interval. */
21348 rounded_size = size & -PROBE_INTERVAL;
21349 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21352 /* Step 2: compute initial and final value of the loop counter. */
21354 /* TEST_ADDR = SP + FIRST. */
21355 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21357 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21358 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21361 /* Step 3: the loop
21365 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21366 probe at TEST_ADDR
21368 while (TEST_ADDR != LAST_ADDR)
21370 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21371 until it is equal to ROUNDED_SIZE. */
21373 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21376 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21377 that SIZE is equal to ROUNDED_SIZE. */
21379 if (size != rounded_size)
21381 HOST_WIDE_INT rem = size - rounded_size;
21383 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21385 emit_set_insn (sr.reg,
21386 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21387 emit_stack_probe (plus_constant (Pmode, sr.reg,
21388 PROBE_INTERVAL - rem));
21390 else
21391 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21394 release_scratch_register_on_entry (&sr);
21397 /* Make sure nothing is scheduled before we are done. */
21398 emit_insn (gen_blockage ());
21401 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21402 absolute addresses. */
21404 const char *
21405 output_probe_stack_range (rtx reg1, rtx reg2)
21407 static int labelno = 0;
21408 char loop_lab[32];
21409 rtx xops[2];
21411 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21413 /* Loop. */
21414 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21416 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21417 xops[0] = reg1;
21418 xops[1] = GEN_INT (PROBE_INTERVAL);
21419 output_asm_insn ("sub\t%0, %0, %1", xops);
21421 /* Probe at TEST_ADDR. */
21422 output_asm_insn ("str\tr0, [%0, #0]", xops);
21424 /* Test if TEST_ADDR == LAST_ADDR. */
21425 xops[1] = reg2;
21426 output_asm_insn ("cmp\t%0, %1", xops);
21428 /* Branch. */
21429 fputs ("\tbne\t", asm_out_file);
21430 assemble_name_raw (asm_out_file, loop_lab);
21431 fputc ('\n', asm_out_file);
21433 return "";
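/* For reference, the loop emitted above looks roughly like this (the register
   numbers and label name are illustrative, and PROBE_INTERVAL is assumed to
   be 4096):

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/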
21436 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21437 function. */
21438 void
21439 arm_expand_prologue (void)
21441 rtx amount;
21442 rtx insn;
21443 rtx ip_rtx;
21444 unsigned long live_regs_mask;
21445 unsigned long func_type;
21446 int fp_offset = 0;
21447 int saved_pretend_args = 0;
21448 int saved_regs = 0;
21449 unsigned HOST_WIDE_INT args_to_push;
21450 HOST_WIDE_INT size;
21451 arm_stack_offsets *offsets;
21452 bool clobber_ip;
21454 func_type = arm_current_func_type ();
21456 /* Naked functions don't have prologues. */
21457 if (IS_NAKED (func_type))
21459 if (flag_stack_usage_info)
21460 current_function_static_stack_size = 0;
21461 return;
21464 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21465 args_to_push = crtl->args.pretend_args_size;
21467 /* Compute which register we will have to save onto the stack. */
21468 offsets = arm_get_frame_offsets ();
21469 live_regs_mask = offsets->saved_regs_mask;
21471 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21473 if (IS_STACKALIGN (func_type))
21475 rtx r0, r1;
21477 /* Handle a word-aligned stack pointer. We generate the following:
21479 mov r0, sp
21480 bic r1, r0, #7
21481 mov sp, r1
21482 <save and restore r0 in normal prologue/epilogue>
21483 mov sp, r0
21484 bx lr
21486 The unwinder doesn't need to know about the stack realignment.
21487 Just tell it we saved SP in r0. */
21488 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21490 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21491 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21493 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21494 RTX_FRAME_RELATED_P (insn) = 1;
21495 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21497 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21499 /* ??? The CFA changes here, which may cause GDB to conclude that it
21500 has entered a different function. That said, the unwind info is
21501 correct, individually, before and after this instruction because
21502 we've described the save of SP, which will override the default
21503 handling of SP as restoring from the CFA. */
21504 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21507 /* The static chain register is the same as the IP register. If it is
21508 clobbered when creating the frame, we need to save and restore it. */
21509 clobber_ip = IS_NESTED (func_type)
21510 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21511 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21512 || flag_stack_clash_protection)
21513 && !df_regs_ever_live_p (LR_REGNUM)
21514 && arm_r3_live_at_start_p ()));
21516 /* Find somewhere to store IP whilst the frame is being created.
21517 We try the following places in order:
21519 1. The last argument register r3 if it is available.
21520 2. A slot on the stack above the frame if there are no
21521 arguments to push onto the stack.
21522 3. Register r3 again, after pushing the argument registers
21523 onto the stack, if this is a varargs function.
21524 4. The last slot on the stack created for the arguments to
21525 push, if this isn't a varargs function.
21527 Note - we only need to tell the dwarf2 backend about the SP
21528 adjustment in the second variant; the static chain register
21529 doesn't need to be unwound, as it doesn't contain a value
21530 inherited from the caller. */
21531 if (clobber_ip)
21533 if (!arm_r3_live_at_start_p ())
21534 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21535 else if (args_to_push == 0)
21537 rtx addr, dwarf;
21539 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21540 saved_regs += 4;
21542 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21543 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21544 fp_offset = 4;
21546 /* Just tell the dwarf backend that we adjusted SP. */
21547 dwarf = gen_rtx_SET (stack_pointer_rtx,
21548 plus_constant (Pmode, stack_pointer_rtx,
21549 -fp_offset));
21550 RTX_FRAME_RELATED_P (insn) = 1;
21551 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21553 else
21555 /* Store the args on the stack. */
21556 if (cfun->machine->uses_anonymous_args)
21558 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21559 (0xf0 >> (args_to_push / 4)) & 0xf);
21560 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21561 saved_pretend_args = 1;
21563 else
21565 rtx addr, dwarf;
21567 if (args_to_push == 4)
21568 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21569 else
21570 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21571 plus_constant (Pmode,
21572 stack_pointer_rtx,
21573 -args_to_push));
21575 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21577 /* Just tell the dwarf backend that we adjusted SP. */
21578 dwarf = gen_rtx_SET (stack_pointer_rtx,
21579 plus_constant (Pmode, stack_pointer_rtx,
21580 -args_to_push));
21581 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21584 RTX_FRAME_RELATED_P (insn) = 1;
21585 fp_offset = args_to_push;
21586 args_to_push = 0;
21590 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21592 if (IS_INTERRUPT (func_type))
21594 /* Interrupt functions must not corrupt any registers.
21595 Creating a frame pointer, however, corrupts the IP
21596 register, so we must push it first. */
21597 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21599 /* Do not set RTX_FRAME_RELATED_P on this insn.
21600 The dwarf stack unwinding code only wants to see one
21601 stack decrement per function, and this is not it. If
21602 this instruction is labeled as being part of the frame
21603 creation sequence then dwarf2out_frame_debug_expr will
21604 die when it encounters the assignment of IP to FP
21605 later on, since the use of SP here establishes SP as
21606 the CFA register and not IP.
21608 Anyway this instruction is not really part of the stack
21609 frame creation although it is part of the prologue. */
21612 insn = emit_set_insn (ip_rtx,
21613 plus_constant (Pmode, stack_pointer_rtx,
21614 fp_offset));
21615 RTX_FRAME_RELATED_P (insn) = 1;
21618 if (args_to_push)
21620 /* Push the argument registers, or reserve space for them. */
21621 if (cfun->machine->uses_anonymous_args)
21622 insn = emit_multi_reg_push
21623 ((0xf0 >> (args_to_push / 4)) & 0xf,
21624 (0xf0 >> (args_to_push / 4)) & 0xf);
21625 else
21626 insn = emit_insn
21627 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21628 GEN_INT (- args_to_push)));
21629 RTX_FRAME_RELATED_P (insn) = 1;
21632 /* If this is an interrupt service routine, and the link register
21633 is going to be pushed, and we're not generating the extra
21634 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21635 subtracting four from LR now will mean that the function return
21636 can be done with a single instruction. */
21637 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21638 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21639 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21640 && TARGET_ARM)
21642 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21644 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21647 if (live_regs_mask)
21649 unsigned long dwarf_regs_mask = live_regs_mask;
21651 saved_regs += bit_count (live_regs_mask) * 4;
21652 if (optimize_size && !frame_pointer_needed
21653 && saved_regs == offsets->saved_regs - offsets->saved_args)
21655 /* If no coprocessor registers are being pushed and we don't have
21656 to worry about a frame pointer then push extra registers to
21657 create the stack frame. This is done in a way that does not
21658 alter the frame layout, so is independent of the epilogue. */
21659 int n;
21660 int frame;
21661 n = 0;
21662 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21663 n++;
21664 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21665 if (frame && n * 4 >= frame)
21667 n = frame / 4;
21668 live_regs_mask |= (1 << n) - 1;
21669 saved_regs += frame;
21673 if (TARGET_LDRD
21674 && current_tune->prefer_ldrd_strd
21675 && !optimize_function_for_size_p (cfun))
21677 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21678 if (TARGET_THUMB2)
21679 thumb2_emit_strd_push (live_regs_mask);
21680 else if (TARGET_ARM
21681 && !TARGET_APCS_FRAME
21682 && !IS_INTERRUPT (func_type))
21683 arm_emit_strd_push (live_regs_mask);
21684 else
21686 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21687 RTX_FRAME_RELATED_P (insn) = 1;
21690 else
21692 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21693 RTX_FRAME_RELATED_P (insn) = 1;
21697 if (! IS_VOLATILE (func_type))
21698 saved_regs += arm_save_coproc_regs ();
21700 if (frame_pointer_needed && TARGET_ARM)
21702 /* Create the new frame pointer. */
21703 if (TARGET_APCS_FRAME)
21705 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21706 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21707 RTX_FRAME_RELATED_P (insn) = 1;
21709 else
21711 insn = GEN_INT (saved_regs - (4 + fp_offset));
21712 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21713 stack_pointer_rtx, insn));
21714 RTX_FRAME_RELATED_P (insn) = 1;
21718 size = offsets->outgoing_args - offsets->saved_args;
21719 if (flag_stack_usage_info)
21720 current_function_static_stack_size = size;
21722 /* If this isn't an interrupt service routine and we have a frame, then do
21723 stack checking. We use IP as the first scratch register, except for the
21724 non-APCS nested functions if LR or r3 is available (see clobber_ip). */
21725 if (!IS_INTERRUPT (func_type)
21726 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21727 || flag_stack_clash_protection))
21729 unsigned int regno;
21731 if (!IS_NESTED (func_type) || clobber_ip)
21732 regno = IP_REGNUM;
21733 else if (df_regs_ever_live_p (LR_REGNUM))
21734 regno = LR_REGNUM;
21735 else
21736 regno = 3;
21738 if (crtl->is_leaf && !cfun->calls_alloca)
21740 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21741 arm_emit_probe_stack_range (get_stack_check_protect (),
21742 size - get_stack_check_protect (),
21743 regno, live_regs_mask);
21745 else if (size > 0)
21746 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21747 regno, live_regs_mask);
21750 /* Recover the static chain register. */
21751 if (clobber_ip)
21753 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21754 insn = gen_rtx_REG (SImode, 3);
21755 else
21757 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21758 insn = gen_frame_mem (SImode, insn);
21760 emit_set_insn (ip_rtx, insn);
21761 emit_insn (gen_force_register_use (ip_rtx));
21764 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21766 /* This add can produce multiple insns for a large constant, so we
21767 need to get tricky. */
21768 rtx_insn *last = get_last_insn ();
21770 amount = GEN_INT (offsets->saved_args + saved_regs
21771 - offsets->outgoing_args);
21773 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21774 amount));
21777 last = last ? NEXT_INSN (last) : get_insns ();
21778 RTX_FRAME_RELATED_P (last) = 1;
21780 while (last != insn);
21782 /* If the frame pointer is needed, emit a special barrier that
21783 will prevent the scheduler from moving stores to the frame
21784 before the stack adjustment. */
21785 if (frame_pointer_needed)
21786 emit_insn (gen_stack_tie (stack_pointer_rtx,
21787 hard_frame_pointer_rtx));
21791 if (frame_pointer_needed && TARGET_THUMB2)
21792 thumb_set_frame_pointer (offsets);
21794 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21796 unsigned long mask;
21798 mask = live_regs_mask;
21799 mask &= THUMB2_WORK_REGS;
21800 if (!IS_NESTED (func_type))
21801 mask |= (1 << IP_REGNUM);
21802 arm_load_pic_register (mask);
21805 /* If we are profiling, make sure no instructions are scheduled before
21806 the call to mcount. Similarly if the user has requested no
21807 scheduling in the prolog. Similarly if we want non-call exceptions
21808 using the EABI unwinder, to prevent faulting instructions from being
21809 swapped with a stack adjustment. */
21810 if (crtl->profile || !TARGET_SCHED_PROLOG
21811 || (arm_except_unwind_info (&global_options) == UI_TARGET
21812 && cfun->can_throw_non_call_exceptions))
21813 emit_insn (gen_blockage ());
21815 /* If the link register is being kept alive, with the return address in it,
21816 then make sure that it does not get reused by the ce2 pass. */
21817 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21818 cfun->machine->lr_save_eliminated = 1;
21821 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21822 static void
21823 arm_print_condition (FILE *stream)
21825 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21827 /* Branch conversion is not implemented for Thumb-2. */
21828 if (TARGET_THUMB)
21830 output_operand_lossage ("predicated Thumb instruction");
21831 return;
21833 if (current_insn_predicate != NULL)
21835 output_operand_lossage
21836 ("predicated instruction in conditional sequence");
21837 return;
21840 fputs (arm_condition_codes[arm_current_cc], stream);
21842 else if (current_insn_predicate)
21844 enum arm_cond_code code;
21846 if (TARGET_THUMB1)
21848 output_operand_lossage ("predicated Thumb instruction");
21849 return;
21852 code = get_arm_condition_code (current_insn_predicate);
21853 fputs (arm_condition_codes[code], stream);
21858 /* Globally reserved letters: acln
21859 Punctuation letters currently used: @_|?().!#
21860 Lower case letters currently used: bcdefhimpqtvwxyz
21861 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21862 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21864 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21866 If CODE is 'd', then the X is a condition operand and the instruction
21867 should only be executed if the condition is true.
21868 If CODE is 'D', then the X is a condition operand and the instruction
21869 should only be executed if the condition is false: however, if the mode
21870 of the comparison is CCFPEmode, then always execute the instruction -- we
21871 do this because in these circumstances !GE does not necessarily imply LT;
21872 in these cases the instruction pattern will take care to make sure that
21873 an instruction containing %d will follow, thereby undoing the effects of
21874 doing this instruction unconditionally.
21875 If CODE is 'N' then X is a floating point operand that must be negated
21876 before output.
21877 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21878 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21879 static void
21880 arm_print_operand (FILE *stream, rtx x, int code)
21882 switch (code)
21884 case '@':
21885 fputs (ASM_COMMENT_START, stream);
21886 return;
21888 case '_':
21889 fputs (user_label_prefix, stream);
21890 return;
21892 case '|':
21893 fputs (REGISTER_PREFIX, stream);
21894 return;
21896 case '?':
21897 arm_print_condition (stream);
21898 return;
21900 case '.':
21901 /* The current condition code for a condition code setting instruction.
21902 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21903 fputc('s', stream);
21904 arm_print_condition (stream);
21905 return;
21907 case '!':
21908 /* If the instruction is conditionally executed then print
21909 the current condition code, otherwise print 's'. */
21910 gcc_assert (TARGET_THUMB2);
21911 if (current_insn_predicate)
21912 arm_print_condition (stream);
21913 else
21914 fputc('s', stream);
21915 break;
21917 /* %# is a "break" sequence. It doesn't output anything, but is used to
21918 separate e.g. operand numbers from following text, if that text consists
21919 of further digits which we don't want to be part of the operand
21920 number. */
21921 case '#':
21922 return;
21924 case 'N':
21926 REAL_VALUE_TYPE r;
21927 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21928 fprintf (stream, "%s", fp_const_from_val (&r));
21930 return;
21932 /* An integer or symbol address without a preceding # sign. */
21933 case 'c':
21934 switch (GET_CODE (x))
21936 case CONST_INT:
21937 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21938 break;
21940 case SYMBOL_REF:
21941 output_addr_const (stream, x);
21942 break;
21944 case CONST:
21945 if (GET_CODE (XEXP (x, 0)) == PLUS
21946 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21948 output_addr_const (stream, x);
21949 break;
21951 /* Fall through. */
21953 default:
21954 output_operand_lossage ("Unsupported operand for code '%c'", code);
21956 return;
21958 /* An integer that we want to print in HEX. */
21959 case 'x':
21960 switch (GET_CODE (x))
21962 case CONST_INT:
21963 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21964 break;
21966 default:
21967 output_operand_lossage ("Unsupported operand for code '%c'", code);
21969 return;
21971 case 'B':
21972 if (CONST_INT_P (x))
21974 HOST_WIDE_INT val;
21975 val = ARM_SIGN_EXTEND (~INTVAL (x));
21976 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21978 else
21980 putc ('~', stream);
21981 output_addr_const (stream, x);
21983 return;
21985 case 'b':
21986 /* Print the log2 of a CONST_INT. */
21988 HOST_WIDE_INT val;
21990 if (!CONST_INT_P (x)
21991 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21992 output_operand_lossage ("Unsupported operand for code '%c'", code);
21993 else
21994 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21996 return;
21998 case 'L':
21999 /* The low 16 bits of an immediate constant. */
22000 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22001 return;
22003 case 'i':
22004 fprintf (stream, "%s", arithmetic_instr (x, 1));
22005 return;
22007 case 'I':
22008 fprintf (stream, "%s", arithmetic_instr (x, 0));
22009 return;
22011 case 'S':
22013 HOST_WIDE_INT val;
22014 const char *shift;
22016 shift = shift_op (x, &val);
22018 if (shift)
22020 fprintf (stream, ", %s ", shift);
22021 if (val == -1)
22022 arm_print_operand (stream, XEXP (x, 1), 0);
22023 else
22024 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22027 return;
22029 /* An explanation of the 'Q', 'R' and 'H' register operands:
22031 In a pair of registers containing a DI or DF value the 'Q'
22032 operand returns the register number of the register containing
22033 the least significant part of the value. The 'R' operand returns
22034 the register number of the register containing the most
22035 significant part of the value.
22037 The 'H' operand returns the higher of the two register numbers.
22038 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22039 same as the 'Q' operand, since the most significant part of the
22040 value is held in the lower number register. The reverse is true
22041 on systems where WORDS_BIG_ENDIAN is false.
22043 The purpose of these operands is to distinguish between cases
22044 where the endian-ness of the values is important (for example
22045 when they are added together), and cases where the endian-ness
22046 is irrelevant, but the order of register operations is important.
22047 For example when loading a value from memory into a register
22048 pair, the endian-ness does not matter. Provided that the value
22049 from the lower memory address is put into the lower numbered
22050 register, and the value from the higher address is put into the
22051 higher numbered register, the load will work regardless of whether
22052 the value being loaded is big-wordian or little-wordian. The
22053 order of the two register loads can matter however, if the address
22054 of the memory location is actually held in one of the registers
22055 being overwritten by the load.
22057 The 'Q' and 'R' constraints are also available for 64-bit
22058 constants. */
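/* As a concrete illustration of the above: for a DImode value held in the
   register pair {r0, r1}, '%Q' prints r0 and '%R' prints r1 on a
   little-endian (WORDS_BIG_ENDIAN false) target, and the reverse on a
   big-endian target; '%H' prints r1 in both cases.  */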
22059 case 'Q':
22060 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22062 rtx part = gen_lowpart (SImode, x);
22063 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22064 return;
22067 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22069 output_operand_lossage ("invalid operand for code '%c'", code);
22070 return;
22073 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22074 return;
22076 case 'R':
22077 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22079 machine_mode mode = GET_MODE (x);
22080 rtx part;
22082 if (mode == VOIDmode)
22083 mode = DImode;
22084 part = gen_highpart_mode (SImode, mode, x);
22085 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22086 return;
22089 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22091 output_operand_lossage ("invalid operand for code '%c'", code);
22092 return;
22095 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22096 return;
22098 case 'H':
22099 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22101 output_operand_lossage ("invalid operand for code '%c'", code);
22102 return;
22105 asm_fprintf (stream, "%r", REGNO (x) + 1);
22106 return;
22108 case 'J':
22109 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22111 output_operand_lossage ("invalid operand for code '%c'", code);
22112 return;
22115 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22116 return;
22118 case 'K':
22119 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22121 output_operand_lossage ("invalid operand for code '%c'", code);
22122 return;
22125 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22126 return;
22128 case 'm':
22129 asm_fprintf (stream, "%r",
22130 REG_P (XEXP (x, 0))
22131 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22132 return;
22134 case 'M':
22135 asm_fprintf (stream, "{%r-%r}",
22136 REGNO (x),
22137 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22138 return;
22140 /* Like 'M', but writing doubleword vector registers, for use by Neon
22141 insns. */
22142 case 'h':
22144 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22145 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22146 if (numregs == 1)
22147 asm_fprintf (stream, "{d%d}", regno);
22148 else
22149 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22151 return;
22153 case 'd':
22154 /* CONST_TRUE_RTX means always -- that's the default. */
22155 if (x == const_true_rtx)
22156 return;
22158 if (!COMPARISON_P (x))
22160 output_operand_lossage ("invalid operand for code '%c'", code);
22161 return;
22164 fputs (arm_condition_codes[get_arm_condition_code (x)],
22165 stream);
22166 return;
22168 case 'D':
22169 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22170 want to do that. */
22171 if (x == const_true_rtx)
22173 output_operand_lossage ("instruction never executed");
22174 return;
22176 if (!COMPARISON_P (x))
22178 output_operand_lossage ("invalid operand for code '%c'", code);
22179 return;
22182 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22183 (get_arm_condition_code (x))],
22184 stream);
22185 return;
22187 case 's':
22188 case 'V':
22189 case 'W':
22190 case 'X':
22191 case 'Y':
22192 case 'Z':
22193 /* Former Maverick support, removed after GCC-4.7. */
22194 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22195 return;
22197 case 'U':
22198 if (!REG_P (x)
22199 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22200 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22201 /* Bad value for wCG register number. */
22203 output_operand_lossage ("invalid operand for code '%c'", code);
22204 return;
22207 else
22208 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22209 return;
22211 /* Print an iWMMXt control register name. */
22212 case 'w':
22213 if (!CONST_INT_P (x)
22214 || INTVAL (x) < 0
22215 || INTVAL (x) >= 16)
22216 /* Bad value for wC register number. */
22218 output_operand_lossage ("invalid operand for code '%c'", code);
22219 return;
22222 else
22224 static const char * wc_reg_names [16] =
22226 "wCID", "wCon", "wCSSF", "wCASF",
22227 "wC4", "wC5", "wC6", "wC7",
22228 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22229 "wC12", "wC13", "wC14", "wC15"
22232 fputs (wc_reg_names [INTVAL (x)], stream);
22234 return;
22236 /* Print the high single-precision register of a VFP double-precision
22237 register. */
22238 case 'p':
22240 machine_mode mode = GET_MODE (x);
22241 int regno;
22243 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22245 output_operand_lossage ("invalid operand for code '%c'", code);
22246 return;
22249 regno = REGNO (x);
22250 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22252 output_operand_lossage ("invalid operand for code '%c'", code);
22253 return;
22256 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22258 return;
22260 /* Print a VFP/Neon double precision or quad precision register name. */
22261 case 'P':
22262 case 'q':
22264 machine_mode mode = GET_MODE (x);
22265 int is_quad = (code == 'q');
22266 int regno;
22268 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22270 output_operand_lossage ("invalid operand for code '%c'", code);
22271 return;
22274 if (!REG_P (x)
22275 || !IS_VFP_REGNUM (REGNO (x)))
22277 output_operand_lossage ("invalid operand for code '%c'", code);
22278 return;
22281 regno = REGNO (x);
22282 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22283 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22285 output_operand_lossage ("invalid operand for code '%c'", code);
22286 return;
22289 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22290 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22292 return;
22294 /* These two codes print the low/high doubleword register of a Neon quad
22295 register, respectively. For pair-structure types, can also print
22296 low/high quadword registers. */
22297 case 'e':
22298 case 'f':
22300 machine_mode mode = GET_MODE (x);
22301 int regno;
22303 if ((GET_MODE_SIZE (mode) != 16
22304 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22306 output_operand_lossage ("invalid operand for code '%c'", code);
22307 return;
22310 regno = REGNO (x);
22311 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22313 output_operand_lossage ("invalid operand for code '%c'", code);
22314 return;
22317 if (GET_MODE_SIZE (mode) == 16)
22318 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22319 + (code == 'f' ? 1 : 0));
22320 else
22321 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22322 + (code == 'f' ? 1 : 0));
22324 return;
22326 /* Print a VFPv3 floating-point constant, represented as an integer
22327 index. */
22328 case 'G':
22330 int index = vfp3_const_double_index (x);
22331 gcc_assert (index != -1);
22332 fprintf (stream, "%d", index);
22334 return;
22336 /* Print bits representing opcode features for Neon.
22338 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22339 and polynomials as unsigned.
22341 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22343 Bit 2 is 1 for rounding functions, 0 otherwise. */
22345 /* Identify the type as 's', 'u', 'p' or 'f'. */
22346 case 'T':
22348 HOST_WIDE_INT bits = INTVAL (x);
22349 fputc ("uspf"[bits & 3], stream);
22351 return;
22353 /* Likewise, but signed and unsigned integers are both 'i'. */
22354 case 'F':
22356 HOST_WIDE_INT bits = INTVAL (x);
22357 fputc ("iipf"[bits & 3], stream);
22359 return;
22361 /* As for 'T', but emit 'u' instead of 'p'. */
22362 case 't':
22364 HOST_WIDE_INT bits = INTVAL (x);
22365 fputc ("usuf"[bits & 3], stream);
22367 return;
22369 /* Bit 2: rounding (vs none). */
22370 case 'O':
22372 HOST_WIDE_INT bits = INTVAL (x);
22373 fputs ((bits & 4) != 0 ? "r" : "", stream);
22375 return;
22377 /* Memory operand for vld1/vst1 instruction. */
22378 case 'A':
22380 rtx addr;
22381 bool postinc = FALSE;
22382 rtx postinc_reg = NULL;
22383 unsigned align, memsize, align_bits;
22385 gcc_assert (MEM_P (x));
22386 addr = XEXP (x, 0);
22387 if (GET_CODE (addr) == POST_INC)
22389 postinc = 1;
22390 addr = XEXP (addr, 0);
22392 if (GET_CODE (addr) == POST_MODIFY)
22394 postinc_reg = XEXP( XEXP (addr, 1), 1);
22395 addr = XEXP (addr, 0);
22397 asm_fprintf (stream, "[%r", REGNO (addr));
22399 /* We know the alignment of this access, so we can emit a hint in the
22400 instruction (for some alignments) as an aid to the memory subsystem
22401 of the target. */
22402 align = MEM_ALIGN (x) >> 3;
22403 memsize = MEM_SIZE (x);
22405 /* Only certain alignment specifiers are supported by the hardware. */
22406 if (memsize == 32 && (align % 32) == 0)
22407 align_bits = 256;
22408 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22409 align_bits = 128;
22410 else if (memsize >= 8 && (align % 8) == 0)
22411 align_bits = 64;
22412 else
22413 align_bits = 0;
22415 if (align_bits != 0)
22416 asm_fprintf (stream, ":%d", align_bits);
22418 asm_fprintf (stream, "]");
22420 if (postinc)
22421 fputs("!", stream);
22422 if (postinc_reg)
22423 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22425 return;
22427 case 'C':
22429 rtx addr;
22431 gcc_assert (MEM_P (x));
22432 addr = XEXP (x, 0);
22433 gcc_assert (REG_P (addr));
22434 asm_fprintf (stream, "[%r]", REGNO (addr));
22436 return;
22438 /* Translate an S register number into a D register number and element index. */
22439 case 'y':
22441 machine_mode mode = GET_MODE (x);
22442 int regno;
22444 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22446 output_operand_lossage ("invalid operand for code '%c'", code);
22447 return;
22450 regno = REGNO (x);
22451 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22453 output_operand_lossage ("invalid operand for code '%c'", code);
22454 return;
22457 regno = regno - FIRST_VFP_REGNUM;
22458 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22460 return;
22462 case 'v':
22463 gcc_assert (CONST_DOUBLE_P (x));
22464 int result;
22465 result = vfp3_const_double_for_fract_bits (x);
22466 if (result == 0)
22467 result = vfp3_const_double_for_bits (x);
22468 fprintf (stream, "#%d", result);
22469 return;
22471 /* Register specifier for vld1.16/vst1.16. Translate the S register
22472 number into a D register number and element index. */
22473 case 'z':
22475 machine_mode mode = GET_MODE (x);
22476 int regno;
22478 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22480 output_operand_lossage ("invalid operand for code '%c'", code);
22481 return;
22484 regno = REGNO (x);
22485 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22487 output_operand_lossage ("invalid operand for code '%c'", code);
22488 return;
22491 regno = regno - FIRST_VFP_REGNUM;
22492 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22494 return;
22496 default:
22497 if (x == 0)
22499 output_operand_lossage ("missing operand");
22500 return;
22503 switch (GET_CODE (x))
22505 case REG:
22506 asm_fprintf (stream, "%r", REGNO (x));
22507 break;
22509 case MEM:
22510 output_address (GET_MODE (x), XEXP (x, 0));
22511 break;
22513 case CONST_DOUBLE:
22515 char fpstr[20];
22516 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22517 sizeof (fpstr), 0, 1);
22518 fprintf (stream, "#%s", fpstr);
22520 break;
22522 default:
22523 gcc_assert (GET_CODE (x) != NEG);
22524 fputc ('#', stream);
22525 if (GET_CODE (x) == HIGH)
22527 fputs (":lower16:", stream);
22528 x = XEXP (x, 0);
22531 output_addr_const (stream, x);
22532 break;
22537 /* Target hook for printing a memory address. */
22538 static void
22539 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22541 if (TARGET_32BIT)
22543 int is_minus = GET_CODE (x) == MINUS;
22545 if (REG_P (x))
22546 asm_fprintf (stream, "[%r]", REGNO (x));
22547 else if (GET_CODE (x) == PLUS || is_minus)
22549 rtx base = XEXP (x, 0);
22550 rtx index = XEXP (x, 1);
22551 HOST_WIDE_INT offset = 0;
22552 if (!REG_P (base)
22553 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22555 /* Ensure that BASE is a register. */
22556 /* (one of them must be). */
22557 /* Also ensure the SP is not used as an index register. */
22558 std::swap (base, index);
22560 switch (GET_CODE (index))
22562 case CONST_INT:
22563 offset = INTVAL (index);
22564 if (is_minus)
22565 offset = -offset;
22566 asm_fprintf (stream, "[%r, #%wd]",
22567 REGNO (base), offset);
22568 break;
22570 case REG:
22571 asm_fprintf (stream, "[%r, %s%r]",
22572 REGNO (base), is_minus ? "-" : "",
22573 REGNO (index));
22574 break;
22576 case MULT:
22577 case ASHIFTRT:
22578 case LSHIFTRT:
22579 case ASHIFT:
22580 case ROTATERT:
22582 asm_fprintf (stream, "[%r, %s%r",
22583 REGNO (base), is_minus ? "-" : "",
22584 REGNO (XEXP (index, 0)));
22585 arm_print_operand (stream, index, 'S');
22586 fputs ("]", stream);
22587 break;
22590 default:
22591 gcc_unreachable ();
22594 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22595 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22597 gcc_assert (REG_P (XEXP (x, 0)));
22599 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22600 asm_fprintf (stream, "[%r, #%s%d]!",
22601 REGNO (XEXP (x, 0)),
22602 GET_CODE (x) == PRE_DEC ? "-" : "",
22603 GET_MODE_SIZE (mode));
22604 else
22605 asm_fprintf (stream, "[%r], #%s%d",
22606 REGNO (XEXP (x, 0)),
22607 GET_CODE (x) == POST_DEC ? "-" : "",
22608 GET_MODE_SIZE (mode));
22610 else if (GET_CODE (x) == PRE_MODIFY)
22612 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22613 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22614 asm_fprintf (stream, "#%wd]!",
22615 INTVAL (XEXP (XEXP (x, 1), 1)));
22616 else
22617 asm_fprintf (stream, "%r]!",
22618 REGNO (XEXP (XEXP (x, 1), 1)));
22620 else if (GET_CODE (x) == POST_MODIFY)
22622 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22623 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22624 asm_fprintf (stream, "#%wd",
22625 INTVAL (XEXP (XEXP (x, 1), 1)));
22626 else
22627 asm_fprintf (stream, "%r",
22628 REGNO (XEXP (XEXP (x, 1), 1)));
22630 else output_addr_const (stream, x);
22632 else
22634 if (REG_P (x))
22635 asm_fprintf (stream, "[%r]", REGNO (x));
22636 else if (GET_CODE (x) == POST_INC)
22637 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22638 else if (GET_CODE (x) == PLUS)
22640 gcc_assert (REG_P (XEXP (x, 0)));
22641 if (CONST_INT_P (XEXP (x, 1)))
22642 asm_fprintf (stream, "[%r, #%wd]",
22643 REGNO (XEXP (x, 0)),
22644 INTVAL (XEXP (x, 1)));
22645 else
22646 asm_fprintf (stream, "[%r, %r]",
22647 REGNO (XEXP (x, 0)),
22648 REGNO (XEXP (x, 1)));
22650 else
22651 output_addr_const (stream, x);
22655 /* Target hook for indicating whether a punctuation character for
22656 TARGET_PRINT_OPERAND is valid. */
22657 static bool
22658 arm_print_operand_punct_valid_p (unsigned char code)
22660 return (code == '@' || code == '|' || code == '.'
22661 || code == '(' || code == ')' || code == '#'
22662 || (TARGET_32BIT && (code == '?'))
22663 || (TARGET_THUMB2 && (code == '!'))
22664 || (TARGET_THUMB && (code == '_')));
22667 /* Target hook for assembling integer objects. The ARM version needs to
22668 handle word-sized values specially. */
22669 static bool
22670 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22672 machine_mode mode;
22674 if (size == UNITS_PER_WORD && aligned_p)
22676 fputs ("\t.word\t", asm_out_file);
22677 output_addr_const (asm_out_file, x);
22679 /* Mark symbols as position independent. We only do this in the
22680 .text segment, not in the .data segment. */
22681 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22682 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22684 /* See legitimize_pic_address for an explanation of the
22685 TARGET_VXWORKS_RTP check. */
22686 /* References to weak symbols cannot be resolved locally:
22687 they may be overridden by a non-weak definition at link
22688 time. */
22689 if (!arm_pic_data_is_text_relative
22690 || (GET_CODE (x) == SYMBOL_REF
22691 && (!SYMBOL_REF_LOCAL_P (x)
22692 || (SYMBOL_REF_DECL (x)
22693 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22694 fputs ("(GOT)", asm_out_file);
22695 else
22696 fputs ("(GOTOFF)", asm_out_file);
22698 fputc ('\n', asm_out_file);
22699 return true;
22702 mode = GET_MODE (x);
22704 if (arm_vector_mode_supported_p (mode))
22706 int i, units;
22708 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22710 units = CONST_VECTOR_NUNITS (x);
22711 size = GET_MODE_UNIT_SIZE (mode);
22713 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22714 for (i = 0; i < units; i++)
22716 rtx elt = CONST_VECTOR_ELT (x, i);
22717 assemble_integer
22718 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22720 else
22721 for (i = 0; i < units; i++)
22723 rtx elt = CONST_VECTOR_ELT (x, i);
22724 assemble_real
22725 (*CONST_DOUBLE_REAL_VALUE (elt),
22726 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22727 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22730 return true;
22733 return default_assemble_integer (x, size, aligned_p);
22736 static void
22737 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22739 section *s;
22741 if (!TARGET_AAPCS_BASED)
22743 (is_ctor ?
22744 default_named_section_asm_out_constructor
22745 : default_named_section_asm_out_destructor) (symbol, priority);
22746 return;
22749 /* Put these in the .init_array section, using a special relocation. */
22750 if (priority != DEFAULT_INIT_PRIORITY)
22752 char buf[18];
22753 sprintf (buf, "%s.%.5u",
22754 is_ctor ? ".init_array" : ".fini_array",
22755 priority);
22756 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22758 else if (is_ctor)
22759 s = ctors_section;
22760 else
22761 s = dtors_section;
22763 switch_to_section (s);
22764 assemble_align (POINTER_SIZE);
22765 fputs ("\t.word\t", asm_out_file);
22766 output_addr_const (asm_out_file, symbol);
22767 fputs ("(target1)\n", asm_out_file);
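/* As a sketch, for a constructor with priority 101 on an AAPCS-based target
   the code above switches to the ".init_array.00101" section and emits

	.word	<symbol>(target1)

   where <symbol> stands for the constructor's assembler name.  */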
22770 /* Add a function to the list of static constructors. */
22772 static void
22773 arm_elf_asm_constructor (rtx symbol, int priority)
22775 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22778 /* Add a function to the list of static destructors. */
22780 static void
22781 arm_elf_asm_destructor (rtx symbol, int priority)
22783 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22786 /* A finite state machine takes care of noticing whether or not instructions
22787 can be conditionally executed, thereby decreasing execution time and code
22788 size by deleting branch instructions. The fsm is controlled by
22789 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22791 /* The states of the fsm controlling condition codes are:
22792 0: normal, do nothing special
22793 1: make ASM_OUTPUT_OPCODE not output this instruction
22794 2: make ASM_OUTPUT_OPCODE not output this instruction
22795 3: make instructions conditional
22796 4: make instructions conditional
22798 State transitions (state->state by whom under condition):
22799 0 -> 1 final_prescan_insn if the `target' is a label
22800 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22801 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22802 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22803 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22804 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22805 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22806 (the target insn is arm_target_insn).
22808 If the jump clobbers the conditions then we use states 2 and 4.
22810 A similar thing can be done with conditional return insns.
22812 XXX In case the `target' is an unconditional branch, this conditionalising
22813 of the instructions always reduces code size, but not always execution
22814 time. But then, I want to reduce the code size to somewhere near what
22815 /bin/cc produces. */
22817 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22818 instructions. When a COND_EXEC instruction is seen the subsequent
22819 instructions are scanned so that multiple conditional instructions can be
22820 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22821 specify the length and true/false mask for the IT block. These will be
22822 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
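/* A purely illustrative Thumb-2 IT block covering two instructions with
   opposite conditions:

	ite	eq
	addeq	r0, r0, #1
	subne	r0, r0, #1
*/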
22824 /* Returns the index of the ARM condition code string in
22825 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22826 COMPARISON should be an rtx like `(eq (...) (...))'. */
22828 enum arm_cond_code
22829 maybe_get_arm_condition_code (rtx comparison)
22831 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22832 enum arm_cond_code code;
22833 enum rtx_code comp_code = GET_CODE (comparison);
22835 if (GET_MODE_CLASS (mode) != MODE_CC)
22836 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22837 XEXP (comparison, 1));
22839 switch (mode)
22841 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22842 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22843 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22844 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22845 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22846 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22847 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22848 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22849 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22850 case E_CC_DLTUmode: code = ARM_CC;
22852 dominance:
22853 if (comp_code == EQ)
22854 return ARM_INVERSE_CONDITION_CODE (code);
22855 if (comp_code == NE)
22856 return code;
22857 return ARM_NV;
22859 case E_CC_NOOVmode:
22860 switch (comp_code)
22862 case NE: return ARM_NE;
22863 case EQ: return ARM_EQ;
22864 case GE: return ARM_PL;
22865 case LT: return ARM_MI;
22866 default: return ARM_NV;
22869 case E_CC_Zmode:
22870 switch (comp_code)
22872 case NE: return ARM_NE;
22873 case EQ: return ARM_EQ;
22874 default: return ARM_NV;
22877 case E_CC_Nmode:
22878 switch (comp_code)
22880 case NE: return ARM_MI;
22881 case EQ: return ARM_PL;
22882 default: return ARM_NV;
22885 case E_CCFPEmode:
22886 case E_CCFPmode:
22887 /* We can handle all cases except UNEQ and LTGT. */
22888 switch (comp_code)
22890 case GE: return ARM_GE;
22891 case GT: return ARM_GT;
22892 case LE: return ARM_LS;
22893 case LT: return ARM_MI;
22894 case NE: return ARM_NE;
22895 case EQ: return ARM_EQ;
22896 case ORDERED: return ARM_VC;
22897 case UNORDERED: return ARM_VS;
22898 case UNLT: return ARM_LT;
22899 case UNLE: return ARM_LE;
22900 case UNGT: return ARM_HI;
22901 case UNGE: return ARM_PL;
22902 /* UNEQ and LTGT do not have a representation. */
22903 case UNEQ: /* Fall through. */
22904 case LTGT: /* Fall through. */
22905 default: return ARM_NV;
22908 case E_CC_SWPmode:
22909 switch (comp_code)
22911 case NE: return ARM_NE;
22912 case EQ: return ARM_EQ;
22913 case GE: return ARM_LE;
22914 case GT: return ARM_LT;
22915 case LE: return ARM_GE;
22916 case LT: return ARM_GT;
22917 case GEU: return ARM_LS;
22918 case GTU: return ARM_CC;
22919 case LEU: return ARM_CS;
22920 case LTU: return ARM_HI;
22921 default: return ARM_NV;
22924 case E_CC_Cmode:
22925 switch (comp_code)
22927 case LTU: return ARM_CS;
22928 case GEU: return ARM_CC;
22929 case NE: return ARM_CS;
22930 case EQ: return ARM_CC;
22931 default: return ARM_NV;
22934 case E_CC_CZmode:
22935 switch (comp_code)
22937 case NE: return ARM_NE;
22938 case EQ: return ARM_EQ;
22939 case GEU: return ARM_CS;
22940 case GTU: return ARM_HI;
22941 case LEU: return ARM_LS;
22942 case LTU: return ARM_CC;
22943 default: return ARM_NV;
22946 case E_CC_NCVmode:
22947 switch (comp_code)
22949 case GE: return ARM_GE;
22950 case LT: return ARM_LT;
22951 case GEU: return ARM_CS;
22952 case LTU: return ARM_CC;
22953 default: return ARM_NV;
22956 case E_CC_Vmode:
22957 switch (comp_code)
22959 case NE: return ARM_VS;
22960 case EQ: return ARM_VC;
22961 default: return ARM_NV;
22964 case E_CCmode:
22965 switch (comp_code)
22967 case NE: return ARM_NE;
22968 case EQ: return ARM_EQ;
22969 case GE: return ARM_GE;
22970 case GT: return ARM_GT;
22971 case LE: return ARM_LE;
22972 case LT: return ARM_LT;
22973 case GEU: return ARM_CS;
22974 case GTU: return ARM_HI;
22975 case LEU: return ARM_LS;
22976 case LTU: return ARM_CC;
22977 default: return ARM_NV;
22980 default: gcc_unreachable ();
22984 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22985 static enum arm_cond_code
22986 get_arm_condition_code (rtx comparison)
22988 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22989 gcc_assert (code != ARM_NV);
22990 return code;
22993 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22994 code registers when not targeting Thumb1. The VFP condition register
22995 only exists when generating hard-float code. */
22996 static bool
22997 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22999 if (!TARGET_32BIT)
23000 return false;
23002 *p1 = CC_REGNUM;
23003 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23004 return true;
23007 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23008 instructions. */
23009 void
23010 thumb2_final_prescan_insn (rtx_insn *insn)
23012 rtx_insn *first_insn = insn;
23013 rtx body = PATTERN (insn);
23014 rtx predicate;
23015 enum arm_cond_code code;
23016 int n;
23017 int mask;
23018 int max;
23020 /* max_insns_skipped in the tune was already taken into account in the
23021 cost model of the ifcvt pass when generating COND_EXEC insns.  At this
23022 stage just emit the IT blocks as large as we can; it does not make
23023 sense to split the IT blocks. */
23024 max = MAX_INSN_PER_IT_BLOCK;
23026 /* Remove the previous insn from the count of insns to be output. */
23027 if (arm_condexec_count)
23028 arm_condexec_count--;
23030 /* Nothing to do if we are already inside a conditional block. */
23031 if (arm_condexec_count)
23032 return;
23034 if (GET_CODE (body) != COND_EXEC)
23035 return;
23037 /* Conditional jumps are implemented directly. */
23038 if (JUMP_P (insn))
23039 return;
23041 predicate = COND_EXEC_TEST (body);
23042 arm_current_cc = get_arm_condition_code (predicate);
23044 n = get_attr_ce_count (insn);
23045 arm_condexec_count = 1;
23046 arm_condexec_mask = (1 << n) - 1;
23047 arm_condexec_masklen = n;
23048 /* See if subsequent instructions can be combined into the same block. */
23049 for (;;)
23051 insn = next_nonnote_insn (insn);
23053 /* Jumping into the middle of an IT block is illegal, so a label or
23054 barrier terminates the block. */
23055 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23056 break;
23058 body = PATTERN (insn);
23059 /* USE and CLOBBER aren't really insns, so just skip them. */
23060 if (GET_CODE (body) == USE
23061 || GET_CODE (body) == CLOBBER)
23062 continue;
23064 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23065 if (GET_CODE (body) != COND_EXEC)
23066 break;
23067 /* Maximum number of conditionally executed instructions in a block. */
23068 n = get_attr_ce_count (insn);
23069 if (arm_condexec_masklen + n > max)
23070 break;
23072 predicate = COND_EXEC_TEST (body);
23073 code = get_arm_condition_code (predicate);
23074 mask = (1 << n) - 1;
23075 if (arm_current_cc == code)
23076 arm_condexec_mask |= (mask << arm_condexec_masklen);
23077 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23078 break;
23080 arm_condexec_count++;
23081 arm_condexec_masklen += n;
23083 /* A jump must be the last instruction in a conditional block. */
23084 if (JUMP_P (insn))
23085 break;
23087 /* Restore recog_data (getting the attributes of other insns can
23088 destroy this array, but final.c assumes that it remains intact
23089 across this call). */
23090 extract_constrain_insn_cached (first_insn);
23093 void
23094 arm_final_prescan_insn (rtx_insn *insn)
23096 /* BODY will hold the body of INSN. */
23097 rtx body = PATTERN (insn);
23099 /* This will be 1 if trying to repeat the trick, and things need to be
23100 reversed if it appears to fail.  */
23101 int reverse = 0;
23103 /* If we start with a return insn, we only succeed if we find another one. */
23104 int seeking_return = 0;
23105 enum rtx_code return_code = UNKNOWN;
23107 /* START_INSN will hold the insn from where we start looking. This is the
23108 first insn after the following code_label if REVERSE is true. */
23109 rtx_insn *start_insn = insn;
23111 /* If in state 4, check if the target branch is reached, in order to
23112 change back to state 0. */
23113 if (arm_ccfsm_state == 4)
23115 if (insn == arm_target_insn)
23117 arm_target_insn = NULL;
23118 arm_ccfsm_state = 0;
23120 return;
23123 /* If in state 3, it is possible to repeat the trick, if this insn is an
23124 unconditional branch to a label, and immediately following this branch
23125 is the previous target label which is only used once, and the label this
23126 branch jumps to is not too far off. */
23127 if (arm_ccfsm_state == 3)
23129 if (simplejump_p (insn))
23131 start_insn = next_nonnote_insn (start_insn);
23132 if (BARRIER_P (start_insn))
23134 /* XXX Isn't this always a barrier? */
23135 start_insn = next_nonnote_insn (start_insn);
23137 if (LABEL_P (start_insn)
23138 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23139 && LABEL_NUSES (start_insn) == 1)
23140 reverse = TRUE;
23141 else
23142 return;
23144 else if (ANY_RETURN_P (body))
23146 start_insn = next_nonnote_insn (start_insn);
23147 if (BARRIER_P (start_insn))
23148 start_insn = next_nonnote_insn (start_insn);
23149 if (LABEL_P (start_insn)
23150 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23151 && LABEL_NUSES (start_insn) == 1)
23153 reverse = TRUE;
23154 seeking_return = 1;
23155 return_code = GET_CODE (body);
23157 else
23158 return;
23160 else
23161 return;
23164 gcc_assert (!arm_ccfsm_state || reverse);
23165 if (!JUMP_P (insn))
23166 return;
23168 /* This jump might be paralleled with a clobber of the condition codes;
23169 the jump should always come first.  */
23170 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23171 body = XVECEXP (body, 0, 0);
23173 if (reverse
23174 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23175 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23177 int insns_skipped;
23178 int fail = FALSE, succeed = FALSE;
23179 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23180 int then_not_else = TRUE;
23181 rtx_insn *this_insn = start_insn;
23182 rtx label = 0;
23184 /* Register the insn jumped to. */
23185 if (reverse)
23187 if (!seeking_return)
23188 label = XEXP (SET_SRC (body), 0);
23190 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23191 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23192 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23194 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23195 then_not_else = FALSE;
23197 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23199 seeking_return = 1;
23200 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23202 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23204 seeking_return = 1;
23205 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23206 then_not_else = FALSE;
23208 else
23209 gcc_unreachable ();
23211 /* See how many insns this branch skips, and what kind of insns. If all
23212 insns are okay, and the label or unconditional branch to the same
23213 label is not too far away, succeed. */
23214 for (insns_skipped = 0;
23215 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23217 rtx scanbody;
23219 this_insn = next_nonnote_insn (this_insn);
23220 if (!this_insn)
23221 break;
23223 switch (GET_CODE (this_insn))
23225 case CODE_LABEL:
23226 /* Succeed if it is the target label, otherwise fail since
23227 control falls in from somewhere else. */
23228 if (this_insn == label)
23230 arm_ccfsm_state = 1;
23231 succeed = TRUE;
23233 else
23234 fail = TRUE;
23235 break;
23237 case BARRIER:
23238 /* Succeed if the following insn is the target label.
23239 Otherwise fail.
23240 If return insns are used then the last insn in a function
23241 will be a barrier. */
23242 this_insn = next_nonnote_insn (this_insn);
23243 if (this_insn && this_insn == label)
23245 arm_ccfsm_state = 1;
23246 succeed = TRUE;
23248 else
23249 fail = TRUE;
23250 break;
23252 case CALL_INSN:
23253 /* The AAPCS says that conditional calls should not be
23254 used since they make interworking inefficient (the
23255 linker can't transform BL<cond> into BLX). That's
23256 only a problem if the machine has BLX. */
23257 if (arm_arch5)
23259 fail = TRUE;
23260 break;
23263 /* Succeed if the following insn is the target label, or
23264 if the following two insns are a barrier and the
23265 target label. */
23266 this_insn = next_nonnote_insn (this_insn);
23267 if (this_insn && BARRIER_P (this_insn))
23268 this_insn = next_nonnote_insn (this_insn);
23270 if (this_insn && this_insn == label
23271 && insns_skipped < max_insns_skipped)
23273 arm_ccfsm_state = 1;
23274 succeed = TRUE;
23276 else
23277 fail = TRUE;
23278 break;
23280 case JUMP_INSN:
23281 /* If this is an unconditional branch to the same label, succeed.
23282 If it is to another label, do nothing. If it is conditional,
23283 fail. */
23284 /* XXX Probably, the tests for SET and the PC are
23285 unnecessary. */
23287 scanbody = PATTERN (this_insn);
23288 if (GET_CODE (scanbody) == SET
23289 && GET_CODE (SET_DEST (scanbody)) == PC)
23291 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23292 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23294 arm_ccfsm_state = 2;
23295 succeed = TRUE;
23297 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23298 fail = TRUE;
23300 /* Fail if a conditional return is undesirable (e.g. on a
23301 StrongARM), but still allow this if optimizing for size. */
23302 else if (GET_CODE (scanbody) == return_code
23303 && !use_return_insn (TRUE, NULL)
23304 && !optimize_size)
23305 fail = TRUE;
23306 else if (GET_CODE (scanbody) == return_code)
23308 arm_ccfsm_state = 2;
23309 succeed = TRUE;
23311 else if (GET_CODE (scanbody) == PARALLEL)
23313 switch (get_attr_conds (this_insn))
23315 case CONDS_NOCOND:
23316 break;
23317 default:
23318 fail = TRUE;
23319 break;
23322 else
23323 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23325 break;
23327 case INSN:
23328 /* Instructions using or affecting the condition codes make it
23329 fail. */
23330 scanbody = PATTERN (this_insn);
23331 if (!(GET_CODE (scanbody) == SET
23332 || GET_CODE (scanbody) == PARALLEL)
23333 || get_attr_conds (this_insn) != CONDS_NOCOND)
23334 fail = TRUE;
23335 break;
23337 default:
23338 break;
23341 if (succeed)
23343 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23344 arm_target_label = CODE_LABEL_NUMBER (label);
23345 else
23347 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23349 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23351 this_insn = next_nonnote_insn (this_insn);
23352 gcc_assert (!this_insn
23353 || (!BARRIER_P (this_insn)
23354 && !LABEL_P (this_insn)));
23356 if (!this_insn)
23358 /* Oh, dear!  We ran off the end... give up. */
23359 extract_constrain_insn_cached (insn);
23360 arm_ccfsm_state = 0;
23361 arm_target_insn = NULL;
23362 return;
23364 arm_target_insn = this_insn;
23367 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23368 what it was. */
23369 if (!reverse)
23370 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23372 if (reverse || then_not_else)
23373 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23376 /* Restore recog_data (getting the attributes of other insns can
23377 destroy this array, but final.c assumes that it remains intact
23378 across this call).  */
23379 extract_constrain_insn_cached (insn);
23383 /* Output IT instructions. */
23384 void
23385 thumb2_asm_output_opcode (FILE * stream)
23387 char buff[5];
23388 int n;
23390 if (arm_condexec_mask)
23392 for (n = 0; n < arm_condexec_masklen; n++)
23393 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23394 buff[n] = 0;
23395 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23396 arm_condition_codes[arm_current_cc]);
23397 arm_condexec_mask = 0;
23401 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM, core regs are
23402 UNITS_PER_WORD bytes wide. */
23403 static unsigned int
23404 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23406 if (TARGET_32BIT
23407 && regno > PC_REGNUM
23408 && regno != FRAME_POINTER_REGNUM
23409 && regno != ARG_POINTER_REGNUM
23410 && !IS_VFP_REGNUM (regno))
23411 return 1;
23413 return ARM_NUM_REGS (mode);
23416 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23417 static bool
23418 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23420 if (GET_MODE_CLASS (mode) == MODE_CC)
23421 return (regno == CC_REGNUM
23422 || (TARGET_HARD_FLOAT
23423 && regno == VFPCC_REGNUM));
23425 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23426 return false;
23428 if (TARGET_THUMB1)
23429 /* For the Thumb we only allow values bigger than SImode in
23430 registers 0 - 6, so that there is always a second low
23431 register available to hold the upper part of the value.
23432 We probably ought to ensure that the register is the
23433 start of an even numbered register pair. */
23434 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23436 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23438 if (mode == SFmode || mode == SImode)
23439 return VFP_REGNO_OK_FOR_SINGLE (regno);
23441 if (mode == DFmode)
23442 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23444 if (mode == HFmode)
23445 return VFP_REGNO_OK_FOR_SINGLE (regno);
23447 /* VFP registers can hold HImode values. */
23448 if (mode == HImode)
23449 return VFP_REGNO_OK_FOR_SINGLE (regno);
23451 if (TARGET_NEON)
23452 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23453 || (VALID_NEON_QREG_MODE (mode)
23454 && NEON_REGNO_OK_FOR_QUAD (regno))
23455 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23456 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23457 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23458 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23459 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23461 return false;
23464 if (TARGET_REALLY_IWMMXT)
23466 if (IS_IWMMXT_GR_REGNUM (regno))
23467 return mode == SImode;
23469 if (IS_IWMMXT_REGNUM (regno))
23470 return VALID_IWMMXT_REG_MODE (mode);
23473 /* We allow almost any value to be stored in the general registers.
23474 Restrict doubleword quantities to even register pairs in ARM state
23475 so that we can use ldrd. Do not allow very large Neon structure
23476 opaque modes in general registers; they would use too many. */
23477 if (regno <= LAST_ARM_REGNUM)
23479 if (ARM_NUM_REGS (mode) > 4)
23480 return false;
23482 if (TARGET_THUMB2)
23483 return true;
23485 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23488 if (regno == FRAME_POINTER_REGNUM
23489 || regno == ARG_POINTER_REGNUM)
23490 /* We only allow integers in the fake hard registers. */
23491 return GET_MODE_CLASS (mode) == MODE_INT;
23493 return false;
23496 /* Implement TARGET_MODES_TIEABLE_P. */
23498 static bool
23499 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23501 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23502 return true;
23504 /* We specifically want to allow elements of "structure" modes to
23505 be tieable to the structure. This more general condition allows
23506 other rarer situations too. */
23507 if (TARGET_NEON
23508 && (VALID_NEON_DREG_MODE (mode1)
23509 || VALID_NEON_QREG_MODE (mode1)
23510 || VALID_NEON_STRUCT_MODE (mode1))
23511 && (VALID_NEON_DREG_MODE (mode2)
23512 || VALID_NEON_QREG_MODE (mode2)
23513 || VALID_NEON_STRUCT_MODE (mode2)))
23514 return true;
23516 return false;
23519 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23520 not used in arm mode. */
23522 enum reg_class
23523 arm_regno_class (int regno)
23525 if (regno == PC_REGNUM)
23526 return NO_REGS;
23528 if (TARGET_THUMB1)
23530 if (regno == STACK_POINTER_REGNUM)
23531 return STACK_REG;
23532 if (regno == CC_REGNUM)
23533 return CC_REG;
23534 if (regno < 8)
23535 return LO_REGS;
23536 return HI_REGS;
23539 if (TARGET_THUMB2 && regno < 8)
23540 return LO_REGS;
23542 if ( regno <= LAST_ARM_REGNUM
23543 || regno == FRAME_POINTER_REGNUM
23544 || regno == ARG_POINTER_REGNUM)
23545 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23547 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23548 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23550 if (IS_VFP_REGNUM (regno))
23552 if (regno <= D7_VFP_REGNUM)
23553 return VFP_D0_D7_REGS;
23554 else if (regno <= LAST_LO_VFP_REGNUM)
23555 return VFP_LO_REGS;
23556 else
23557 return VFP_HI_REGS;
23560 if (IS_IWMMXT_REGNUM (regno))
23561 return IWMMXT_REGS;
23563 if (IS_IWMMXT_GR_REGNUM (regno))
23564 return IWMMXT_GR_REGS;
23566 return NO_REGS;
23569 /* Handle a special case when computing the offset
23570 of an argument from the frame pointer. */
23571 int
23572 arm_debugger_arg_offset (int value, rtx addr)
23574 rtx_insn *insn;
23576 /* We are only interested if dbxout_parms() failed to compute the offset. */
23577 if (value != 0)
23578 return 0;
23580 /* We can only cope with the case where the address is held in a register. */
23581 if (!REG_P (addr))
23582 return 0;
23584 /* If we are using the frame pointer to point at the argument, then
23585 an offset of 0 is correct. */
23586 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23587 return 0;
23589 /* If we are using the stack pointer to point at the
23590 argument, then an offset of 0 is correct. */
23591 /* ??? Check this is consistent with thumb2 frame layout. */
23592 if ((TARGET_THUMB || !frame_pointer_needed)
23593 && REGNO (addr) == SP_REGNUM)
23594 return 0;
23596 /* Oh dear. The argument is pointed to by a register rather
23597 than being held in a register, or being stored at a known
23598 offset from the frame pointer. Since GDB only understands
23599 those two kinds of argument we must translate the address
23600 held in the register into an offset from the frame pointer.
23601 We do this by searching through the insns for the function
23602 looking to see where this register gets its value. If the
23603 register is initialized from the frame pointer plus an offset
23604 then we are in luck and we can continue, otherwise we give up.
23606 This code is exercised by producing debugging information
23607 for a function with arguments like this:
23609 double func (double a, double b, int c, double d) {return d;}
23611 Without this code the stab for parameter 'd' will be set to
23612 an offset of 0 from the frame pointer, rather than 8. */
23614 /* The if() statement says:
23616 If the insn is a normal instruction
23617 and if the insn is setting the value in a register
23618 and if the register being set is the register holding the address of the argument
23619 and if the address is computed by an addition
23620 that involves adding to a register
23621 which is the frame pointer
23622 a constant integer
23624 then... */
23626 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23628 if ( NONJUMP_INSN_P (insn)
23629 && GET_CODE (PATTERN (insn)) == SET
23630 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23631 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23632 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23633 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23634 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23637 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23639 break;
23643 if (value == 0)
23645 debug_rtx (addr);
23646 warning (0, "unable to compute real location of stacked parameter");
23647 value = 8; /* XXX magic hack */
23650 return value;
23653 /* Implement TARGET_PROMOTED_TYPE. */
23655 static tree
23656 arm_promoted_type (const_tree t)
23658 if (SCALAR_FLOAT_TYPE_P (t)
23659 && TYPE_PRECISION (t) == 16
23660 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23661 return float_type_node;
23662 return NULL_TREE;
23665 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23666 This simply adds HFmode as a supported mode; even though we don't
23667 implement arithmetic on this type directly, it's supported by
23668 optabs conversions, much the way the double-word arithmetic is
23669 special-cased in the default hook. */
23671 static bool
23672 arm_scalar_mode_supported_p (scalar_mode mode)
23674 if (mode == HFmode)
23675 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23676 else if (ALL_FIXED_POINT_MODE_P (mode))
23677 return true;
23678 else
23679 return default_scalar_mode_supported_p (mode);
23682 /* Set the value of FLT_EVAL_METHOD.
23683 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23685 0: evaluate all operations and constants, whose semantic type has at
23686 most the range and precision of type float, to the range and
23687 precision of float; evaluate all other operations and constants to
23688 the range and precision of the semantic type;
23690 N, where _FloatN is a supported interchange floating type
23691 evaluate all operations and constants, whose semantic type has at
23692 most the range and precision of _FloatN type, to the range and
23693 precision of the _FloatN type; evaluate all other operations and
23694 constants to the range and precision of the semantic type;
23696 If we have the ARMv8.2-A extensions then we support _Float16 in native
23697 precision, so we should set this to 16. Otherwise, we support the type,
23698 but want to evaluate expressions in float precision, so set this to
23699 0. */
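/* A minimal user-level sketch of the difference (hypothetical source, not
   part of this file):

	_Float16 a, b, c;
	c = a + b;

   Without the ARMv8.2-A FP16 extension the addition is evaluated in float:
   the operands are converted up, added in single precision and the result
   converted back (FLT_EVAL_METHOD_PROMOTE_TO_FLOAT).  With +fp16 the
   half-precision add can be done directly, e.g. with vadd.f16
   (FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16).  */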
23701 static enum flt_eval_method
23702 arm_excess_precision (enum excess_precision_type type)
23704 switch (type)
23706 case EXCESS_PRECISION_TYPE_FAST:
23707 case EXCESS_PRECISION_TYPE_STANDARD:
23708 /* We can calculate either in 16-bit range and precision or
23709 32-bit range and precision. Make that decision based on whether
23710 we have native support for the ARMv8.2-A 16-bit floating-point
23711 instructions or not. */
23712 return (TARGET_VFP_FP16INST
23713 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23714 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23715 case EXCESS_PRECISION_TYPE_IMPLICIT:
23716 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23717 default:
23718 gcc_unreachable ();
23720 return FLT_EVAL_METHOD_UNPREDICTABLE;
23724 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23725 _Float16 if we are using anything other than ieee format for 16-bit
23726 floating point. Otherwise, punt to the default implementation. */
23727 static opt_scalar_float_mode
23728 arm_floatn_mode (int n, bool extended)
23730 if (!extended && n == 16)
23732 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23733 return HFmode;
23734 return opt_scalar_float_mode ();
23737 return default_floatn_mode (n, extended);
23741 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23742 not to early-clobber SRC registers in the process.
23744 We assume that the operands described by SRC and DEST represent a
23745 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23746 number of components into which the copy has been decomposed. */
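/* For instance (a made-up register assignment, purely illustrative): if the
   copy has been decomposed into d2 <- d1 followed by d3 <- d2, emitting the
   component moves in that order clobbers d2 before it is read.  Because
   REGNO (dest) > REGNO (src) and the registers overlap, the else branch of
   the function below reverses the order, so d3 <- d2 is emitted first and
   then d2 <- d1.  */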
23747 void
23748 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23750 unsigned int i;
23752 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23753 || REGNO (operands[0]) < REGNO (operands[1]))
23755 for (i = 0; i < count; i++)
23757 operands[2 * i] = dest[i];
23758 operands[2 * i + 1] = src[i];
23761 else
23763 for (i = 0; i < count; i++)
23765 operands[2 * i] = dest[count - i - 1];
23766 operands[2 * i + 1] = src[count - i - 1];
23771 /* Split operands into moves from op[1] + op[2] into op[0]. */
23773 void
23774 neon_split_vcombine (rtx operands[3])
23776 unsigned int dest = REGNO (operands[0]);
23777 unsigned int src1 = REGNO (operands[1]);
23778 unsigned int src2 = REGNO (operands[2]);
23779 machine_mode halfmode = GET_MODE (operands[1]);
23780 unsigned int halfregs = REG_NREGS (operands[1]);
23781 rtx destlo, desthi;
23783 if (src1 == dest && src2 == dest + halfregs)
23785 /* No-op move. Can't split to nothing; emit something. */
23786 emit_note (NOTE_INSN_DELETED);
23787 return;
23790 /* Preserve register attributes for variable tracking. */
23791 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23792 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23793 GET_MODE_SIZE (halfmode));
23795 /* Special case of reversed high/low parts. Use VSWP. */
23796 if (src2 == dest && src1 == dest + halfregs)
23798 rtx x = gen_rtx_SET (destlo, operands[1]);
23799 rtx y = gen_rtx_SET (desthi, operands[2]);
23800 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23801 return;
23804 if (!reg_overlap_mentioned_p (operands[2], destlo))
23806 /* Try to avoid unnecessary moves if part of the result
23807 is in the right place already. */
23808 if (src1 != dest)
23809 emit_move_insn (destlo, operands[1]);
23810 if (src2 != dest + halfregs)
23811 emit_move_insn (desthi, operands[2]);
23813 else
23815 if (src2 != dest + halfregs)
23816 emit_move_insn (desthi, operands[2]);
23817 if (src1 != dest)
23818 emit_move_insn (destlo, operands[1]);
23822 /* Return the number (counting from 0) of
23823 the least significant set bit in MASK. */
23825 inline static int
23826 number_of_first_bit_set (unsigned mask)
23828 return ctz_hwi (mask);
23831 /* Like emit_multi_reg_push, but allowing for a different set of
23832 registers to be described as saved. MASK is the set of registers
23833 to be saved; REAL_REGS is the set of registers to be described as
23834 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23836 static rtx_insn *
23837 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23839 unsigned long regno;
23840 rtx par[10], tmp, reg;
23841 rtx_insn *insn;
23842 int i, j;
23844 /* Build the parallel of the registers actually being stored. */
23845 for (i = 0; mask; ++i, mask &= mask - 1)
23847 regno = ctz_hwi (mask);
23848 reg = gen_rtx_REG (SImode, regno);
23850 if (i == 0)
23851 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23852 else
23853 tmp = gen_rtx_USE (VOIDmode, reg);
23855 par[i] = tmp;
23858 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23859 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23860 tmp = gen_frame_mem (BLKmode, tmp);
23861 tmp = gen_rtx_SET (tmp, par[0]);
23862 par[0] = tmp;
23864 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23865 insn = emit_insn (tmp);
23867 /* Always build the stack adjustment note for unwind info. */
23868 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23869 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23870 par[0] = tmp;
23872 /* Build the parallel of the registers recorded as saved for unwind. */
23873 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23875 regno = ctz_hwi (real_regs);
23876 reg = gen_rtx_REG (SImode, regno);
23878 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23879 tmp = gen_frame_mem (SImode, tmp);
23880 tmp = gen_rtx_SET (tmp, reg);
23881 RTX_FRAME_RELATED_P (tmp) = 1;
23882 par[j + 1] = tmp;
23885 if (j == 0)
23886 tmp = par[0];
23887 else
23889 RTX_FRAME_RELATED_P (par[0]) = 1;
23890 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23893 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23895 return insn;
23898 /* Emit code to pop registers from the stack.  F is the
23899 assembly file.  MASK is the registers to pop. */
23900 static void
23901 thumb_pop (FILE *f, unsigned long mask)
23903 int regno;
23904 int lo_mask = mask & 0xFF;
23906 gcc_assert (mask);
23908 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23910 /* Special case.  Do not generate a POP PC statement here; do it in
23911 thumb_exit ().  */
23912 thumb_exit (f, -1);
23913 return;
23916 fprintf (f, "\tpop\t{");
23918 /* Look at the low registers first. */
23919 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23921 if (lo_mask & 1)
23923 asm_fprintf (f, "%r", regno);
23925 if ((lo_mask & ~1) != 0)
23926 fprintf (f, ", ");
23930 if (mask & (1 << PC_REGNUM))
23932 /* Catch popping the PC. */
23933 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23934 || IS_CMSE_ENTRY (arm_current_func_type ()))
23936 /* The PC is never popped directly; instead
23937 it is popped into r3 and then BX is used. */
23938 fprintf (f, "}\n");
23940 thumb_exit (f, -1);
23942 return;
23944 else
23946 if (mask & 0xFF)
23947 fprintf (f, ", ");
23949 asm_fprintf (f, "%r", PC_REGNUM);
23953 fprintf (f, "}\n");
23956 /* Generate code to return from a thumb function.
23957 If 'reg_containing_return_addr' is -1, then the return address is
23958 actually on the stack, at the stack pointer.
23960 Note: do not forget to update length attribute of corresponding insn pattern
23961 when changing assembly output (e.g. the length attribute of epilogue_insns when
23962 updating Armv8-M Baseline Security Extensions register clearing
23963 sequences). */
23964 static void
23965 thumb_exit (FILE *f, int reg_containing_return_addr)
23967 unsigned regs_available_for_popping;
23968 unsigned regs_to_pop;
23969 int pops_needed;
23970 unsigned available;
23971 unsigned required;
23972 machine_mode mode;
23973 int size;
23974 int restore_a4 = FALSE;
23976 /* Compute the registers we need to pop. */
23977 regs_to_pop = 0;
23978 pops_needed = 0;
23980 if (reg_containing_return_addr == -1)
23982 regs_to_pop |= 1 << LR_REGNUM;
23983 ++pops_needed;
23986 if (TARGET_BACKTRACE)
23988 /* Restore the (ARM) frame pointer and stack pointer. */
23989 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23990 pops_needed += 2;
23993 /* If there is nothing to pop then just emit the BX instruction and
23994 return. */
23995 if (pops_needed == 0)
23997 if (crtl->calls_eh_return)
23998 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24000 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24002 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24003 reg_containing_return_addr);
24004 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24006 else
24007 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24008 return;
24010 /* Otherwise if we are not supporting interworking and we have not created
24011 a backtrace structure and the function was not entered in ARM mode then
24012 just pop the return address straight into the PC. */
24013 else if (!TARGET_INTERWORK
24014 && !TARGET_BACKTRACE
24015 && !is_called_in_ARM_mode (current_function_decl)
24016 && !crtl->calls_eh_return
24017 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24019 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24020 return;
24023 /* Find out how many of the (return) argument registers we can corrupt. */
24024 regs_available_for_popping = 0;
24026 /* If returning via __builtin_eh_return, the bottom three registers
24027 all contain information needed for the return. */
24028 if (crtl->calls_eh_return)
24029 size = 12;
24030 else
24032 /* We can deduce the registers used from the function's
24033 return value.  This is more reliable than examining
24034 df_regs_ever_live_p () because that will be set if the register is
24035 ever used in the function, not just if the register is used
24036 to hold a return value. */
24038 if (crtl->return_rtx != 0)
24039 mode = GET_MODE (crtl->return_rtx);
24040 else
24041 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24043 size = GET_MODE_SIZE (mode);
24045 if (size == 0)
24047 /* In a void function we can use any argument register.
24048 In a function that returns a structure on the stack
24049 we can use the second and third argument registers. */
24050 if (mode == VOIDmode)
24051 regs_available_for_popping =
24052 (1 << ARG_REGISTER (1))
24053 | (1 << ARG_REGISTER (2))
24054 | (1 << ARG_REGISTER (3));
24055 else
24056 regs_available_for_popping =
24057 (1 << ARG_REGISTER (2))
24058 | (1 << ARG_REGISTER (3));
24060 else if (size <= 4)
24061 regs_available_for_popping =
24062 (1 << ARG_REGISTER (2))
24063 | (1 << ARG_REGISTER (3));
24064 else if (size <= 8)
24065 regs_available_for_popping =
24066 (1 << ARG_REGISTER (3));
24069 /* Match registers to be popped with registers into which we pop them. */
24070 for (available = regs_available_for_popping,
24071 required = regs_to_pop;
24072 required != 0 && available != 0;
24073 available &= ~(available & - available),
24074 required &= ~(required & - required))
24075 -- pops_needed;
24077 /* If we have any popping registers left over, remove them. */
24078 if (available > 0)
24079 regs_available_for_popping &= ~available;
24081 /* Otherwise if we need another popping register we can use
24082 the fourth argument register. */
24083 else if (pops_needed)
24085 /* If we have not found any free argument registers and
24086 reg a4 contains the return address, we must move it. */
24087 if (regs_available_for_popping == 0
24088 && reg_containing_return_addr == LAST_ARG_REGNUM)
24090 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24091 reg_containing_return_addr = LR_REGNUM;
24093 else if (size > 12)
24095 /* Register a4 is being used to hold part of the return value,
24096 but we have dire need of a free, low register. */
24097 restore_a4 = TRUE;
24099 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24102 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24104 /* The fourth argument register is available. */
24105 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24107 --pops_needed;
24111 /* Pop as many registers as we can. */
24112 thumb_pop (f, regs_available_for_popping);
24114 /* Process the registers we popped. */
24115 if (reg_containing_return_addr == -1)
24117 /* The return address was popped into the lowest numbered register. */
24118 regs_to_pop &= ~(1 << LR_REGNUM);
24120 reg_containing_return_addr =
24121 number_of_first_bit_set (regs_available_for_popping);
24123 /* Remove this register from the mask of available registers, so that
24124 the return address will not be corrupted by further pops. */
24125 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24128 /* If we popped other registers then handle them here. */
24129 if (regs_available_for_popping)
24131 int frame_pointer;
24133 /* Work out which register currently contains the frame pointer. */
24134 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24136 /* Move it into the correct place. */
24137 asm_fprintf (f, "\tmov\t%r, %r\n",
24138 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24140 /* (Temporarily) remove it from the mask of popped registers. */
24141 regs_available_for_popping &= ~(1 << frame_pointer);
24142 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24144 if (regs_available_for_popping)
24146 int stack_pointer;
24148 /* We popped the stack pointer as well,
24149 find the register that contains it. */
24150 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24152 /* Move it into the stack register. */
24153 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24155 /* At this point we have popped all necessary registers, so
24156 do not worry about restoring regs_available_for_popping
24157 to its correct value:
24159 assert (pops_needed == 0)
24160 assert (regs_available_for_popping == (1 << frame_pointer))
24161 assert (regs_to_pop == (1 << STACK_POINTER)) */
24163 else
24165 /* Since we have just moved the popped value into the frame
24166 pointer, the popping register is available for reuse, and
24167 we know that we still have the stack pointer left to pop. */
24168 regs_available_for_popping |= (1 << frame_pointer);
24172 /* If we still have registers left on the stack, but we no longer have
24173 any registers into which we can pop them, then we must move the return
24174 address into the link register and make available the register that
24175 contained it. */
24176 if (regs_available_for_popping == 0 && pops_needed > 0)
24178 regs_available_for_popping |= 1 << reg_containing_return_addr;
24180 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24181 reg_containing_return_addr);
24183 reg_containing_return_addr = LR_REGNUM;
24186 /* If we have registers left on the stack then pop some more.
24187 We know that at most we will want to pop FP and SP. */
24188 if (pops_needed > 0)
24190 int popped_into;
24191 int move_to;
24193 thumb_pop (f, regs_available_for_popping);
24195 /* We have popped either FP or SP.
24196 Move whichever one it is into the correct register. */
24197 popped_into = number_of_first_bit_set (regs_available_for_popping);
24198 move_to = number_of_first_bit_set (regs_to_pop);
24200 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24201 --pops_needed;
24204 /* If we still have not popped everything then we must have only
24205 had one register available to us and we are now popping the SP. */
24206 if (pops_needed > 0)
24208 int popped_into;
24210 thumb_pop (f, regs_available_for_popping);
24212 popped_into = number_of_first_bit_set (regs_available_for_popping);
24214 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24216 /* assert (regs_to_pop == (1 << STACK_POINTER))
24217 assert (pops_needed == 1) */
24221 /* If necessary restore the a4 register. */
24222 if (restore_a4)
24224 if (reg_containing_return_addr != LR_REGNUM)
24226 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24227 reg_containing_return_addr = LR_REGNUM;
24230 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24233 if (crtl->calls_eh_return)
24234 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24236 /* Return to caller. */
24237 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24239 /* This is for the cases where LR is not being used to contain the return
24240 address. It may therefore contain information that we might not want
24241 to leak, hence it must be cleared. The value in R0 will never be a
24242 secret at this point, so it is safe to use it, see the clearing code
24243 in 'cmse_nonsecure_entry_clear_before_return'. */
24244 if (reg_containing_return_addr != LR_REGNUM)
24245 asm_fprintf (f, "\tmov\tlr, r0\n");
24247 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24248 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24250 else
24251 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24254 /* Scan INSN just before assembler is output for it.
24255 For Thumb-1, we track the status of the condition codes; this
24256 information is used in the cbranchsi4_insn pattern. */
24257 void
24258 thumb1_final_prescan_insn (rtx_insn *insn)
24260 if (flag_print_asm_name)
24261 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24262 INSN_ADDRESSES (INSN_UID (insn)));
24263 /* Don't overwrite the previous setter when we get to a cbranch. */
24264 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24266 enum attr_conds conds;
24268 if (cfun->machine->thumb1_cc_insn)
24270 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24271 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24272 CC_STATUS_INIT;
24274 conds = get_attr_conds (insn);
24275 if (conds == CONDS_SET)
24277 rtx set = single_set (insn);
24278 cfun->machine->thumb1_cc_insn = insn;
24279 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24280 cfun->machine->thumb1_cc_op1 = const0_rtx;
24281 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24282 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24284 rtx src1 = XEXP (SET_SRC (set), 1);
24285 if (src1 == const0_rtx)
24286 cfun->machine->thumb1_cc_mode = CCmode;
24288 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24290 /* Record the src register operand instead of dest because
24291 cprop_hardreg pass propagates src. */
24292 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24295 else if (conds != CONDS_NOCOND)
24296 cfun->machine->thumb1_cc_insn = NULL_RTX;
24299 /* Check if an unexpected far jump is used. */
24300 if (cfun->machine->lr_save_eliminated
24301 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24302 internal_error("Unexpected thumb1 far jump");
24305 int
24306 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24308 unsigned HOST_WIDE_INT mask = 0xff;
24309 int i;
24311 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24312 if (val == 0) /* XXX */
24313 return 0;
24315 for (i = 0; i < 25; i++)
24316 if ((val & (mask << i)) == val)
24317 return 1;
24319 return 0;
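/* Illustrative values (not taken from the sources): 0x1fe00 is 0xff shifted
   left by 9, so the loop above matches it and the function returns 1;
   0x40001 has set bits more than eight positions apart, so no shifted 0xff
   mask covers it and the function returns 0.  */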
24322 /* Returns nonzero if the current function contains,
24323 or might contain a far jump. */
24324 static int
24325 thumb_far_jump_used_p (void)
24327 rtx_insn *insn;
24328 bool far_jump = false;
24329 unsigned int func_size = 0;
24331 /* If we have already decided that far jumps may be used,
24332 do not bother checking again, and always return true even if
24333 it turns out that they are not being used. Once we have made
24334 the decision that far jumps are present (and hence that the link
24335 register will be pushed onto the stack) we cannot go back on it. */
24336 if (cfun->machine->far_jump_used)
24337 return 1;
24339 /* If this function is not being called from the prologue/epilogue
24340 generation code then it must be being called from the
24341 INITIAL_ELIMINATION_OFFSET macro. */
24342 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24344 /* In this case we know that we are being asked about the elimination
24345 of the arg pointer register. If that register is not being used,
24346 then there are no arguments on the stack, and we do not have to
24347 worry that a far jump might force the prologue to push the link
24348 register, changing the stack offsets. In this case we can just
24349 return false, since the presence of far jumps in the function will
24350 not affect stack offsets.
24352 If the arg pointer is live (or if it was live, but has now been
24353 eliminated and so set to dead) then we do have to test to see if
24354 the function might contain a far jump. This test can lead to some
24355 false negatives, since before reload is completed, the length of
24356 branch instructions is not known, so gcc defaults to returning their
24357 longest length, which in turn sets the far jump attribute to true.
24359 A false negative will not result in bad code being generated, but it
24360 will result in a needless push and pop of the link register. We
24361 hope that this does not occur too often.
24363 If we need doubleword stack alignment this could affect the other
24364 elimination offsets so we can't risk getting it wrong. */
24365 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24366 cfun->machine->arg_pointer_live = 1;
24367 else if (!cfun->machine->arg_pointer_live)
24368 return 0;
24371 /* We should not change far_jump_used during or after reload, as there is
24372 no chance to change stack frame layout. */
24373 if (reload_in_progress || reload_completed)
24374 return 0;
24376 /* Check to see if the function contains a branch
24377 insn with the far jump attribute set. */
24378 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24380 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24382 far_jump = true;
24384 func_size += get_attr_length (insn);
24387 /* The far_jump attribute will always be true for thumb1 before the
24388 shorten_branch pass, so checking the far_jump attribute before
24389 shorten_branch isn't very useful.
24391 The following heuristic tries to estimate more accurately whether a far
24392 jump may finally be used.  The heuristic is very conservative, as there
24393 is no chance to roll back the decision not to use a far jump.
24395 A Thumb1 long branch offset is -2048 to 2046.  The worst case is that
24396 each 2-byte insn is associated with a 4-byte constant pool entry.  Using
24397 function size 2048/3 as the threshold is conservative enough. */
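/* Worked example (numbers invented for illustration): a function whose insns
   total 700 bytes gives 700 * 3 = 2100 >= 2048, so under the worst-case
   assumption of a 4-byte literal pool entry per 2-byte insn the span could
   exceed the +/-2 KiB branch range, and we conservatively commit to using
   far jumps.  */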
24398 if (far_jump)
24400 if ((func_size * 3) >= 2048)
24402 /* Record the fact that we have decided that
24403 the function does use far jumps. */
24404 cfun->machine->far_jump_used = 1;
24405 return 1;
24409 return 0;
24412 /* Return nonzero if FUNC must be entered in ARM mode. */
24413 static bool
24414 is_called_in_ARM_mode (tree func)
24416 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24418 /* Ignore the problem about functions whose address is taken. */
24419 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24420 return true;
24422 #ifdef ARM_PE
24423 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24424 #else
24425 return false;
24426 #endif
24429 /* Given the stack offsets and register mask in OFFSETS, decide how
24430 many additional registers to push instead of subtracting a constant
24431 from SP. For epilogues the principle is the same except we use pop.
24432 FOR_PROLOGUE indicates which we're generating. */
24433 static int
24434 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24436 HOST_WIDE_INT amount;
24437 unsigned long live_regs_mask = offsets->saved_regs_mask;
24438 /* Extract a mask of the ones we can give to the Thumb's push/pop
24439 instruction. */
24440 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24441 /* Then count how many other high registers will need to be pushed. */
24442 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24443 int n_free, reg_base, size;
24445 if (!for_prologue && frame_pointer_needed)
24446 amount = offsets->locals_base - offsets->saved_regs;
24447 else
24448 amount = offsets->outgoing_args - offsets->saved_regs;
24450 /* If the stack frame size is 512 exactly, we can save one load
24451 instruction, which should make this a win even when optimizing
24452 for speed. */
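/* For example (illustrative only): the Thumb-1 "sub sp, #imm" encoding only
   reaches 508, so a 512-byte frame would otherwise need more than one
   instruction to adjust SP.  Pushing one extra register shrinks the
   adjustment to 508, which fits a single instruction; the (amount - 508) / 4
   calculation below computes the number of extra registers needed in the
   general case.  */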
24453 if (!optimize_size && amount != 512)
24454 return 0;
24456 /* Can't do this if there are high registers to push. */
24457 if (high_regs_pushed != 0)
24458 return 0;
24460 /* Shouldn't do it in the prologue if no registers would normally
24461 be pushed at all. In the epilogue, also allow it if we'll have
24462 a pop insn for the PC. */
24463 if (l_mask == 0
24464 && (for_prologue
24465 || TARGET_BACKTRACE
24466 || (live_regs_mask & 1 << LR_REGNUM) == 0
24467 || TARGET_INTERWORK
24468 || crtl->args.pretend_args_size != 0))
24469 return 0;
24471 /* Don't do this if thumb_expand_prologue wants to emit instructions
24472 between the push and the stack frame allocation. */
24473 if (for_prologue
24474 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24475 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24476 return 0;
24478 reg_base = 0;
24479 n_free = 0;
24480 if (!for_prologue)
24482 size = arm_size_return_regs ();
24483 reg_base = ARM_NUM_INTS (size);
24484 live_regs_mask >>= reg_base;
24487 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24488 && (for_prologue || call_used_regs[reg_base + n_free]))
24490 live_regs_mask >>= 1;
24491 n_free++;
24494 if (n_free == 0)
24495 return 0;
24496 gcc_assert (amount / 4 * 4 == amount);
24498 if (amount >= 512 && (amount - n_free * 4) < 512)
24499 return (amount - 508) / 4;
24500 if (amount <= n_free * 4)
24501 return amount / 4;
24502 return 0;
24505 /* The bits which aren't usefully expanded as rtl. */
24506 const char *
24507 thumb1_unexpanded_epilogue (void)
24509 arm_stack_offsets *offsets;
24510 int regno;
24511 unsigned long live_regs_mask = 0;
24512 int high_regs_pushed = 0;
24513 int extra_pop;
24514 int had_to_push_lr;
24515 int size;
24517 if (cfun->machine->return_used_this_function != 0)
24518 return "";
24520 if (IS_NAKED (arm_current_func_type ()))
24521 return "";
24523 offsets = arm_get_frame_offsets ();
24524 live_regs_mask = offsets->saved_regs_mask;
24525 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24527 /* We can deduce the registers used from the function's return value.
24528 This is more reliable than examining df_regs_ever_live_p () because that
24529 will be set if the register is ever used in the function, not just if
24530 the register is used to hold a return value. */
24531 size = arm_size_return_regs ();
24533 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24534 if (extra_pop > 0)
24536 unsigned long extra_mask = (1 << extra_pop) - 1;
24537 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24540 /* The prologue may have pushed some high registers to use as
24541 work registers. e.g. the testsuite file:
24542 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24543 compiles to produce:
24544 push {r4, r5, r6, r7, lr}
24545 mov r7, r9
24546 mov r6, r8
24547 push {r6, r7}
24548 as part of the prologue. We have to undo that pushing here. */
24550 if (high_regs_pushed)
24552 unsigned long mask = live_regs_mask & 0xff;
24553 int next_hi_reg;
24555 /* The available low registers depend on the size of the value we are
24556 returning. */
24557 if (size <= 12)
24558 mask |= 1 << 3;
24559 if (size <= 8)
24560 mask |= 1 << 2;
24562 if (mask == 0)
24563 /* Oh dear! We have no low registers into which we can pop
24564 high registers! */
24565 internal_error
24566 ("no low registers available for popping high registers");
24568 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24569 if (live_regs_mask & (1 << next_hi_reg))
24570 break;
24572 while (high_regs_pushed)
24574 /* Find lo register(s) into which the high register(s) can
24575 be popped. */
24576 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24578 if (mask & (1 << regno))
24579 high_regs_pushed--;
24580 if (high_regs_pushed == 0)
24581 break;
24584 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24586 /* Pop the values into the low register(s). */
24587 thumb_pop (asm_out_file, mask);
24589 /* Move the value(s) into the high registers. */
24590 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24592 if (mask & (1 << regno))
24594 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24595 regno);
24597 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24598 if (live_regs_mask & (1 << next_hi_reg))
24599 break;
24603 live_regs_mask &= ~0x0f00;
24606 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24607 live_regs_mask &= 0xff;
24609 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24611 /* Pop the return address into the PC. */
24612 if (had_to_push_lr)
24613 live_regs_mask |= 1 << PC_REGNUM;
24615 /* Either no argument registers were pushed or a backtrace
24616 structure was created which includes an adjusted stack
24617 pointer, so just pop everything. */
24618 if (live_regs_mask)
24619 thumb_pop (asm_out_file, live_regs_mask);
24621 /* We have either just popped the return address into the
24622 PC or it was kept in LR for the entire function.
24623 Note that thumb_pop has already called thumb_exit if the
24624 PC was in the list. */
24625 if (!had_to_push_lr)
24626 thumb_exit (asm_out_file, LR_REGNUM);
24628 else
24630 /* Pop everything but the return address. */
24631 if (live_regs_mask)
24632 thumb_pop (asm_out_file, live_regs_mask);
24634 if (had_to_push_lr)
24636 if (size > 12)
24638 /* We have no free low regs, so save one. */
24639 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24640 LAST_ARG_REGNUM);
24643 /* Get the return address into a temporary register. */
24644 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24646 if (size > 12)
24648 /* Move the return address to lr. */
24649 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24650 LAST_ARG_REGNUM);
24651 /* Restore the low register. */
24652 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24653 IP_REGNUM);
24654 regno = LR_REGNUM;
24656 else
24657 regno = LAST_ARG_REGNUM;
24659 else
24660 regno = LR_REGNUM;
24662 /* Remove the argument registers that were pushed onto the stack. */
24663 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24664 SP_REGNUM, SP_REGNUM,
24665 crtl->args.pretend_args_size);
24667 thumb_exit (asm_out_file, regno);
24670 return "";
24673 /* Functions to save and restore machine-specific function data. */
24674 static struct machine_function *
24675 arm_init_machine_status (void)
24677 struct machine_function *machine;
24678 machine = ggc_cleared_alloc<machine_function> ();
24680 #if ARM_FT_UNKNOWN != 0
24681 machine->func_type = ARM_FT_UNKNOWN;
24682 #endif
24683 return machine;
24686 /* Return an RTX indicating where the return address to the
24687 calling function can be found. */
24688 rtx
24689 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24691 if (count != 0)
24692 return NULL_RTX;
24694 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24697 /* Do anything needed before RTL is emitted for each function. */
24698 void
24699 arm_init_expanders (void)
24701 /* Arrange to initialize and mark the machine per-function status. */
24702 init_machine_status = arm_init_machine_status;
24704 /* This is to stop the combine pass optimizing away the alignment
24705 adjustment of va_arg. */
24706 /* ??? It is claimed that this should not be necessary. */
24707 if (cfun)
24708 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24711 /* Check whether FUNC is compiled for a different instruction set mode (ARM vs. Thumb) than the current function. */
24713 bool
24714 arm_change_mode_p (tree func)
24716 if (TREE_CODE (func) != FUNCTION_DECL)
24717 return false;
24719 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24721 if (!callee_tree)
24722 callee_tree = target_option_default_node;
24724 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24725 int flags = callee_opts->x_target_flags;
24727 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24730 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24731 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24732 to point at the base of the local variables after static stack
24733 space for a function has been allocated. */
24735 HOST_WIDE_INT
24736 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24738 arm_stack_offsets *offsets;
24740 offsets = arm_get_frame_offsets ();
24742 switch (from)
24744 case ARG_POINTER_REGNUM:
24745 switch (to)
24747 case STACK_POINTER_REGNUM:
24748 return offsets->outgoing_args - offsets->saved_args;
24750 case FRAME_POINTER_REGNUM:
24751 return offsets->soft_frame - offsets->saved_args;
24753 case ARM_HARD_FRAME_POINTER_REGNUM:
24754 return offsets->saved_regs - offsets->saved_args;
24756 case THUMB_HARD_FRAME_POINTER_REGNUM:
24757 return offsets->locals_base - offsets->saved_args;
24759 default:
24760 gcc_unreachable ();
24762 break;
24764 case FRAME_POINTER_REGNUM:
24765 switch (to)
24767 case STACK_POINTER_REGNUM:
24768 return offsets->outgoing_args - offsets->soft_frame;
24770 case ARM_HARD_FRAME_POINTER_REGNUM:
24771 return offsets->saved_regs - offsets->soft_frame;
24773 case THUMB_HARD_FRAME_POINTER_REGNUM:
24774 return offsets->locals_base - offsets->soft_frame;
24776 default:
24777 gcc_unreachable ();
24779 break;
24781 default:
24782 gcc_unreachable ();
24786 /* Generate the function's prologue. */
24788 void
24789 thumb1_expand_prologue (void)
24791 rtx_insn *insn;
24793 HOST_WIDE_INT amount;
24794 HOST_WIDE_INT size;
24795 arm_stack_offsets *offsets;
24796 unsigned long func_type;
24797 int regno;
24798 unsigned long live_regs_mask;
24799 unsigned long l_mask;
24800 unsigned high_regs_pushed = 0;
24801 bool lr_needs_saving;
24803 func_type = arm_current_func_type ();
24805 /* Naked functions don't have prologues. */
24806 if (IS_NAKED (func_type))
24808 if (flag_stack_usage_info)
24809 current_function_static_stack_size = 0;
24810 return;
24813 if (IS_INTERRUPT (func_type))
24815 error ("interrupt Service Routines cannot be coded in Thumb mode");
24816 return;
24819 if (is_called_in_ARM_mode (current_function_decl))
24820 emit_insn (gen_prologue_thumb1_interwork ());
24822 offsets = arm_get_frame_offsets ();
24823 live_regs_mask = offsets->saved_regs_mask;
24824 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24826 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24827 l_mask = live_regs_mask & 0x40ff;
24828 /* Then count how many other high registers will need to be pushed. */
24829 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24831 if (crtl->args.pretend_args_size)
24833 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24835 if (cfun->machine->uses_anonymous_args)
24837 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24838 unsigned long mask;
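 /* Build a mask of the highest NUM_PUSHES argument registers (ending at
 LAST_ARG_REGNUM); these hold the anonymous arguments that must be
 saved on the stack. */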
24840 mask = 1ul << (LAST_ARG_REGNUM + 1);
24841 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24843 insn = thumb1_emit_multi_reg_push (mask, 0);
24845 else
24847 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24848 stack_pointer_rtx, x));
24850 RTX_FRAME_RELATED_P (insn) = 1;
24853 if (TARGET_BACKTRACE)
24855 HOST_WIDE_INT offset = 0;
24856 unsigned work_register;
24857 rtx work_reg, x, arm_hfp_rtx;
24859 /* We have been asked to create a stack backtrace structure.
24860 The code looks like this:
24862 0 .align 2
24863 0 func:
24864 0 sub SP, #16 Reserve space for 4 registers.
24865 2 push {R7} Push low registers.
24866 4 add R7, SP, #20 Get the stack pointer before the push.
24867 6 str R7, [SP, #8] Store the stack pointer
24868 (before reserving the space).
24869 8 mov R7, PC Get hold of the start of this code + 12.
24870 10 str R7, [SP, #16] Store it.
24871 12 mov R7, FP Get hold of the current frame pointer.
24872 14 str R7, [SP, #4] Store it.
24873 16 mov R7, LR Get hold of the current return address.
24874 18 str R7, [SP, #12] Store it.
24875 20 add R7, SP, #16 Point at the start of the
24876 backtrace structure.
24877 22 mov FP, R7 Put this value into the frame pointer. */
24879 work_register = thumb_find_work_register (live_regs_mask);
24880 work_reg = gen_rtx_REG (SImode, work_register);
24881 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24883 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24884 stack_pointer_rtx, GEN_INT (-16)));
24885 RTX_FRAME_RELATED_P (insn) = 1;
24887 if (l_mask)
24889 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24890 RTX_FRAME_RELATED_P (insn) = 1;
24891 lr_needs_saving = false;
24893 offset = bit_count (l_mask) * UNITS_PER_WORD;
24896 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24897 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24899 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24900 x = gen_frame_mem (SImode, x);
24901 emit_move_insn (x, work_reg);
24903 /* Make sure that the instruction fetching the PC is in the right place
24904 to calculate "start of backtrace creation code + 12". */
24905 /* ??? The stores using the common WORK_REG ought to be enough to
24906 prevent the scheduler from doing anything weird. Failing that
24907 we could always move all of the following into an UNSPEC_VOLATILE. */
24908 if (l_mask)
24910 x = gen_rtx_REG (SImode, PC_REGNUM);
24911 emit_move_insn (work_reg, x);
24913 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24914 x = gen_frame_mem (SImode, x);
24915 emit_move_insn (x, work_reg);
24917 emit_move_insn (work_reg, arm_hfp_rtx);
24919 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24920 x = gen_frame_mem (SImode, x);
24921 emit_move_insn (x, work_reg);
24923 else
24925 emit_move_insn (work_reg, arm_hfp_rtx);
24927 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24928 x = gen_frame_mem (SImode, x);
24929 emit_move_insn (x, work_reg);
24931 x = gen_rtx_REG (SImode, PC_REGNUM);
24932 emit_move_insn (work_reg, x);
24934 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24935 x = gen_frame_mem (SImode, x);
24936 emit_move_insn (x, work_reg);
24939 x = gen_rtx_REG (SImode, LR_REGNUM);
24940 emit_move_insn (work_reg, x);
24942 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24943 x = gen_frame_mem (SImode, x);
24944 emit_move_insn (x, work_reg);
24946 x = GEN_INT (offset + 12);
24947 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24949 emit_move_insn (arm_hfp_rtx, work_reg);
24951 /* Optimization: If we are not pushing any low registers but we are going
24952 to push some high registers then delay our first push. This will just
24953 be a push of LR and we can combine it with the push of the first high
24954 register. */
24955 else if ((l_mask & 0xff) != 0
24956 || (high_regs_pushed == 0 && lr_needs_saving))
24958 unsigned long mask = l_mask;
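 /* thumb1_extra_regs_pushed returns the number of additional low registers
 whose push can also serve as stack allocation (the later stack decrement
 is reduced accordingly); fold the corresponding mask (r0 .. r(N-1)) into
 this push. */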
24959 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24960 insn = thumb1_emit_multi_reg_push (mask, mask);
24961 RTX_FRAME_RELATED_P (insn) = 1;
24962 lr_needs_saving = false;
24965 if (high_regs_pushed)
24967 unsigned pushable_regs;
24968 unsigned next_hi_reg;
24969 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24970 : crtl->args.info.nregs;
24971 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24973 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24974 if (live_regs_mask & (1 << next_hi_reg))
24975 break;
24977 /* Here we need to mask out the registers used for passing arguments,
24978 even if they could otherwise be pushed: using them to stash the high
24979 registers could clobber live argument values. */
24980 pushable_regs = l_mask & (~arg_regs_mask);
24981 if (lr_needs_saving)
24982 pushable_regs &= ~(1 << LR_REGNUM);
24984 if (pushable_regs == 0)
24985 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24987 while (high_regs_pushed > 0)
24989 unsigned long real_regs_mask = 0;
24990 unsigned long push_mask = 0;
24992 for (regno = LR_REGNUM; regno >= 0; regno --)
24994 if (pushable_regs & (1 << regno))
24996 emit_move_insn (gen_rtx_REG (SImode, regno),
24997 gen_rtx_REG (SImode, next_hi_reg));
24999 high_regs_pushed --;
25000 real_regs_mask |= (1 << next_hi_reg);
25001 push_mask |= (1 << regno);
25003 if (high_regs_pushed)
25005 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25006 next_hi_reg --)
25007 if (live_regs_mask & (1 << next_hi_reg))
25008 break;
25010 else
25011 break;
25015 /* If we had to find a work register and we have not yet
25016 saved the LR then add it to the list of regs to push. */
25017 if (lr_needs_saving)
25019 push_mask |= 1 << LR_REGNUM;
25020 real_regs_mask |= 1 << LR_REGNUM;
25021 lr_needs_saving = false;
25024 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25025 RTX_FRAME_RELATED_P (insn) = 1;
25029 /* Load the pic register before setting the frame pointer,
25030 so we can use r7 as a temporary work register. */
25031 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25032 arm_load_pic_register (live_regs_mask);
25034 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25035 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25036 stack_pointer_rtx);
25038 size = offsets->outgoing_args - offsets->saved_args;
25039 if (flag_stack_usage_info)
25040 current_function_static_stack_size = size;
25042 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25043 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25044 || flag_stack_clash_protection)
25045 && size)
25046 sorry ("-fstack-check=specific for Thumb-1");
25048 amount = offsets->outgoing_args - offsets->saved_regs;
25049 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25050 if (amount)
25052 if (amount < 512)
25054 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25055 GEN_INT (- amount)));
25056 RTX_FRAME_RELATED_P (insn) = 1;
25058 else
25060 rtx reg, dwarf;
25062 /* The stack decrement is too big for an immediate value in a single
25063 insn. In theory we could issue multiple subtracts, but after
25064 three of them it becomes more space efficient to place the full
25065 value in the constant pool and load into a register. (Also the
25066 ARM debugger really likes to see only one stack decrement per
25067 function). So instead we look for a scratch register into which
25068 we can load the decrement, and then we subtract this from the
25069 stack pointer. Unfortunately on the thumb the only available
25070 scratch registers are the argument registers, and we cannot use
25071 these as they may hold arguments to the function. Instead we
25072 attempt to locate a call preserved register which is used by this
25073 function. If we can find one, then we know that it will have
25074 been pushed at the start of the prologue and so we can corrupt
25075 it now. */
25076 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25077 if (live_regs_mask & (1 << regno))
25078 break;
25080 gcc_assert(regno <= LAST_LO_REGNUM);
25082 reg = gen_rtx_REG (SImode, regno);
25084 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25086 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25087 stack_pointer_rtx, reg));
25089 dwarf = gen_rtx_SET (stack_pointer_rtx,
25090 plus_constant (Pmode, stack_pointer_rtx,
25091 -amount));
25092 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25093 RTX_FRAME_RELATED_P (insn) = 1;
25097 if (frame_pointer_needed)
25098 thumb_set_frame_pointer (offsets);
25100 /* If we are profiling, make sure no instructions are scheduled before
25101 the call to mcount. Similarly if the user has requested no
25102 scheduling in the prolog. Similarly if we want non-call exceptions
25103 using the EABI unwinder, to prevent faulting instructions from being
25104 swapped with a stack adjustment. */
25105 if (crtl->profile || !TARGET_SCHED_PROLOG
25106 || (arm_except_unwind_info (&global_options) == UI_TARGET
25107 && cfun->can_throw_non_call_exceptions))
25108 emit_insn (gen_blockage ());
25110 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25111 if (live_regs_mask & 0xff)
25112 cfun->machine->lr_save_eliminated = 0;
25115 /* Clear the caller-saved registers that are not used to pass return values,
25116 and any leaked condition flags, before returning from a cmse_nonsecure_entry
 function. */
25118 void
25119 cmse_nonsecure_entry_clear_before_return (void)
25121 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25122 uint32_t padding_bits_to_clear = 0;
25123 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25124 auto_sbitmap to_clear_bitmap (maxregno + 1);
25125 tree result_type;
25126 rtx result_rtl;
25128 bitmap_clear (to_clear_bitmap);
25129 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25130 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25132 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25133 registers. */
25134 if (TARGET_HARD_FLOAT)
25136 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25138 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25140 /* Make sure we don't clear the two scratch registers used to clear the
25141 relevant FPSCR bits in output_return_instruction. */
25142 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25143 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25144 emit_use (gen_rtx_REG (SImode, 4));
25145 bitmap_clear_bit (to_clear_bitmap, 4);
25148 /* If the user has defined registers to be caller saved, these are no longer
25149 restored by the function before returning and must thus be cleared for
25150 security purposes. */
25151 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25153 /* We do not touch registers that can be used to pass arguments as per
25154 the AAPCS, since these should never be made callee-saved by user
25155 options. */
25156 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25157 continue;
25158 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25159 continue;
25160 if (call_used_regs[regno])
25161 bitmap_set_bit (to_clear_bitmap, regno);
25164 /* Make sure we do not clear the registers used to return the result in. */
25165 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25166 if (!VOID_TYPE_P (result_type))
25168 uint64_t to_clear_return_mask;
25169 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25171 /* No need to check that we return in registers, because we don't
25172 support returning on stack yet. */
25173 gcc_assert (REG_P (result_rtl));
25174 to_clear_return_mask
25175 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25176 padding_bits_to_clear_ptr);
25177 if (to_clear_return_mask)
25179 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25180 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25182 if (to_clear_return_mask & (1ULL << regno))
25183 bitmap_clear_bit (to_clear_bitmap, regno);
25188 if (padding_bits_to_clear != 0)
25190 rtx reg_rtx;
25191 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25193 /* Padding bits to clear is non-zero, so we are returning a composite
25194 type, which only uses r0. Make sure that r1-r3 are cleared too;
25195 r1 will be used as a scratch register. */
25196 bitmap_clear (to_clear_arg_regs_bitmap);
25197 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25198 NUM_ARG_REGS - 1);
25199 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25201 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25203 /* Fill the lower half of the negated padding_bits_to_clear. */
25204 emit_move_insn (reg_rtx,
25205 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25207 /* Also fill the top half of the negated padding_bits_to_clear. */
25208 if (((~padding_bits_to_clear) >> 16) > 0)
25209 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25210 GEN_INT (16),
25211 GEN_INT (16)),
25212 GEN_INT ((~padding_bits_to_clear) >> 16)));
25214 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25215 gen_rtx_REG (SImode, R0_REGNUM),
25216 reg_rtx));
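 /* Now clear every register still marked in TO_CLEAR_BITMAP so that no
 secret values can leak to the nonsecure caller. */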
25219 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25221 if (!bitmap_bit_p (to_clear_bitmap, regno))
25222 continue;
25224 if (IS_VFP_REGNUM (regno))
25226 /* If regno is an even vfp register and its successor is also to
25227 be cleared, use vmov. */
25228 if (TARGET_VFP_DOUBLE
25229 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25230 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25232 emit_move_insn (gen_rtx_REG (DFmode, regno),
25233 CONST1_RTX (DFmode));
25234 emit_use (gen_rtx_REG (DFmode, regno));
25235 regno++;
25237 else
25239 emit_move_insn (gen_rtx_REG (SFmode, regno),
25240 CONST1_RTX (SFmode));
25241 emit_use (gen_rtx_REG (SFmode, regno));
25244 else
25246 if (TARGET_THUMB1)
25248 if (regno == R0_REGNUM)
25249 emit_move_insn (gen_rtx_REG (SImode, regno),
25250 const0_rtx);
25251 else
25252 /* R0 has either been cleared above or holds a return value;
25253 either way it does not contain secret information. */
25255 emit_move_insn (gen_rtx_REG (SImode, regno),
25256 gen_rtx_REG (SImode, R0_REGNUM));
25257 emit_use (gen_rtx_REG (SImode, regno));
25259 else
25261 emit_move_insn (gen_rtx_REG (SImode, regno),
25262 gen_rtx_REG (SImode, LR_REGNUM));
25263 emit_use (gen_rtx_REG (SImode, regno));
25269 /* Generate the pattern *pop_multiple_with_stack_update_and_return when a
25270 single POP instruction can be generated. LR should be replaced by PC.
25271 All the required checks have already been done by USE_RETURN_INSN ();
25272 all that remains to decide here is whether a single register or
25273 multiple registers are popped. */
25274 void
25275 thumb2_expand_return (bool simple_return)
25277 int i, num_regs;
25278 unsigned long saved_regs_mask;
25279 arm_stack_offsets *offsets;
25281 offsets = arm_get_frame_offsets ();
25282 saved_regs_mask = offsets->saved_regs_mask;
25284 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25285 if (saved_regs_mask & (1 << i))
25286 num_regs++;
25288 if (!simple_return && saved_regs_mask)
25290 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25291 functions or adapt code to handle according to ACLE. This path should
25292 not be reachable for cmse_nonsecure_entry functions though we prefer
25293 to assert it for now to ensure that future code changes do not silently
25294 change this behavior. */
25295 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
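 /* With a single saved register (LR), pop it straight into the PC:
 build a PARALLEL combining the return with the SP-updating load. */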
25296 if (num_regs == 1)
25298 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25299 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25300 rtx addr = gen_rtx_MEM (SImode,
25301 gen_rtx_POST_INC (SImode,
25302 stack_pointer_rtx));
25303 set_mem_alias_set (addr, get_frame_alias_set ());
25304 XVECEXP (par, 0, 0) = ret_rtx;
25305 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25306 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25307 emit_jump_insn (par);
25309 else
25311 saved_regs_mask &= ~ (1 << LR_REGNUM);
25312 saved_regs_mask |= (1 << PC_REGNUM);
25313 arm_emit_multi_reg_pop (saved_regs_mask);
25316 else
25318 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25319 cmse_nonsecure_entry_clear_before_return ();
25320 emit_jump_insn (simple_return_rtx);
25324 void
25325 thumb1_expand_epilogue (void)
25327 HOST_WIDE_INT amount;
25328 arm_stack_offsets *offsets;
25329 int regno;
25331 /* Naked functions don't have epilogues. */
25332 if (IS_NAKED (arm_current_func_type ()))
25333 return;
25335 offsets = arm_get_frame_offsets ();
25336 amount = offsets->outgoing_args - offsets->saved_regs;
25338 if (frame_pointer_needed)
25340 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25341 amount = offsets->locals_base - offsets->saved_regs;
25343 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25345 gcc_assert (amount >= 0);
25346 if (amount)
25348 emit_insn (gen_blockage ());
25350 if (amount < 512)
25351 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25352 GEN_INT (amount)));
25353 else
25355 /* r3 is always free in the epilogue. */
25356 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25358 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25359 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25363 /* Emit a USE (stack_pointer_rtx), so that
25364 the stack adjustment will not be deleted. */
25365 emit_insn (gen_force_register_use (stack_pointer_rtx));
25367 if (crtl->profile || !TARGET_SCHED_PROLOG)
25368 emit_insn (gen_blockage ());
25370 /* Emit a clobber for each register that will be restored in the epilogue,
25371 so that flow2 will get register lifetimes correct. */
25372 for (regno = 0; regno < 13; regno++)
25373 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25374 emit_clobber (gen_rtx_REG (SImode, regno));
25376 if (! df_regs_ever_live_p (LR_REGNUM))
25377 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25379 /* Clear all caller-saved regs that are not used to return. */
25380 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25381 cmse_nonsecure_entry_clear_before_return ();
25384 /* Epilogue code for APCS frame. */
25385 static void
25386 arm_expand_epilogue_apcs_frame (bool really_return)
25388 unsigned long func_type;
25389 unsigned long saved_regs_mask;
25390 int num_regs = 0;
25391 int i;
25392 int floats_from_frame = 0;
25393 arm_stack_offsets *offsets;
25395 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25396 func_type = arm_current_func_type ();
25398 /* Get frame offsets for ARM. */
25399 offsets = arm_get_frame_offsets ();
25400 saved_regs_mask = offsets->saved_regs_mask;
25402 /* Find the offset of the floating-point save area in the frame. */
25403 floats_from_frame
25404 = (offsets->saved_args
25405 + arm_compute_static_chain_stack_bytes ()
25406 - offsets->frame);
25408 /* Compute how many core registers are saved and how far away the floats are. */
25409 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25410 if (saved_regs_mask & (1 << i))
25412 num_regs++;
25413 floats_from_frame += 4;
25416 if (TARGET_HARD_FLOAT)
25418 int start_reg;
25419 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25421 /* The offset is from IP_REGNUM. */
25422 int saved_size = arm_get_vfp_saved_size ();
25423 if (saved_size > 0)
25425 rtx_insn *insn;
25426 floats_from_frame += saved_size;
25427 insn = emit_insn (gen_addsi3 (ip_rtx,
25428 hard_frame_pointer_rtx,
25429 GEN_INT (-floats_from_frame)));
25430 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25431 ip_rtx, hard_frame_pointer_rtx);
25434 /* Generate VFP register multi-pop. */
25435 start_reg = FIRST_VFP_REGNUM;
25437 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25438 /* Look for a case where a reg does not need restoring. */
25439 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25440 && (!df_regs_ever_live_p (i + 1)
25441 || call_used_regs[i + 1]))
25443 if (start_reg != i)
25444 arm_emit_vfp_multi_reg_pop (start_reg,
25445 (i - start_reg) / 2,
25446 gen_rtx_REG (SImode,
25447 IP_REGNUM));
25448 start_reg = i + 2;
25451 /* Restore the remaining regs that we have discovered (or possibly
25452 even all of them, if the conditional in the for loop never
25453 fired). */
25454 if (start_reg != i)
25455 arm_emit_vfp_multi_reg_pop (start_reg,
25456 (i - start_reg) / 2,
25457 gen_rtx_REG (SImode, IP_REGNUM));
25460 if (TARGET_IWMMXT)
25462 /* The frame pointer is guaranteed to be non-double-word aligned, as
25463 it is set to double-word-aligned old_stack_pointer - 4. */
25464 rtx_insn *insn;
25465 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25467 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25468 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25470 rtx addr = gen_frame_mem (V2SImode,
25471 plus_constant (Pmode, hard_frame_pointer_rtx,
25472 - lrm_count * 4));
25473 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25474 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25475 gen_rtx_REG (V2SImode, i),
25476 NULL_RTX);
25477 lrm_count += 2;
25481 /* saved_regs_mask should contain IP which contains old stack pointer
25482 at the time of activation creation. Since SP and IP are adjacent registers,
25483 we can restore the value directly into SP. */
25484 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25485 saved_regs_mask &= ~(1 << IP_REGNUM);
25486 saved_regs_mask |= (1 << SP_REGNUM);
25488 /* There are two registers left in saved_regs_mask - LR and PC. We
25489 only need to restore LR (the return address), but to
25490 save time we can load it directly into PC, unless we need a
25491 special function exit sequence, or we are not really returning. */
25492 if (really_return
25493 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25494 && !crtl->calls_eh_return)
25495 /* Delete LR from the register mask, so that the LR value saved on
25496 the stack is loaded directly into the PC. */
25497 saved_regs_mask &= ~(1 << LR_REGNUM);
25498 else
25499 saved_regs_mask &= ~(1 << PC_REGNUM);
25501 num_regs = bit_count (saved_regs_mask);
25502 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25504 rtx_insn *insn;
25505 emit_insn (gen_blockage ());
25506 /* Unwind the stack to just below the saved registers. */
25507 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25508 hard_frame_pointer_rtx,
25509 GEN_INT (- 4 * num_regs)));
25511 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25512 stack_pointer_rtx, hard_frame_pointer_rtx);
25515 arm_emit_multi_reg_pop (saved_regs_mask);
25517 if (IS_INTERRUPT (func_type))
25519 /* Interrupt handlers will have pushed the
25520 IP onto the stack, so restore it now. */
25521 rtx_insn *insn;
25522 rtx addr = gen_rtx_MEM (SImode,
25523 gen_rtx_POST_INC (SImode,
25524 stack_pointer_rtx));
25525 set_mem_alias_set (addr, get_frame_alias_set ());
25526 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25527 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25528 gen_rtx_REG (SImode, IP_REGNUM),
25529 NULL_RTX);
25532 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25533 return;
25535 if (crtl->calls_eh_return)
25536 emit_insn (gen_addsi3 (stack_pointer_rtx,
25537 stack_pointer_rtx,
25538 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25540 if (IS_STACKALIGN (func_type))
25541 /* Restore the original stack pointer. Before prologue, the stack was
25542 realigned and the original stack pointer saved in r0. For details,
25543 see comment in arm_expand_prologue. */
25544 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25546 emit_jump_insn (simple_return_rtx);
25549 /* Generate RTL to represent the ARM epilogue. REALLY_RETURN is true if the
25550 function is not a sibcall. */
25551 void
25552 arm_expand_epilogue (bool really_return)
25554 unsigned long func_type;
25555 unsigned long saved_regs_mask;
25556 int num_regs = 0;
25557 int i;
25558 int amount;
25559 arm_stack_offsets *offsets;
25561 func_type = arm_current_func_type ();
25563 /* Naked functions don't have an epilogue. Hence, generate a return pattern and
25564 let output_return_instruction take care of any instruction emission. */
25565 if (IS_NAKED (func_type)
25566 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25568 if (really_return)
25569 emit_jump_insn (simple_return_rtx);
25570 return;
25573 /* If we are throwing an exception, then we really must be doing a
25574 return, so we can't tail-call. */
25575 gcc_assert (!crtl->calls_eh_return || really_return);
25577 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25579 arm_expand_epilogue_apcs_frame (really_return);
25580 return;
25583 /* Get frame offsets for ARM. */
25584 offsets = arm_get_frame_offsets ();
25585 saved_regs_mask = offsets->saved_regs_mask;
25586 num_regs = bit_count (saved_regs_mask);
25588 if (frame_pointer_needed)
25590 rtx_insn *insn;
25591 /* Restore stack pointer if necessary. */
25592 if (TARGET_ARM)
25594 /* In ARM mode, the frame pointer points to the first saved register.
25595 Restore the stack pointer to the last saved register. */
25596 amount = offsets->frame - offsets->saved_regs;
25598 /* Force out any pending memory operations that reference stacked data
25599 before stack de-allocation occurs. */
25600 emit_insn (gen_blockage ());
25601 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25602 hard_frame_pointer_rtx,
25603 GEN_INT (amount)));
25604 arm_add_cfa_adjust_cfa_note (insn, amount,
25605 stack_pointer_rtx,
25606 hard_frame_pointer_rtx);
25608 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25609 deleted. */
25610 emit_insn (gen_force_register_use (stack_pointer_rtx));
25612 else
25614 /* In Thumb-2 mode, the frame pointer points to the last saved
25615 register. */
25616 amount = offsets->locals_base - offsets->saved_regs;
25617 if (amount)
25619 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25620 hard_frame_pointer_rtx,
25621 GEN_INT (amount)));
25622 arm_add_cfa_adjust_cfa_note (insn, amount,
25623 hard_frame_pointer_rtx,
25624 hard_frame_pointer_rtx);
25627 /* Force out any pending memory operations that reference stacked data
25628 before stack de-allocation occurs. */
25629 emit_insn (gen_blockage ());
25630 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25631 hard_frame_pointer_rtx));
25632 arm_add_cfa_adjust_cfa_note (insn, 0,
25633 stack_pointer_rtx,
25634 hard_frame_pointer_rtx);
25635 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25636 deleted. */
25637 emit_insn (gen_force_register_use (stack_pointer_rtx));
25640 else
25642 /* Pop off outgoing args and local frame to adjust stack pointer to
25643 last saved register. */
25644 amount = offsets->outgoing_args - offsets->saved_regs;
25645 if (amount)
25647 rtx_insn *tmp;
25648 /* Force out any pending memory operations that reference stacked data
25649 before stack de-allocation occurs. */
25650 emit_insn (gen_blockage ());
25651 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25652 stack_pointer_rtx,
25653 GEN_INT (amount)));
25654 arm_add_cfa_adjust_cfa_note (tmp, amount,
25655 stack_pointer_rtx, stack_pointer_rtx);
25656 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25657 not deleted. */
25658 emit_insn (gen_force_register_use (stack_pointer_rtx));
25662 if (TARGET_HARD_FLOAT)
25664 /* Generate VFP register multi-pop. */
25665 int end_reg = LAST_VFP_REGNUM + 1;
25667 /* Scan the registers in reverse order. We need to match
25668 any groupings made in the prologue and generate matching
25669 vldm operations. The need to match groups is because,
25670 unlike pop, vldm can only do consecutive regs. */
25671 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25672 /* Look for a case where a reg does not need restoring. */
25673 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25674 && (!df_regs_ever_live_p (i + 1)
25675 || call_used_regs[i + 1]))
25677 /* Restore the regs discovered so far (from reg+2 to
25678 end_reg). */
25679 if (end_reg > i + 2)
25680 arm_emit_vfp_multi_reg_pop (i + 2,
25681 (end_reg - (i + 2)) / 2,
25682 stack_pointer_rtx);
25683 end_reg = i;
25686 /* Restore the remaining regs that we have discovered (or possibly
25687 even all of them, if the conditional in the for loop never
25688 fired). */
25689 if (end_reg > i + 2)
25690 arm_emit_vfp_multi_reg_pop (i + 2,
25691 (end_reg - (i + 2)) / 2,
25692 stack_pointer_rtx);
25695 if (TARGET_IWMMXT)
25696 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25697 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25699 rtx_insn *insn;
25700 rtx addr = gen_rtx_MEM (V2SImode,
25701 gen_rtx_POST_INC (SImode,
25702 stack_pointer_rtx));
25703 set_mem_alias_set (addr, get_frame_alias_set ());
25704 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25705 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25706 gen_rtx_REG (V2SImode, i),
25707 NULL_RTX);
25708 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25709 stack_pointer_rtx, stack_pointer_rtx);
25712 if (saved_regs_mask)
25714 rtx insn;
25715 bool return_in_pc = false;
25717 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25718 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25719 && !IS_CMSE_ENTRY (func_type)
25720 && !IS_STACKALIGN (func_type)
25721 && really_return
25722 && crtl->args.pretend_args_size == 0
25723 && saved_regs_mask & (1 << LR_REGNUM)
25724 && !crtl->calls_eh_return)
25726 saved_regs_mask &= ~(1 << LR_REGNUM);
25727 saved_regs_mask |= (1 << PC_REGNUM);
25728 return_in_pc = true;
25731 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25733 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25734 if (saved_regs_mask & (1 << i))
25736 rtx addr = gen_rtx_MEM (SImode,
25737 gen_rtx_POST_INC (SImode,
25738 stack_pointer_rtx));
25739 set_mem_alias_set (addr, get_frame_alias_set ());
25741 if (i == PC_REGNUM)
25743 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25744 XVECEXP (insn, 0, 0) = ret_rtx;
25745 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25746 addr);
25747 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25748 insn = emit_jump_insn (insn);
25750 else
25752 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25753 addr));
25754 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25755 gen_rtx_REG (SImode, i),
25756 NULL_RTX);
25757 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25758 stack_pointer_rtx,
25759 stack_pointer_rtx);
25763 else
25765 if (TARGET_LDRD
25766 && current_tune->prefer_ldrd_strd
25767 && !optimize_function_for_size_p (cfun))
25769 if (TARGET_THUMB2)
25770 thumb2_emit_ldrd_pop (saved_regs_mask);
25771 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25772 arm_emit_ldrd_pop (saved_regs_mask);
25773 else
25774 arm_emit_multi_reg_pop (saved_regs_mask);
25776 else
25777 arm_emit_multi_reg_pop (saved_regs_mask);
25780 if (return_in_pc)
25781 return;
25784 amount
25785 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25786 if (amount)
25788 int i, j;
25789 rtx dwarf = NULL_RTX;
25790 rtx_insn *tmp =
25791 emit_insn (gen_addsi3 (stack_pointer_rtx,
25792 stack_pointer_rtx,
25793 GEN_INT (amount)));
25795 RTX_FRAME_RELATED_P (tmp) = 1;
25797 if (cfun->machine->uses_anonymous_args)
25799 /* Restore the pretend args. See arm_expand_prologue for how the
25800 pretend args are saved on the stack. */
25801 int num_regs = crtl->args.pretend_args_size / 4;
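 /* (0xf0 >> NUM_REGS) & 0xf selects the highest NUM_REGS argument
 registers, counting down from r3 -- i.e. the registers that were
 saved as pretend args. */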
25802 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25803 for (j = 0, i = 0; j < num_regs; i++)
25804 if (saved_regs_mask & (1 << i))
25806 rtx reg = gen_rtx_REG (SImode, i);
25807 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25808 j++;
25810 REG_NOTES (tmp) = dwarf;
25812 arm_add_cfa_adjust_cfa_note (tmp, amount,
25813 stack_pointer_rtx, stack_pointer_rtx);
25816 /* Clear all caller-saved regs that are not used to return. */
25817 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25819 /* CMSE_ENTRY always returns. */
25820 gcc_assert (really_return);
25821 cmse_nonsecure_entry_clear_before_return ();
25824 if (!really_return)
25825 return;
25827 if (crtl->calls_eh_return)
25828 emit_insn (gen_addsi3 (stack_pointer_rtx,
25829 stack_pointer_rtx,
25830 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25832 if (IS_STACKALIGN (func_type))
25833 /* Restore the original stack pointer. Before prologue, the stack was
25834 realigned and the original stack pointer saved in r0. For details,
25835 see comment in arm_expand_prologue. */
25836 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25838 emit_jump_insn (simple_return_rtx);
25841 /* Implementation of insn prologue_thumb1_interwork. This is the first
25842 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25844 const char *
25845 thumb1_output_interwork (void)
25847 const char * name;
25848 FILE *f = asm_out_file;
25850 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25851 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25852 == SYMBOL_REF);
25853 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25855 /* Generate code sequence to switch us into Thumb mode. */
25856 /* The .code 32 directive has already been emitted by
25857 ASM_DECLARE_FUNCTION_NAME. */
25858 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25859 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25861 /* Generate a label, so that the debugger will notice the
25862 change in instruction sets. This label is also used by
25863 the assembler to bypass the ARM code when this function
25864 is called from a Thumb encoded function elsewhere in the
25865 same file. Hence the definition of STUB_NAME here must
25866 agree with the definition in gas/config/tc-arm.c. */
25868 #define STUB_NAME ".real_start_of"
25870 fprintf (f, "\t.code\t16\n");
25871 #ifdef ARM_PE
25872 if (arm_dllexport_name_p (name))
25873 name = arm_strip_name_encoding (name);
25874 #endif
25875 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25876 fprintf (f, "\t.thumb_func\n");
25877 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25879 return "";
25882 /* Handle the case of a double word load into a low register from
25883 a computed memory address. The computed address may involve a
25884 register which is overwritten by the load. */
25885 const char *
25886 thumb_load_double_from_address (rtx *operands)
25888 rtx addr;
25889 rtx base;
25890 rtx offset;
25891 rtx arg1;
25892 rtx arg2;
25894 gcc_assert (REG_P (operands[0]));
25895 gcc_assert (MEM_P (operands[1]));
25897 /* Get the memory address. */
25898 addr = XEXP (operands[1], 0);
25900 /* Work out how the memory address is computed. */
25901 switch (GET_CODE (addr))
25903 case REG:
25904 operands[2] = adjust_address (operands[1], SImode, 4);
25906 if (REGNO (operands[0]) == REGNO (addr))
25908 output_asm_insn ("ldr\t%H0, %2", operands);
25909 output_asm_insn ("ldr\t%0, %1", operands);
25911 else
25913 output_asm_insn ("ldr\t%0, %1", operands);
25914 output_asm_insn ("ldr\t%H0, %2", operands);
25916 break;
25918 case CONST:
25919 /* Compute <address> + 4 for the high order load. */
25920 operands[2] = adjust_address (operands[1], SImode, 4);
25922 output_asm_insn ("ldr\t%0, %1", operands);
25923 output_asm_insn ("ldr\t%H0, %2", operands);
25924 break;
25926 case PLUS:
25927 arg1 = XEXP (addr, 0);
25928 arg2 = XEXP (addr, 1);
25930 if (CONSTANT_P (arg1))
25931 base = arg2, offset = arg1;
25932 else
25933 base = arg1, offset = arg2;
25935 gcc_assert (REG_P (base));
25937 /* Catch the case of <address> = <reg> + <reg> */
25938 if (REG_P (offset))
25940 int reg_offset = REGNO (offset);
25941 int reg_base = REGNO (base);
25942 int reg_dest = REGNO (operands[0]);
25944 /* Add the base and offset registers together into the
25945 higher destination register. */
25946 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25947 reg_dest + 1, reg_base, reg_offset);
25949 /* Load the lower destination register from the address in
25950 the higher destination register. */
25951 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25952 reg_dest, reg_dest + 1);
25954 /* Load the higher destination register from its own address
25955 plus 4. */
25956 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25957 reg_dest + 1, reg_dest + 1);
25959 else
25961 /* Compute <address> + 4 for the high order load. */
25962 operands[2] = adjust_address (operands[1], SImode, 4);
25964 /* If the computed address is held in the low order register
25965 then load the high order register first, otherwise always
25966 load the low order register first. */
25967 if (REGNO (operands[0]) == REGNO (base))
25969 output_asm_insn ("ldr\t%H0, %2", operands);
25970 output_asm_insn ("ldr\t%0, %1", operands);
25972 else
25974 output_asm_insn ("ldr\t%0, %1", operands);
25975 output_asm_insn ("ldr\t%H0, %2", operands);
25978 break;
25980 case LABEL_REF:
25981 /* With no registers to worry about we can just load the value
25982 directly. */
25983 operands[2] = adjust_address (operands[1], SImode, 4);
25985 output_asm_insn ("ldr\t%H0, %2", operands);
25986 output_asm_insn ("ldr\t%0, %1", operands);
25987 break;
25989 default:
25990 gcc_unreachable ();
25993 return "";
25996 const char *
25997 thumb_output_move_mem_multiple (int n, rtx *operands)
25999 switch (n)
26001 case 2:
26002 if (REGNO (operands[4]) > REGNO (operands[5]))
26003 std::swap (operands[4], operands[5]);
26005 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26006 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26007 break;
26009 case 3:
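 /* Sort the three transfer registers into ascending order, as expected
 in ldmia/stmia register lists. */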
26010 if (REGNO (operands[4]) > REGNO (operands[5]))
26011 std::swap (operands[4], operands[5]);
26012 if (REGNO (operands[5]) > REGNO (operands[6]))
26013 std::swap (operands[5], operands[6]);
26014 if (REGNO (operands[4]) > REGNO (operands[5]))
26015 std::swap (operands[4], operands[5]);
26017 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26018 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26019 break;
26021 default:
26022 gcc_unreachable ();
26025 return "";
26028 /* Output a call-via instruction for thumb state. */
26029 const char *
26030 thumb_call_via_reg (rtx reg)
26032 int regno = REGNO (reg);
26033 rtx *labelp;
26035 gcc_assert (regno < LR_REGNUM);
26037 /* If we are in the normal text section we can use a single instance
26038 per compilation unit. If we are doing function sections, then we need
26039 an entry per section, since we can't rely on reachability. */
26040 if (in_section == text_section)
26042 thumb_call_reg_needed = 1;
26044 if (thumb_call_via_label[regno] == NULL)
26045 thumb_call_via_label[regno] = gen_label_rtx ();
26046 labelp = thumb_call_via_label + regno;
26048 else
26050 if (cfun->machine->call_via[regno] == NULL)
26051 cfun->machine->call_via[regno] = gen_label_rtx ();
26052 labelp = cfun->machine->call_via + regno;
26055 output_asm_insn ("bl\t%a0", labelp);
26056 return "";
26059 /* Routines for generating rtl. */
26060 void
26061 thumb_expand_movmemqi (rtx *operands)
26063 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26064 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26065 HOST_WIDE_INT len = INTVAL (operands[2]);
26066 HOST_WIDE_INT offset = 0;
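 /* Copy in decreasing chunk sizes: 12-byte and 8-byte ldm/stm blocks
 first, then a word, halfword and byte tail as needed. */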
26068 while (len >= 12)
26070 emit_insn (gen_movmem12b (out, in, out, in));
26071 len -= 12;
26074 if (len >= 8)
26076 emit_insn (gen_movmem8b (out, in, out, in));
26077 len -= 8;
26080 if (len >= 4)
26082 rtx reg = gen_reg_rtx (SImode);
26083 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26084 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26085 len -= 4;
26086 offset += 4;
26089 if (len >= 2)
26091 rtx reg = gen_reg_rtx (HImode);
26092 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26093 plus_constant (Pmode, in,
26094 offset))));
26095 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26096 offset)),
26097 reg));
26098 len -= 2;
26099 offset += 2;
26102 if (len)
26104 rtx reg = gen_reg_rtx (QImode);
26105 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26106 plus_constant (Pmode, in,
26107 offset))));
26108 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26109 offset)),
26110 reg));
26114 void
26115 thumb_reload_out_hi (rtx *operands)
26117 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26120 /* Return the length of a function name prefix
26121 that starts with the character 'c'. */
26122 static int
26123 arm_get_strip_length (int c)
26125 switch (c)
26127 ARM_NAME_ENCODING_LENGTHS
26128 default: return 0;
26132 /* Return a pointer to a function's name with any
26133 and all prefix encodings stripped from it. */
26134 const char *
26135 arm_strip_name_encoding (const char *name)
26137 int skip;
26139 while ((skip = arm_get_strip_length (* name)))
26140 name += skip;
26142 return name;
26145 /* If there is a '*' anywhere in the name's prefix, then
26146 emit the stripped name verbatim, otherwise prepend an
26147 underscore if leading underscores are being used. */
26148 void
26149 arm_asm_output_labelref (FILE *stream, const char *name)
26151 int skip;
26152 int verbatim = 0;
26154 while ((skip = arm_get_strip_length (* name)))
26156 verbatim |= (*name == '*');
26157 name += skip;
26160 if (verbatim)
26161 fputs (name, stream);
26162 else
26163 asm_fprintf (stream, "%U%s", name);
26166 /* This function is used to emit an EABI tag and its associated value.
26167 We emit the numerical value of the tag in case the assembler does not
26168 support textual tags (e.g. gas prior to 2.20). If requested we include
26169 the tag name in a comment so that anyone reading the assembler output
26170 will know which tag is being set.
26172 This function is not static because arm-c.c needs it too. */
26174 void
26175 arm_emit_eabi_attribute (const char *name, int num, int val)
26177 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26178 if (flag_verbose_asm || flag_debug_asm)
26179 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26180 asm_fprintf (asm_out_file, "\n");
26183 /* This function is used to print CPU tuning information as comment
26184 in assembler file. Pointers are not printed for now. */
26186 void
26187 arm_print_tune_info (void)
26189 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26190 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26191 current_tune->constant_limit);
26192 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26193 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26195 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26196 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26197 "prefetch.l1_cache_size:\t%d\n",
26198 current_tune->prefetch.l1_cache_size);
26199 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26200 "prefetch.l1_cache_line_size:\t%d\n",
26201 current_tune->prefetch.l1_cache_line_size);
26202 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26203 "prefer_constant_pool:\t%d\n",
26204 (int) current_tune->prefer_constant_pool);
26205 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26206 "branch_cost:\t(s:speed, p:predictable)\n");
26207 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26208 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26209 current_tune->branch_cost (false, false));
26210 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26211 current_tune->branch_cost (false, true));
26212 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26213 current_tune->branch_cost (true, false));
26214 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26215 current_tune->branch_cost (true, true));
26216 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26217 "prefer_ldrd_strd:\t%d\n",
26218 (int) current_tune->prefer_ldrd_strd);
26219 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26220 "logical_op_non_short_circuit:\t[%d,%d]\n",
26221 (int) current_tune->logical_op_non_short_circuit_thumb,
26222 (int) current_tune->logical_op_non_short_circuit_arm);
26223 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26224 "prefer_neon_for_64bits:\t%d\n",
26225 (int) current_tune->prefer_neon_for_64bits);
26226 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26227 "disparage_flag_setting_t16_encodings:\t%d\n",
26228 (int) current_tune->disparage_flag_setting_t16_encodings);
26229 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26230 "string_ops_prefer_neon:\t%d\n",
26231 (int) current_tune->string_ops_prefer_neon);
26232 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26233 "max_insns_inline_memset:\t%d\n",
26234 current_tune->max_insns_inline_memset);
26235 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26236 current_tune->fusible_ops);
26237 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26238 (int) current_tune->sched_autopref);
26241 /* Print .arch and .arch_extension directives corresponding to the
26242 current architecture configuration. */
26243 static void
26244 arm_print_asm_arch_directives ()
26246 const arch_option *arch
26247 = arm_parse_arch_option_name (all_architectures, "-march",
26248 arm_active_target.arch_name);
26249 auto_sbitmap opt_bits (isa_num_bits);
26251 gcc_assert (arch);
26253 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26254 if (!arch->common.extensions)
26255 return;
26257 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26258 opt->name != NULL;
26259 opt++)
26261 if (!opt->remove)
26263 arm_initialize_isa (opt_bits, opt->isa_bits);
26265 /* If every feature bit of this option is set in the target
26266 ISA specification, print out the option name. However,
26267 don't print anything if all the bits are part of the
26268 FPU specification. */
26269 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26270 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26271 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26276 static void
26277 arm_file_start (void)
26279 int val;
26281 if (TARGET_BPABI)
26283 /* We don't have a specified CPU. Use the architecture to
26284 generate the tags.
26286 Note: it might be better to do this unconditionally, then the
26287 assembler would not need to know about all new CPU names as
26288 they are added. */
26289 if (!arm_active_target.core_name)
26291 /* armv7ve doesn't support any extensions. */
26292 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26294 /* Keep backward compatibility for assemblers
26295 which don't support armv7ve. */
26296 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26297 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26298 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26299 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26300 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26302 else
26303 arm_print_asm_arch_directives ();
26305 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26306 asm_fprintf (asm_out_file, "\t.arch %s\n",
26307 arm_active_target.core_name + 8);
26308 else
26310 const char* truncated_name
26311 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26312 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26315 if (print_tune_info)
26316 arm_print_tune_info ();
26318 if (! TARGET_SOFT_FLOAT)
26320 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26321 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26323 if (TARGET_HARD_FLOAT_ABI)
26324 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26327 /* Some of these attributes only apply when the corresponding features
26328 are used. However we don't have any easy way of figuring this out.
26329 Conservatively record the setting that would have been used. */
26331 if (flag_rounding_math)
26332 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26334 if (!flag_unsafe_math_optimizations)
26336 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26337 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26339 if (flag_signaling_nans)
26340 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26342 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26343 flag_finite_math_only ? 1 : 3);
26345 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26346 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26347 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26348 flag_short_enums ? 1 : 2);
26350 /* Tag_ABI_optimization_goals. */
26351 if (optimize_size)
26352 val = 4;
26353 else if (optimize >= 2)
26354 val = 2;
26355 else if (optimize)
26356 val = 1;
26357 else
26358 val = 6;
26359 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26361 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26362 unaligned_access);
26364 if (arm_fp16_format)
26365 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26366 (int) arm_fp16_format);
26368 if (arm_lang_output_object_attributes_hook)
26369 arm_lang_output_object_attributes_hook();
26372 default_file_start ();
26375 static void
26376 arm_file_end (void)
26378 int regno;
26380 if (NEED_INDICATE_EXEC_STACK)
26381 /* Add .note.GNU-stack. */
26382 file_end_indicate_exec_stack ();
26384 if (! thumb_call_reg_needed)
26385 return;
26387 switch_to_section (text_section);
26388 asm_fprintf (asm_out_file, "\t.code 16\n");
26389 ASM_OUTPUT_ALIGN (asm_out_file, 1);
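 /* Emit the shared "bx rN" stubs, one per register for which
 thumb_call_via_reg generated a call-via label in the text section. */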
26391 for (regno = 0; regno < LR_REGNUM; regno++)
26393 rtx label = thumb_call_via_label[regno];
26395 if (label != 0)
26397 targetm.asm_out.internal_label (asm_out_file, "L",
26398 CODE_LABEL_NUMBER (label));
26399 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26404 #ifndef ARM_PE
26405 /* Symbols in the text segment can be accessed without indirecting via the
26406 constant pool; it may take an extra binary operation, but this is still
26407 faster than indirecting via memory. Don't do this when not optimizing,
26408 since we won't be calculating all of the offsets necessary to do this
26409 simplification. */
26411 static void
26412 arm_encode_section_info (tree decl, rtx rtl, int first)
26414 if (optimize > 0 && TREE_CONSTANT (decl))
26415 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26417 default_encode_section_info (decl, rtl, first);
26419 #endif /* !ARM_PE */
26421 static void
26422 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26424 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26425 && !strcmp (prefix, "L"))
26427 arm_ccfsm_state = 0;
26428 arm_target_insn = NULL;
26430 default_internal_label (stream, prefix, labelno);
26433 /* Output code to add DELTA to the first argument, and then jump
26434 to FUNCTION. Used for C++ multiple inheritance. */
26436 static void
26437 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26438 HOST_WIDE_INT, tree function)
26440 static int thunk_label = 0;
26441 char label[256];
26442 char labelpc[256];
26443 int mi_delta = delta;
26444 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26445 int shift = 0;
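 /* The 'this' pointer is passed in r1 rather than r0 when the function
 returns an aggregate in memory, because r0 then carries the hidden
 address of the return slot. */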
26446 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26447 ? 1 : 0);
26448 if (mi_delta < 0)
26449 mi_delta = - mi_delta;
26451 final_start_function (emit_barrier (), file, 1);
26453 if (TARGET_THUMB1)
26455 int labelno = thunk_label++;
26456 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26457 /* Thunks are entered in arm mode when available. */
26458 if (TARGET_THUMB1_ONLY)
26460 /* push r3 so we can use it as a temporary. */
26461 /* TODO: Omit this save if r3 is not used. */
26462 fputs ("\tpush {r3}\n", file);
26463 fputs ("\tldr\tr3, ", file);
26465 else
26467 fputs ("\tldr\tr12, ", file);
26469 assemble_name (file, label);
26470 fputc ('\n', file);
26471 if (flag_pic)
26473 /* If we are generating PIC, the ldr instruction below loads
26474 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26475 the address of the add + 8, so we have:
26477 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26478 = target + 1.
26480 Note that we have "+ 1" because some versions of GNU ld
26481 don't set the low bit of the result for R_ARM_REL32
26482 relocations against thumb function symbols.
26483 On ARMv6M this is +4, not +8. */
26484 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26485 assemble_name (file, labelpc);
26486 fputs (":\n", file);
26487 if (TARGET_THUMB1_ONLY)
26489 /* This is 2 insns after the start of the thunk, so we know it
26490 is 4-byte aligned. */
26491 fputs ("\tadd\tr3, pc, r3\n", file);
26492 fputs ("\tmov r12, r3\n", file);
26494 else
26495 fputs ("\tadd\tr12, pc, r12\n", file);
26497 else if (TARGET_THUMB1_ONLY)
26498 fputs ("\tmov r12, r3\n", file);
26500 if (TARGET_THUMB1_ONLY)
26502 if (mi_delta > 255)
26504 fputs ("\tldr\tr3, ", file);
26505 assemble_name (file, label);
26506 fputs ("+4\n", file);
26507 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26508 mi_op, this_regno, this_regno);
26510 else if (mi_delta != 0)
26512 /* Thumb1 unified syntax requires s suffix in instruction name when
26513 one of the operands is immediate. */
26514 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26515 mi_op, this_regno, this_regno,
26516 mi_delta);
26519 else
26521 /* TODO: Use movw/movt for large constants when available. */
26522 while (mi_delta != 0)
26524 if ((mi_delta & (3 << shift)) == 0)
26525 shift += 2;
26526 else
26528 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26529 mi_op, this_regno, this_regno,
26530 mi_delta & (0xff << shift));
26531 mi_delta &= ~(0xff << shift);
26532 shift += 8;
26536 if (TARGET_THUMB1)
26538 if (TARGET_THUMB1_ONLY)
26539 fputs ("\tpop\t{r3}\n", file);
26541 fprintf (file, "\tbx\tr12\n");
26542 ASM_OUTPUT_ALIGN (file, 2);
26543 assemble_name (file, label);
26544 fputs (":\n", file);
26545 if (flag_pic)
26547 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26548 rtx tem = XEXP (DECL_RTL (function), 0);
26549 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26550 pipeline offset is four rather than eight. Adjust the offset
26551 accordingly. */
26552 tem = plus_constant (GET_MODE (tem), tem,
26553 TARGET_THUMB1_ONLY ? -3 : -7);
26554 tem = gen_rtx_MINUS (GET_MODE (tem),
26555 tem,
26556 gen_rtx_SYMBOL_REF (Pmode,
26557 ggc_strdup (labelpc)));
26558 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26560 else
26561 /* Output ".word .LTHUNKn". */
26562 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26564 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26565 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26567 else
26569 fputs ("\tb\t", file);
26570 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26571 if (NEED_PLT_RELOC)
26572 fputs ("(PLT)", file);
26573 fputc ('\n', file);
26576 final_end_function ();
26579 /* MI thunk handling for TARGET_32BIT. */
26581 static void
26582 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26583 HOST_WIDE_INT vcall_offset, tree function)
26585 /* On ARM, this_regno is R0 or R1 depending on
26586 whether the function returns an aggregate or not. */
26588 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26589 function)
26590 ? R1_REGNUM : R0_REGNUM);
26592 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26593 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26594 reload_completed = 1;
26595 emit_note (NOTE_INSN_PROLOGUE_END);
26597 /* Add DELTA to THIS_RTX. */
26598 if (delta != 0)
26599 arm_split_constant (PLUS, Pmode, NULL_RTX,
26600 delta, this_rtx, this_rtx, false);
26602 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26603 if (vcall_offset != 0)
26605 /* Load *THIS_RTX. */
26606 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26607 /* Compute *THIS_RTX + VCALL_OFFSET. */
26608 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26609 false);
26610 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26611 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26612 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26615 /* Generate a tail call to the target function. */
26616 if (!TREE_USED (function))
26618 assemble_external (function);
26619 TREE_USED (function) = 1;
26621 rtx funexp = XEXP (DECL_RTL (function), 0);
26622 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26623 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26624 SIBLING_CALL_P (insn) = 1;
26626 insn = get_insns ();
26627 shorten_branches (insn);
26628 final_start_function (insn, file, 1);
26629 final (insn, file, 1);
26630 final_end_function ();
26632 /* Stop pretending this is a post-reload pass. */
26633 reload_completed = 0;
26636 /* Output code to add DELTA to the first argument, and then jump
26637 to FUNCTION. Used for C++ multiple inheritance. */
26639 static void
26640 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26641 HOST_WIDE_INT vcall_offset, tree function)
26643 if (TARGET_32BIT)
26644 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26645 else
26646 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26650 arm_emit_vector_const (FILE *file, rtx x)
26652 int i;
26653 const char * pattern;
26655 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26657 switch (GET_MODE (x))
26659 case E_V2SImode: pattern = "%08x"; break;
26660 case E_V4HImode: pattern = "%04x"; break;
26661 case E_V8QImode: pattern = "%02x"; break;
26662 default: gcc_unreachable ();
26665 fprintf (file, "0x");
26666 for (i = CONST_VECTOR_NUNITS (x); i--;)
26668 rtx element;
26670 element = CONST_VECTOR_ELT (x, i);
26671 fprintf (file, pattern, INTVAL (element));
26674 return 1;
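/* Illustrative sketch (not part of GCC): the element ordering used by
   arm_emit_vector_const above, shown for a V4HImode constant with a plain
   array instead of a CONST_VECTOR rtx.  The loop walks from the highest
   element index down, so element 0 ends up in the least significant hex
   digits of the printed word.  */
static unsigned long long
pack_v4hi_example (const unsigned short elt[4])
{
  unsigned long long word = 0;
  int i;

  /* Mirrors the "for (i = CONST_VECTOR_NUNITS (x); i--;)" loop above.  */
  for (i = 4; i--;)
    word = (word << 16) | elt[i];

  /* For elt = {1, 2, 3, 4} this yields 0x0004000300020001, matching the
     "0x" prefix plus four "%04x" fields the real code prints.  */
  return word;
}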
26677 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26678 HFmode constant pool entries are actually loaded with ldr. */
26679 void
26680 arm_emit_fp16_const (rtx c)
26682 long bits;
26684 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26685 if (WORDS_BIG_ENDIAN)
26686 assemble_zeros (2);
26687 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26688 if (!WORDS_BIG_ENDIAN)
26689 assemble_zeros (2);
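/* Illustrative sketch (not part of GCC): how arm_emit_fp16_const pads a
   16-bit half-precision value into a 4-byte word.  The value 0x3C00
   (1.0 in IEEE binary16) is used as a worked example; the helper and its
   halfword-array output are assumptions for illustration.  */
static void
pad_fp16_example (int words_big_endian, unsigned short out[2])
{
  unsigned short bits = 0x3C00;	/* 1.0 as binary16 */

  /* The two bytes of zero padding occupy the other half of the word: they
     come first on big-endian words and last on little-endian words,
     matching the assemble_zeros () calls above.  */
  out[0] = words_big_endian ? 0 : bits;
  out[1] = words_big_endian ? bits : 0;
}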
26692 const char *
26693 arm_output_load_gr (rtx *operands)
26695 rtx reg;
26696 rtx offset;
26697 rtx wcgr;
26698 rtx sum;
26700 if (!MEM_P (operands [1])
26701 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26702 || !REG_P (reg = XEXP (sum, 0))
26703 || !CONST_INT_P (offset = XEXP (sum, 1))
26704 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26705 return "wldrw%?\t%0, %1";
26707 /* Fix up an out-of-range load of a GR register. */
26708 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26709 wcgr = operands[0];
26710 operands[0] = reg;
26711 output_asm_insn ("ldr%?\t%0, %1", operands);
26713 operands[0] = wcgr;
26714 operands[1] = reg;
26715 output_asm_insn ("tmcr%?\t%0, %1", operands);
26716 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26718 return "";
26721 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26723 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26724 named arg and all anonymous args onto the stack.
26725 XXX I know the prologue shouldn't be pushing registers, but it is faster
26726 that way. */
26728 static void
26729 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26730 machine_mode mode,
26731 tree type,
26732 int *pretend_size,
26733 int second_time ATTRIBUTE_UNUSED)
26735 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26736 int nregs;
26738 cfun->machine->uses_anonymous_args = 1;
26739 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26741 nregs = pcum->aapcs_ncrn;
26742 if (nregs & 1)
26744 int res = arm_needs_doubleword_align (mode, type);
26745 if (res < 0 && warn_psabi)
26746 inform (input_location, "parameter passing for argument of "
26747 "type %qT changed in GCC 7.1", type);
26748 else if (res > 0)
26749 nregs++;
26752 else
26753 nregs = pcum->nregs;
26755 if (nregs < NUM_ARG_REGS)
26756 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
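/* Illustrative sketch (not part of GCC): the pretend-size computation made
   above, with NUM_ARG_REGS and UNITS_PER_WORD hard-wired to the usual ARM
   values (four core argument registers of four bytes each) and the
   doubleword-alignment adjustment omitted.  */
static int
varargs_pretend_bytes_example (int named_arg_regs)
{
  const int num_arg_regs = 4;	/* r0-r3 */
  const int units_per_word = 4;

  if (named_arg_regs >= num_arg_regs)
    return 0;

  /* E.g. two registers used by named arguments -> r2 and r3 must be
     pushed by the prologue -> a pretend size of 8 bytes.  */
  return (num_arg_regs - named_arg_regs) * units_per_word;
}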
26759 /* We can't rely on the caller doing the proper promotion when
26760 using APCS or ATPCS. */
26762 static bool
26763 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26765 return !TARGET_AAPCS_BASED;
26768 static machine_mode
26769 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26770 machine_mode mode,
26771 int *punsignedp ATTRIBUTE_UNUSED,
26772 const_tree fntype ATTRIBUTE_UNUSED,
26773 int for_return ATTRIBUTE_UNUSED)
26775 if (GET_MODE_CLASS (mode) == MODE_INT
26776 && GET_MODE_SIZE (mode) < 4)
26777 return SImode;
26779 return mode;
26783 static bool
26784 arm_default_short_enums (void)
26786 return ARM_DEFAULT_SHORT_ENUMS;
26790 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26792 static bool
26793 arm_align_anon_bitfield (void)
26795 return TARGET_AAPCS_BASED;
26799 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26801 static tree
26802 arm_cxx_guard_type (void)
26804 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26808 /* The EABI says test the least significant bit of a guard variable. */
26810 static bool
26811 arm_cxx_guard_mask_bit (void)
26813 return TARGET_AAPCS_BASED;
26817 /* The EABI specifies that all array cookies are 8 bytes long. */
26819 static tree
26820 arm_get_cookie_size (tree type)
26822 tree size;
26824 if (!TARGET_AAPCS_BASED)
26825 return default_cxx_get_cookie_size (type);
26827 size = build_int_cst (sizetype, 8);
26828 return size;
26832 /* The EABI says that array cookies should also contain the element size. */
26834 static bool
26835 arm_cookie_has_size (void)
26837 return TARGET_AAPCS_BASED;
26841 /* The EABI says constructors and destructors should return a pointer to
26842 the object constructed/destroyed. */
26844 static bool
26845 arm_cxx_cdtor_returns_this (void)
26847 return TARGET_AAPCS_BASED;
26850 /* The EABI says that an inline function may never be the key
26851 method. */
26853 static bool
26854 arm_cxx_key_method_may_be_inline (void)
26856 return !TARGET_AAPCS_BASED;
26859 static void
26860 arm_cxx_determine_class_data_visibility (tree decl)
26862 if (!TARGET_AAPCS_BASED
26863 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26864 return;
26866 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26867 is exported. However, on systems without dynamic vague linkage,
26868 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26869 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26870 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26871 else
26872 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26873 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26876 static bool
26877 arm_cxx_class_data_always_comdat (void)
26879 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26880 vague linkage if the class has no key function. */
26881 return !TARGET_AAPCS_BASED;
26885 /* The EABI says __aeabi_atexit should be used to register static
26886 destructors. */
26888 static bool
26889 arm_cxx_use_aeabi_atexit (void)
26891 return TARGET_AAPCS_BASED;
26895 void
26896 arm_set_return_address (rtx source, rtx scratch)
26898 arm_stack_offsets *offsets;
26899 HOST_WIDE_INT delta;
26900 rtx addr, mem;
26901 unsigned long saved_regs;
26903 offsets = arm_get_frame_offsets ();
26904 saved_regs = offsets->saved_regs_mask;
26906 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26907 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26908 else
26910 if (frame_pointer_needed)
26911 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26912 else
26914 /* LR will be the first saved register. */
26915 delta = offsets->outgoing_args - (offsets->frame + 4);
26918 if (delta >= 4096)
26920 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26921 GEN_INT (delta & ~4095)));
26922 addr = scratch;
26923 delta &= 4095;
26925 else
26926 addr = stack_pointer_rtx;
26928 addr = plus_constant (Pmode, addr, delta);
26931 /* The store needs to be marked to prevent DSE from deleting
26932 it as dead if it is based on fp. */
26933 mem = gen_frame_mem (Pmode, addr);
26934 MEM_VOLATILE_P (mem) = true;
26935 emit_move_insn (mem, source);
26940 void
26941 thumb_set_return_address (rtx source, rtx scratch)
26943 arm_stack_offsets *offsets;
26944 HOST_WIDE_INT delta;
26945 HOST_WIDE_INT limit;
26946 int reg;
26947 rtx addr, mem;
26948 unsigned long mask;
26950 emit_use (source);
26952 offsets = arm_get_frame_offsets ();
26953 mask = offsets->saved_regs_mask;
26954 if (mask & (1 << LR_REGNUM))
26956 limit = 1024;
26957 /* Find the saved regs. */
26958 if (frame_pointer_needed)
26960 delta = offsets->soft_frame - offsets->saved_args;
26961 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26962 if (TARGET_THUMB1)
26963 limit = 128;
26965 else
26967 delta = offsets->outgoing_args - offsets->saved_args;
26968 reg = SP_REGNUM;
26970 /* Allow for the stack frame. */
26971 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26972 delta -= 16;
26973 /* The link register is always the first saved register. */
26974 delta -= 4;
26976 /* Construct the address. */
26977 addr = gen_rtx_REG (SImode, reg);
26978 if (delta > limit)
26980 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26981 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26982 addr = scratch;
26984 else
26985 addr = plus_constant (Pmode, addr, delta);
26987 /* The store needs to be marked to prevent DSE from deleting
26988 it as dead if it is based on fp. */
26989 mem = gen_frame_mem (Pmode, addr);
26990 MEM_VOLATILE_P (mem) = true;
26991 emit_move_insn (mem, source);
26993 else
26994 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26997 /* Implements target hook vector_mode_supported_p. */
26998 bool
26999 arm_vector_mode_supported_p (machine_mode mode)
27001 /* Neon also supports V2SImode, etc. listed in the clause below. */
27002 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27003 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27004 || mode == V2DImode || mode == V8HFmode))
27005 return true;
27007 if ((TARGET_NEON || TARGET_IWMMXT)
27008 && ((mode == V2SImode)
27009 || (mode == V4HImode)
27010 || (mode == V8QImode)))
27011 return true;
27013 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27014 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27015 || mode == V2HAmode))
27016 return true;
27018 return false;
27021 /* Implements target hook array_mode_supported_p. */
27023 static bool
27024 arm_array_mode_supported_p (machine_mode mode,
27025 unsigned HOST_WIDE_INT nelems)
27027 if (TARGET_NEON
27028 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27029 && (nelems >= 2 && nelems <= 4))
27030 return true;
27032 return false;
27035 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27036 registers when autovectorizing for Neon, at least until multiple vector
27037 widths are supported properly by the middle-end. */
27039 static machine_mode
27040 arm_preferred_simd_mode (scalar_mode mode)
27042 if (TARGET_NEON)
27043 switch (mode)
27045 case E_SFmode:
27046 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27047 case E_SImode:
27048 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27049 case E_HImode:
27050 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27051 case E_QImode:
27052 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27053 case E_DImode:
27054 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27055 return V2DImode;
27056 break;
27058 default:;
27061 if (TARGET_REALLY_IWMMXT)
27062 switch (mode)
27064 case E_SImode:
27065 return V2SImode;
27066 case E_HImode:
27067 return V4HImode;
27068 case E_QImode:
27069 return V8QImode;
27071 default:;
27074 return word_mode;
27077 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27079 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27080 using r0-r4 for function arguments, r7 for the stack frame and don't have
27081 enough left over to do doubleword arithmetic. For Thumb-2 all the
27082 potentially problematic instructions accept high registers so this is not
27083 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27084 that require many low registers. */
27085 static bool
27086 arm_class_likely_spilled_p (reg_class_t rclass)
27088 if ((TARGET_THUMB1 && rclass == LO_REGS)
27089 || rclass == CC_REG)
27090 return true;
27092 return false;
27095 /* Implements target hook small_register_classes_for_mode_p. */
27096 bool
27097 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27099 return TARGET_THUMB1;
27102 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27103 ARM insns and therefore guarantee that the shift count is modulo 256.
27104 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27105 guarantee no particular behavior for out-of-range counts. */
27107 static unsigned HOST_WIDE_INT
27108 arm_shift_truncation_mask (machine_mode mode)
27110 return mode == SImode ? 255 : 0;
27114 /* Map internal gcc register numbers to DWARF2 register numbers. */
27116 unsigned int
27117 arm_dbx_register_number (unsigned int regno)
27119 if (regno < 16)
27120 return regno;
27122 if (IS_VFP_REGNUM (regno))
27124 /* See comment in arm_dwarf_register_span. */
27125 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27126 return 64 + regno - FIRST_VFP_REGNUM;
27127 else
27128 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27131 if (IS_IWMMXT_GR_REGNUM (regno))
27132 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27134 if (IS_IWMMXT_REGNUM (regno))
27135 return 112 + regno - FIRST_IWMMXT_REGNUM;
27137 return DWARF_FRAME_REGISTERS;
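/* Illustrative sketch (not part of GCC): the DWARF numbers produced for VFP
   registers by arm_dbx_register_number above, expressed with architectural
   D-register indices rather than GCC's internal register numbers.  d0-d15
   overlap s0-s31 and use the legacy 64-95 (S register) range; d16-d31 have
   no single-precision aliases and use the 256-287 range.  */
static int
vfp_dwarf_number_example (int dreg)	/* dreg in [0, 31] */
{
  if (dreg < 16)
    return 64 + 2 * dreg;	/* low S half of dN, e.g. d5 -> 74 (s10) */
  return 256 + dreg;		/* e.g. d20 -> 276 */
}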
27140 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27141 GCC models them as 64 32-bit registers, so we need to describe this to
27142 the DWARF generation code. Other registers can use the default. */
27143 static rtx
27144 arm_dwarf_register_span (rtx rtl)
27146 machine_mode mode;
27147 unsigned regno;
27148 rtx parts[16];
27149 int nregs;
27150 int i;
27152 regno = REGNO (rtl);
27153 if (!IS_VFP_REGNUM (regno))
27154 return NULL_RTX;
27156 /* XXX FIXME: The EABI defines two VFP register ranges:
27157 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27158 256-287: D0-D31
27159 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27160 corresponding D register. Until GDB supports this, we shall use the
27161 legacy encodings. We also use these encodings for D0-D15 for
27162 compatibility with older debuggers. */
27163 mode = GET_MODE (rtl);
27164 if (GET_MODE_SIZE (mode) < 8)
27165 return NULL_RTX;
27167 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27169 nregs = GET_MODE_SIZE (mode) / 4;
27170 for (i = 0; i < nregs; i += 2)
27171 if (TARGET_BIG_END)
27173 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27174 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27176 else
27178 parts[i] = gen_rtx_REG (SImode, regno + i);
27179 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27182 else
27184 nregs = GET_MODE_SIZE (mode) / 8;
27185 for (i = 0; i < nregs; i++)
27186 parts[i] = gen_rtx_REG (DImode, regno + i);
27189 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
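/* Illustrative sketch (not part of GCC): the ordering arm_dwarf_register_span
   above describes for a DFmode value held in one of d0-d15.  SREG is the
   index of the low S register (10 for d5); OUT receives the two S-register
   indices in the order reported to DWARF.  */
static void
dwarf_span_example (int sreg, int big_endian, int out[2])
{
  if (big_endian)
    {
      out[0] = sreg + 1;	/* high-numbered S register first */
      out[1] = sreg;
    }
  else
    {
      out[0] = sreg;
      out[1] = sreg + 1;
    }
}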
27192 #if ARM_UNWIND_INFO
27193 /* Emit unwind directives for a store-multiple instruction or stack pointer
27194 push during alignment.
27195 These should only ever be generated by the function prologue code, so
27196 expect them to have a particular form.
27197 The store-multiple instruction sometimes pushes pc as the last register,
27198 although it should not be tracked in the unwind information; or, for -Os,
27199 sometimes pushes some dummy registers before the first register that needs
27200 to be tracked in the unwind information. Such dummy registers are there just
27201 to avoid a separate stack adjustment, and will not be restored in the
27202 epilogue. */
27204 static void
27205 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27207 int i;
27208 HOST_WIDE_INT offset;
27209 HOST_WIDE_INT nregs;
27210 int reg_size;
27211 unsigned reg;
27212 unsigned lastreg;
27213 unsigned padfirst = 0, padlast = 0;
27214 rtx e;
27216 e = XVECEXP (p, 0, 0);
27217 gcc_assert (GET_CODE (e) == SET);
27219 /* First insn will adjust the stack pointer. */
27220 gcc_assert (GET_CODE (e) == SET
27221 && REG_P (SET_DEST (e))
27222 && REGNO (SET_DEST (e)) == SP_REGNUM
27223 && GET_CODE (SET_SRC (e)) == PLUS);
27225 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27226 nregs = XVECLEN (p, 0) - 1;
27227 gcc_assert (nregs);
27229 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27230 if (reg < 16)
27232 /* For -Os dummy registers can be pushed at the beginning to
27233 avoid separate stack pointer adjustment. */
27234 e = XVECEXP (p, 0, 1);
27235 e = XEXP (SET_DEST (e), 0);
27236 if (GET_CODE (e) == PLUS)
27237 padfirst = INTVAL (XEXP (e, 1));
27238 gcc_assert (padfirst == 0 || optimize_size);
27239 /* The function prologue may also push pc, but not annotate it as it is
27240 never restored. We turn this into a stack pointer adjustment. */
27241 e = XVECEXP (p, 0, nregs);
27242 e = XEXP (SET_DEST (e), 0);
27243 if (GET_CODE (e) == PLUS)
27244 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27245 else
27246 padlast = offset - 4;
27247 gcc_assert (padlast == 0 || padlast == 4);
27248 if (padlast == 4)
27249 fprintf (asm_out_file, "\t.pad #4\n");
27250 reg_size = 4;
27251 fprintf (asm_out_file, "\t.save {");
27253 else if (IS_VFP_REGNUM (reg))
27255 reg_size = 8;
27256 fprintf (asm_out_file, "\t.vsave {");
27258 else
27259 /* Unknown register type. */
27260 gcc_unreachable ();
27262 /* If the stack increment doesn't match the size of the saved registers,
27263 something has gone horribly wrong. */
27264 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27266 offset = padfirst;
27267 lastreg = 0;
27268 /* The remaining insns will describe the stores. */
27269 for (i = 1; i <= nregs; i++)
27271 /* Expect (set (mem <addr>) (reg)).
27272 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27273 e = XVECEXP (p, 0, i);
27274 gcc_assert (GET_CODE (e) == SET
27275 && MEM_P (SET_DEST (e))
27276 && REG_P (SET_SRC (e)));
27278 reg = REGNO (SET_SRC (e));
27279 gcc_assert (reg >= lastreg);
27281 if (i != 1)
27282 fprintf (asm_out_file, ", ");
27283 /* We can't use %r for vfp because we need to use the
27284 double precision register names. */
27285 if (IS_VFP_REGNUM (reg))
27286 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27287 else
27288 asm_fprintf (asm_out_file, "%r", reg);
27290 if (flag_checking)
27292 /* Check that the addresses are consecutive. */
27293 e = XEXP (SET_DEST (e), 0);
27294 if (GET_CODE (e) == PLUS)
27295 gcc_assert (REG_P (XEXP (e, 0))
27296 && REGNO (XEXP (e, 0)) == SP_REGNUM
27297 && CONST_INT_P (XEXP (e, 1))
27298 && offset == INTVAL (XEXP (e, 1)));
27299 else
27300 gcc_assert (i == 1
27301 && REG_P (e)
27302 && REGNO (e) == SP_REGNUM);
27303 offset += reg_size;
27306 fprintf (asm_out_file, "}\n");
27307 if (padfirst)
27308 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
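/* Illustrative sketch (not part of GCC): the directive produced by
   arm_unwind_emit_sequence above for a simple prologue push of core
   registers, e.g. "push {r4, r5, lr}" becomes "\t.save {r4, r5, r14}".
   Registers are printed here as plain rN, whereas the real code uses the
   %r assembler format (so r14 prints as "lr"); the padding and VFP cases
   are left out.  Relies on <stdio.h>, already pulled in via system.h.  */
static void
print_save_directive_example (FILE *f, const int *regs, int nregs)
{
  int i;

  fprintf (f, "\t.save {");
  for (i = 0; i < nregs; i++)
    fprintf (f, "%sr%d", i ? ", " : "", regs[i]);
  fprintf (f, "}\n");
}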
27311 /* Emit unwind directives for a SET. */
27313 static void
27314 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27316 rtx e0;
27317 rtx e1;
27318 unsigned reg;
27320 e0 = XEXP (p, 0);
27321 e1 = XEXP (p, 1);
27322 switch (GET_CODE (e0))
27324 case MEM:
27325 /* Pushing a single register. */
27326 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27327 || !REG_P (XEXP (XEXP (e0, 0), 0))
27328 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27329 abort ();
27331 asm_fprintf (asm_out_file, "\t.save ");
27332 if (IS_VFP_REGNUM (REGNO (e1)))
27333 asm_fprintf(asm_out_file, "{d%d}\n",
27334 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27335 else
27336 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27337 break;
27339 case REG:
27340 if (REGNO (e0) == SP_REGNUM)
27342 /* A stack increment. */
27343 if (GET_CODE (e1) != PLUS
27344 || !REG_P (XEXP (e1, 0))
27345 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27346 || !CONST_INT_P (XEXP (e1, 1)))
27347 abort ();
27349 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27350 -INTVAL (XEXP (e1, 1)));
27352 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27354 HOST_WIDE_INT offset;
27356 if (GET_CODE (e1) == PLUS)
27358 if (!REG_P (XEXP (e1, 0))
27359 || !CONST_INT_P (XEXP (e1, 1)))
27360 abort ();
27361 reg = REGNO (XEXP (e1, 0));
27362 offset = INTVAL (XEXP (e1, 1));
27363 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27364 HARD_FRAME_POINTER_REGNUM, reg,
27365 offset);
27367 else if (REG_P (e1))
27369 reg = REGNO (e1);
27370 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27371 HARD_FRAME_POINTER_REGNUM, reg);
27373 else
27374 abort ();
27376 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27378 /* Move from sp to reg. */
27379 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27381 else if (GET_CODE (e1) == PLUS
27382 && REG_P (XEXP (e1, 0))
27383 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27384 && CONST_INT_P (XEXP (e1, 1)))
27386 /* Set reg to offset from sp. */
27387 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27388 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27390 else
27391 abort ();
27392 break;
27394 default:
27395 abort ();
27400 /* Emit unwind directives for the given insn. */
27402 static void
27403 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27405 rtx note, pat;
27406 bool handled_one = false;
27408 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27409 return;
27411 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27412 && (TREE_NOTHROW (current_function_decl)
27413 || crtl->all_throwers_are_sibcalls))
27414 return;
27416 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27417 return;
27419 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27421 switch (REG_NOTE_KIND (note))
27423 case REG_FRAME_RELATED_EXPR:
27424 pat = XEXP (note, 0);
27425 goto found;
27427 case REG_CFA_REGISTER:
27428 pat = XEXP (note, 0);
27429 if (pat == NULL)
27431 pat = PATTERN (insn);
27432 if (GET_CODE (pat) == PARALLEL)
27433 pat = XVECEXP (pat, 0, 0);
27436 /* Only emitted for IS_STACKALIGN re-alignment. */
27438 rtx dest, src;
27439 unsigned reg;
27441 src = SET_SRC (pat);
27442 dest = SET_DEST (pat);
27444 gcc_assert (src == stack_pointer_rtx);
27445 reg = REGNO (dest);
27446 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27447 reg + 0x90, reg);
27449 handled_one = true;
27450 break;
27452 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27453 to get correct dwarf information for shrink-wrap. We should not
27454 emit unwind information for it because these are used either for
27455 pretend arguments or notes to adjust sp and restore registers from
27456 stack. */
27457 case REG_CFA_DEF_CFA:
27458 case REG_CFA_ADJUST_CFA:
27459 case REG_CFA_RESTORE:
27460 return;
27462 case REG_CFA_EXPRESSION:
27463 case REG_CFA_OFFSET:
27464 /* ??? Only handling here what we actually emit. */
27465 gcc_unreachable ();
27467 default:
27468 break;
27471 if (handled_one)
27472 return;
27473 pat = PATTERN (insn);
27474 found:
27476 switch (GET_CODE (pat))
27478 case SET:
27479 arm_unwind_emit_set (asm_out_file, pat);
27480 break;
27482 case SEQUENCE:
27483 /* Store multiple. */
27484 arm_unwind_emit_sequence (asm_out_file, pat);
27485 break;
27487 default:
27488 abort();
27493 /* Output a reference from a function exception table to the type_info
27494 object X. The EABI specifies that the symbol should be relocated by
27495 an R_ARM_TARGET2 relocation. */
27497 static bool
27498 arm_output_ttype (rtx x)
27500 fputs ("\t.word\t", asm_out_file);
27501 output_addr_const (asm_out_file, x);
27502 /* Use special relocations for symbol references. */
27503 if (!CONST_INT_P (x))
27504 fputs ("(TARGET2)", asm_out_file);
27505 fputc ('\n', asm_out_file);
27507 return TRUE;
27510 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27512 static void
27513 arm_asm_emit_except_personality (rtx personality)
27515 fputs ("\t.personality\t", asm_out_file);
27516 output_addr_const (asm_out_file, personality);
27517 fputc ('\n', asm_out_file);
27519 #endif /* ARM_UNWIND_INFO */
27521 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27523 static void
27524 arm_asm_init_sections (void)
27526 #if ARM_UNWIND_INFO
27527 exception_section = get_unnamed_section (0, output_section_asm_op,
27528 "\t.handlerdata");
27529 #endif /* ARM_UNWIND_INFO */
27531 #ifdef OBJECT_FORMAT_ELF
27532 if (target_pure_code)
27533 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27534 #endif
27537 /* Output unwind directives for the start/end of a function. */
27539 void
27540 arm_output_fn_unwind (FILE * f, bool prologue)
27542 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27543 return;
27545 if (prologue)
27546 fputs ("\t.fnstart\n", f);
27547 else
27549 /* If this function will never be unwound, then mark it as such.
27550 The same condition is used in arm_unwind_emit to suppress
27551 the frame annotations. */
27552 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27553 && (TREE_NOTHROW (current_function_decl)
27554 || crtl->all_throwers_are_sibcalls))
27555 fputs("\t.cantunwind\n", f);
27557 fputs ("\t.fnend\n", f);
27561 static bool
27562 arm_emit_tls_decoration (FILE *fp, rtx x)
27564 enum tls_reloc reloc;
27565 rtx val;
27567 val = XVECEXP (x, 0, 0);
27568 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27570 output_addr_const (fp, val);
27572 switch (reloc)
27574 case TLS_GD32:
27575 fputs ("(tlsgd)", fp);
27576 break;
27577 case TLS_LDM32:
27578 fputs ("(tlsldm)", fp);
27579 break;
27580 case TLS_LDO32:
27581 fputs ("(tlsldo)", fp);
27582 break;
27583 case TLS_IE32:
27584 fputs ("(gottpoff)", fp);
27585 break;
27586 case TLS_LE32:
27587 fputs ("(tpoff)", fp);
27588 break;
27589 case TLS_DESCSEQ:
27590 fputs ("(tlsdesc)", fp);
27591 break;
27592 default:
27593 gcc_unreachable ();
27596 switch (reloc)
27598 case TLS_GD32:
27599 case TLS_LDM32:
27600 case TLS_IE32:
27601 case TLS_DESCSEQ:
27602 fputs (" + (. - ", fp);
27603 output_addr_const (fp, XVECEXP (x, 0, 2));
27604 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27605 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27606 output_addr_const (fp, XVECEXP (x, 0, 3));
27607 fputc (')', fp);
27608 break;
27609 default:
27610 break;
27613 return TRUE;
27616 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27618 static void
27619 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27621 gcc_assert (size == 4);
27622 fputs ("\t.word\t", file);
27623 output_addr_const (file, x);
27624 fputs ("(tlsldo)", file);
27627 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27629 static bool
27630 arm_output_addr_const_extra (FILE *fp, rtx x)
27632 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27633 return arm_emit_tls_decoration (fp, x);
27634 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27636 char label[256];
27637 int labelno = INTVAL (XVECEXP (x, 0, 0));
27639 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27640 assemble_name_raw (fp, label);
27642 return TRUE;
27644 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27646 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27647 if (GOT_PCREL)
27648 fputs ("+.", fp);
27649 fputs ("-(", fp);
27650 output_addr_const (fp, XVECEXP (x, 0, 0));
27651 fputc (')', fp);
27652 return TRUE;
27654 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27656 output_addr_const (fp, XVECEXP (x, 0, 0));
27657 if (GOT_PCREL)
27658 fputs ("+.", fp);
27659 fputs ("-(", fp);
27660 output_addr_const (fp, XVECEXP (x, 0, 1));
27661 fputc (')', fp);
27662 return TRUE;
27664 else if (GET_CODE (x) == CONST_VECTOR)
27665 return arm_emit_vector_const (fp, x);
27667 return FALSE;
27670 /* Output assembly for a shift instruction.
27671 SET_FLAGS determines how the instruction modifies the condition codes.
27672 0 - Do not set condition codes.
27673 1 - Set condition codes.
27674 2 - Use smallest instruction. */
27675 const char *
27676 arm_output_shift(rtx * operands, int set_flags)
27678 char pattern[100];
27679 static const char flag_chars[3] = {'?', '.', '!'};
27680 const char *shift;
27681 HOST_WIDE_INT val;
27682 char c;
27684 c = flag_chars[set_flags];
27685 shift = shift_op(operands[3], &val);
27686 if (shift)
27688 if (val != -1)
27689 operands[2] = GEN_INT(val);
27690 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27692 else
27693 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27695 output_asm_insn (pattern, operands);
27696 return "";
27699 /* Output assembly for a WMMX immediate shift instruction. */
27700 const char *
27701 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27703 int shift = INTVAL (operands[2]);
27704 char templ[50];
27705 machine_mode opmode = GET_MODE (operands[0]);
27707 gcc_assert (shift >= 0);
27709 /* If the shift value is > 63 (for the D qualifier), > 31 (for the W
27710 qualifier) or > 15 (for the H qualifier), handle it specially below. */
27711 if (((opmode == V4HImode) && (shift > 15))
27712 || ((opmode == V2SImode) && (shift > 31))
27713 || ((opmode == DImode) && (shift > 63)))
27715 if (wror_or_wsra)
27717 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27718 output_asm_insn (templ, operands);
27719 if (opmode == DImode)
27721 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27722 output_asm_insn (templ, operands);
27725 else
27727 /* The destination register will contain all zeros. */
27728 sprintf (templ, "wzero\t%%0");
27729 output_asm_insn (templ, operands);
27731 return "";
27734 if ((opmode == DImode) && (shift > 32))
27736 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27737 output_asm_insn (templ, operands);
27738 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27739 output_asm_insn (templ, operands);
27741 else
27743 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27744 output_asm_insn (templ, operands);
27746 return "";
27749 /* Output assembly for a WMMX tinsr instruction. */
27750 const char *
27751 arm_output_iwmmxt_tinsr (rtx *operands)
27753 int mask = INTVAL (operands[3]);
27754 int i;
27755 char templ[50];
27756 int units = mode_nunits[GET_MODE (operands[0])];
27757 gcc_assert ((mask & (mask - 1)) == 0);
27758 for (i = 0; i < units; ++i)
27760 if ((mask & 0x01) == 1)
27762 break;
27764 mask >>= 1;
27766 gcc_assert (i < units);
27768 switch (GET_MODE (operands[0]))
27770 case E_V8QImode:
27771 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27772 break;
27773 case E_V4HImode:
27774 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27775 break;
27776 case E_V2SImode:
27777 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27778 break;
27779 default:
27780 gcc_unreachable ();
27781 break;
27783 output_asm_insn (templ, operands);
27785 return "";
27788 /* Output a Thumb-1 casesi dispatch sequence. */
27789 const char *
27790 thumb1_output_casesi (rtx *operands)
27792 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27794 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27796 switch (GET_MODE(diff_vec))
27798 case E_QImode:
27799 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27800 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27801 case E_HImode:
27802 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27803 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27804 case E_SImode:
27805 return "bl\t%___gnu_thumb1_case_si";
27806 default:
27807 gcc_unreachable ();
27811 /* Output a Thumb-2 casesi instruction. */
27812 const char *
27813 thumb2_output_casesi (rtx *operands)
27815 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27817 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27819 output_asm_insn ("cmp\t%0, %1", operands);
27820 output_asm_insn ("bhi\t%l3", operands);
27821 switch (GET_MODE(diff_vec))
27823 case E_QImode:
27824 return "tbb\t[%|pc, %0]";
27825 case E_HImode:
27826 return "tbh\t[%|pc, %0, lsl #1]";
27827 case E_SImode:
27828 if (flag_pic)
27830 output_asm_insn ("adr\t%4, %l2", operands);
27831 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27832 output_asm_insn ("add\t%4, %4, %5", operands);
27833 return "bx\t%4";
27835 else
27837 output_asm_insn ("adr\t%4, %l2", operands);
27838 return "ldr\t%|pc, [%4, %0, lsl #2]";
27840 default:
27841 gcc_unreachable ();
27845 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27846 per-core tuning structs. */
27847 static int
27848 arm_issue_rate (void)
27850 return current_tune->issue_rate;
27853 /* Return how many instructions the scheduler should look ahead to choose
27854 the best one. */
27855 static int
27856 arm_first_cycle_multipass_dfa_lookahead (void)
27858 int issue_rate = arm_issue_rate ();
27860 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27863 /* Enable modeling of L2 auto-prefetcher. */
27864 static int
27865 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27867 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27870 const char *
27871 arm_mangle_type (const_tree type)
27873 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27874 has to be mangled as if it is in the "std" namespace. */
27875 if (TARGET_AAPCS_BASED
27876 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27877 return "St9__va_list";
27879 /* Half-precision float. */
27880 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27881 return "Dh";
27883 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27884 builtin type. */
27885 if (TYPE_NAME (type) != NULL)
27886 return arm_mangle_builtin_type (type);
27888 /* Use the default mangling. */
27889 return NULL;
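/* Illustrative sketch (not part of GCC): the effect of the special cases in
   arm_mangle_type above on Itanium C++ mangled names for an AAPCS target.
   The declarations and mangled strings below are hand-worked examples, not
   verified compiler output.  */
#if 0
void f (__fp16);	      /* __fp16 mangles as "Dh":  _Z1fDh */
void g (__builtin_va_list);   /* "std::__va_list":  _Z1gSt9__va_list */
#endif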
27892 /* Order of allocation of core registers for Thumb: this allocation is
27893 written over the corresponding initial entries of the array
27894 initialized with REG_ALLOC_ORDER. We allocate all low registers
27895 first. Saving and restoring a low register is usually cheaper than
27896 using a call-clobbered high register. */
27898 static const int thumb_core_reg_alloc_order[] =
27900 3, 2, 1, 0, 4, 5, 6, 7,
27901 12, 14, 8, 9, 10, 11
27904 /* Adjust register allocation order when compiling for Thumb. */
27906 void
27907 arm_order_regs_for_local_alloc (void)
27909 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27910 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27911 if (TARGET_THUMB)
27912 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27913 sizeof (thumb_core_reg_alloc_order));
27916 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27918 bool
27919 arm_frame_pointer_required (void)
27921 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27922 return true;
27924 /* If the function receives nonlocal gotos, it needs to save the frame
27925 pointer in the nonlocal_goto_save_area object. */
27926 if (cfun->has_nonlocal_label)
27927 return true;
27929 /* The frame pointer is required for non-leaf APCS frames. */
27930 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27931 return true;
27933 /* If we are probing the stack in the prologue, we will have a faulting
27934 instruction prior to the stack adjustment and this requires a frame
27935 pointer if we want to catch the exception using the EABI unwinder. */
27936 if (!IS_INTERRUPT (arm_current_func_type ())
27937 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27938 || flag_stack_clash_protection)
27939 && arm_except_unwind_info (&global_options) == UI_TARGET
27940 && cfun->can_throw_non_call_exceptions)
27942 HOST_WIDE_INT size = get_frame_size ();
27944 /* That's irrelevant if there is no stack adjustment. */
27945 if (size <= 0)
27946 return false;
27948 /* That's relevant only if there is a stack probe. */
27949 if (crtl->is_leaf && !cfun->calls_alloca)
27951 /* We don't have the final size of the frame so adjust. */
27952 size += 32 * UNITS_PER_WORD;
27953 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27954 return true;
27956 else
27957 return true;
27960 return false;
27963 /* Only Thumb-1 lacks support for conditional execution, so return true
27964 if the target is not Thumb-1. */
27965 static bool
27966 arm_have_conditional_execution (void)
27968 return !TARGET_THUMB1;
27971 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27972 static HOST_WIDE_INT
27973 arm_vector_alignment (const_tree type)
27975 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27977 if (TARGET_AAPCS_BASED)
27978 align = MIN (align, 64);
27980 return align;
27983 static unsigned int
27984 arm_autovectorize_vector_sizes (void)
27986 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27989 static bool
27990 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27992 /* Vectors which aren't in packed structures will not be less aligned than
27993 the natural alignment of their element type, so this is safe. */
27994 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27995 return !is_packed;
27997 return default_builtin_vector_alignment_reachable (type, is_packed);
28000 static bool
28001 arm_builtin_support_vector_misalignment (machine_mode mode,
28002 const_tree type, int misalignment,
28003 bool is_packed)
28005 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28007 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28009 if (is_packed)
28010 return align == 1;
28012 /* If the misalignment is unknown, we should be able to handle the access
28013 so long as it is not to a member of a packed data structure. */
28014 if (misalignment == -1)
28015 return true;
28017 /* Return true if the misalignment is a multiple of the natural alignment
28018 of the vector's element type. This is probably always going to be
28019 true in practice, since we've already established that this isn't a
28020 packed access. */
28021 return ((misalignment % align) == 0);
28024 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28025 is_packed);
28028 static void
28029 arm_conditional_register_usage (void)
28031 int regno;
28033 if (TARGET_THUMB1 && optimize_size)
28035 /* When optimizing for size on Thumb-1, it's better not
28036 to use the HI regs, because of the overhead of
28037 stacking them. */
28038 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28039 fixed_regs[regno] = call_used_regs[regno] = 1;
28042 /* The link register can be clobbered by any branch insn,
28043 but we have no way to track that at present, so mark
28044 it as unavailable. */
28045 if (TARGET_THUMB1)
28046 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28048 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28050 /* VFPv3 registers are disabled when earlier VFP
28051 versions are selected due to the definition of
28052 LAST_VFP_REGNUM. */
28053 for (regno = FIRST_VFP_REGNUM;
28054 regno <= LAST_VFP_REGNUM; ++ regno)
28056 fixed_regs[regno] = 0;
28057 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28058 || regno >= FIRST_VFP_REGNUM + 32;
28062 if (TARGET_REALLY_IWMMXT)
28064 regno = FIRST_IWMMXT_GR_REGNUM;
28065 /* The 2002/10/09 revision of the XScale ABI has wCG0
28066 and wCG1 as call-preserved registers. The 2002/11/21
28067 revision changed this so that all wCG registers are
28068 scratch registers. */
28069 for (regno = FIRST_IWMMXT_GR_REGNUM;
28070 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28071 fixed_regs[regno] = 0;
28072 /* The XScale ABI has wR0 - wR9 as scratch registers,
28073 the rest as call-preserved registers. */
28074 for (regno = FIRST_IWMMXT_REGNUM;
28075 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28077 fixed_regs[regno] = 0;
28078 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28082 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28084 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28085 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28087 else if (TARGET_APCS_STACK)
28089 fixed_regs[10] = 1;
28090 call_used_regs[10] = 1;
28092 /* -mcaller-super-interworking reserves r11 for calls to
28093 _interwork_r11_call_via_rN(). Making the register global
28094 is an easy way of ensuring that it remains valid for all
28095 calls. */
28096 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28097 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28099 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28100 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28101 if (TARGET_CALLER_INTERWORKING)
28102 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28104 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28107 static reg_class_t
28108 arm_preferred_rename_class (reg_class_t rclass)
28110 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28111 using GENERAL_REGS. During the register rename pass we therefore prefer
28112 LO_REGS, which can reduce code size. */
28113 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28114 return LO_REGS;
28115 else
28116 return NO_REGS;
28119 /* Compute the attribute "length" of insn "*push_multi".
28120 So this function MUST be kept in sync with that insn pattern. */
28122 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28124 int i, regno, hi_reg;
28125 int num_saves = XVECLEN (parallel_op, 0);
28127 /* ARM mode. */
28128 if (TARGET_ARM)
28129 return 4;
28130 /* Thumb1 mode. */
28131 if (TARGET_THUMB1)
28132 return 2;
28134 /* Thumb2 mode. */
28135 regno = REGNO (first_op);
28136 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28137 list fits in 8 bits. Normally this means all registers in the list must be
28138 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
28139 encodings. There is one exception: for PUSH, LR in HI_REGS can still be used
28140 with the 16-bit encoding. */
28141 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28142 for (i = 1; i < num_saves && !hi_reg; i++)
28144 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28145 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28148 if (!hi_reg)
28149 return 2;
28150 return 4;
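/* Illustrative sketch (not part of GCC): the Thumb-2 size selection made by
   arm_attr_length_push_multi above, using plain architectural register
   numbers (0-15, with 14 = lr) instead of rtxes.  */
static int
push_multi_length_example (const int *regs, int nregs)
{
  int i;

  for (i = 0; i < nregs; i++)
    if (regs[i] > 7 && regs[i] != 14)
      return 4;	/* any high register other than lr needs a 32-bit encoding */

  return 2;	/* e.g. push {r4, r5, lr} fits the 16-bit encoding */
}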
28153 /* Compute the attribute "length" of insn. Currently, this function is used
28154 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28155 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28156 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28157 true if OPERANDS contains an insn which explicitly updates the base register. */
28160 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28162 /* ARM mode. */
28163 if (TARGET_ARM)
28164 return 4;
28165 /* Thumb1 mode. */
28166 if (TARGET_THUMB1)
28167 return 2;
28169 rtx parallel_op = operands[0];
28170 /* Initialize to the number of elements in the PARALLEL. */
28171 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28172 /* Initialize the value to the base register number. */
28173 unsigned regno = REGNO (operands[1]);
28174 /* Skip return and write back pattern.
28175 We only need register pop pattern for later analysis. */
28176 unsigned first_indx = 0;
28177 first_indx += return_pc ? 1 : 0;
28178 first_indx += write_back_p ? 1 : 0;
28180 /* A pop operation can be done through LDM or POP. If the base register is SP
28181 and write back is used, then LDM is an alias of POP. */
28182 bool pop_p = (regno == SP_REGNUM && write_back_p);
28183 bool ldm_p = !pop_p;
28185 /* Check base register for LDM. */
28186 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28187 return 4;
28189 /* Check each register in the list. */
28190 for (; indx >= first_indx; indx--)
28192 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28193 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28194 comment in arm_attr_length_push_multi. */
28195 if (REGNO_REG_CLASS (regno) == HI_REGS
28196 && (regno != PC_REGNUM || ldm_p))
28197 return 4;
28200 return 2;
28203 /* Compute the number of instructions emitted by output_move_double. */
28205 arm_count_output_move_double_insns (rtx *operands)
28207 int count;
28208 rtx ops[2];
28209 /* output_move_double may modify the operands array, so call it
28210 here on a copy of the array. */
28211 ops[0] = operands[0];
28212 ops[1] = operands[1];
28213 output_move_double (ops, false, &count);
28214 return count;
28218 vfp3_const_double_for_fract_bits (rtx operand)
28220 REAL_VALUE_TYPE r0;
28222 if (!CONST_DOUBLE_P (operand))
28223 return 0;
28225 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28226 if (exact_real_inverse (DFmode, &r0)
28227 && !REAL_VALUE_NEGATIVE (r0))
28229 if (exact_real_truncate (DFmode, &r0))
28231 HOST_WIDE_INT value = real_to_integer (&r0);
28232 value = value & 0xffffffff;
28233 if ((value != 0) && ( (value & (value - 1)) == 0))
28235 int ret = exact_log2 (value);
28236 gcc_assert (IN_RANGE (ret, 0, 31));
28237 return ret;
28241 return 0;
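/* Illustrative sketch (not part of GCC): what vfp3_const_double_for_fract_bits
   above computes, using double instead of REAL_VALUE_TYPE and a simple
   comparison in place of the exactness checks.  A constant of the form
   1/2^n yields n (e.g. 0.125 -> 3); anything else yields 0.  */
static int
fract_bits_example (double operand)
{
  int n;

  for (n = 1; n <= 31; n++)
    if (operand == 1.0 / (double) (1u << n))
      return n;

  return 0;
}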
28244 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28245 log2 is in [1, 32], return that log2. Otherwise return -1.
28246 This is used in the patterns for vcvt.s32.f32 floating-point to
28247 fixed-point conversions. */
28250 vfp3_const_double_for_bits (rtx x)
28252 const REAL_VALUE_TYPE *r;
28254 if (!CONST_DOUBLE_P (x))
28255 return -1;
28257 r = CONST_DOUBLE_REAL_VALUE (x);
28259 if (REAL_VALUE_NEGATIVE (*r)
28260 || REAL_VALUE_ISNAN (*r)
28261 || REAL_VALUE_ISINF (*r)
28262 || !real_isinteger (r, SFmode))
28263 return -1;
28265 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28267 /* The exact_log2 above will have returned -1 if this is
28268 not an exact log2. */
28269 if (!IN_RANGE (hwint, 1, 32))
28270 return -1;
28272 return hwint;
28276 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28278 static void
28279 arm_pre_atomic_barrier (enum memmodel model)
28281 if (need_atomic_barrier_p (model, true))
28282 emit_insn (gen_memory_barrier ());
28285 static void
28286 arm_post_atomic_barrier (enum memmodel model)
28288 if (need_atomic_barrier_p (model, false))
28289 emit_insn (gen_memory_barrier ());
28292 /* Emit the load-exclusive and store-exclusive instructions.
28293 Use acquire and release versions if necessary. */
28295 static void
28296 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28298 rtx (*gen) (rtx, rtx);
28300 if (acq)
28302 switch (mode)
28304 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28305 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28306 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28307 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28308 default:
28309 gcc_unreachable ();
28312 else
28314 switch (mode)
28316 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28317 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28318 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28319 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28320 default:
28321 gcc_unreachable ();
28325 emit_insn (gen (rval, mem));
28328 static void
28329 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28330 rtx mem, bool rel)
28332 rtx (*gen) (rtx, rtx, rtx);
28334 if (rel)
28336 switch (mode)
28338 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28339 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28340 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28341 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28342 default:
28343 gcc_unreachable ();
28346 else
28348 switch (mode)
28350 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28351 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28352 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28353 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28354 default:
28355 gcc_unreachable ();
28359 emit_insn (gen (bval, rval, mem));
28362 /* Mark the previous jump instruction as unlikely. */
28364 static void
28365 emit_unlikely_jump (rtx insn)
28367 rtx_insn *jump = emit_jump_insn (insn);
28368 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28371 /* Expand a compare and swap pattern. */
28373 void
28374 arm_expand_compare_and_swap (rtx operands[])
28376 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28377 machine_mode mode;
28378 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28380 bval = operands[0];
28381 rval = operands[1];
28382 mem = operands[2];
28383 oldval = operands[3];
28384 newval = operands[4];
28385 is_weak = operands[5];
28386 mod_s = operands[6];
28387 mod_f = operands[7];
28388 mode = GET_MODE (mem);
28390 /* Normally the succ memory model must be stronger than fail, but in the
28391 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28392 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28394 if (TARGET_HAVE_LDACQ
28395 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28396 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28397 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28399 switch (mode)
28401 case E_QImode:
28402 case E_HImode:
28403 /* For narrow modes, we're going to perform the comparison in SImode,
28404 so do the zero-extension now. */
28405 rval = gen_reg_rtx (SImode);
28406 oldval = convert_modes (SImode, mode, oldval, true);
28407 /* FALLTHRU */
28409 case E_SImode:
28410 /* Force the value into a register if needed. We waited until after
28411 the zero-extension above to do this properly. */
28412 if (!arm_add_operand (oldval, SImode))
28413 oldval = force_reg (SImode, oldval);
28414 break;
28416 case E_DImode:
28417 if (!cmpdi_operand (oldval, mode))
28418 oldval = force_reg (mode, oldval);
28419 break;
28421 default:
28422 gcc_unreachable ();
28425 if (TARGET_THUMB1)
28427 switch (mode)
28429 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28430 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28431 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28432 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28433 default:
28434 gcc_unreachable ();
28437 else
28439 switch (mode)
28441 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28442 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28443 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28444 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28445 default:
28446 gcc_unreachable ();
28450 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28451 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28453 if (mode == QImode || mode == HImode)
28454 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28456 /* In all cases, we arrange for success to be signaled by Z set.
28457 This arrangement allows for the boolean result to be used directly
28458 in a subsequent branch, post optimization. For Thumb-1 targets, the
28459 boolean negation of the result is also stored in bval because the Thumb-1
28460 backend lacks dependency tracking for the CC flag, flag-setting not being
28461 represented at the RTL level. */
28462 if (TARGET_THUMB1)
28463 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28464 else
28466 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28467 emit_insn (gen_rtx_SET (bval, x));
28471 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28472 another memory store between the load-exclusive and store-exclusive can
28473 reset the monitor from Exclusive to Open state. This means we must wait
28474 until after reload to split the pattern, lest we get a register spill in
28475 the middle of the atomic sequence. Success of the compare and swap is
28476 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28477 for Thumb-1 targets (ie. negation of the boolean value returned by
28478 atomic_compare_and_swapmode standard pattern in operand 0). */
28480 void
28481 arm_split_compare_and_swap (rtx operands[])
28483 rtx rval, mem, oldval, newval, neg_bval;
28484 machine_mode mode;
28485 enum memmodel mod_s, mod_f;
28486 bool is_weak;
28487 rtx_code_label *label1, *label2;
28488 rtx x, cond;
28490 rval = operands[1];
28491 mem = operands[2];
28492 oldval = operands[3];
28493 newval = operands[4];
28494 is_weak = (operands[5] != const0_rtx);
28495 mod_s = memmodel_from_int (INTVAL (operands[6]));
28496 mod_f = memmodel_from_int (INTVAL (operands[7]));
28497 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28498 mode = GET_MODE (mem);
28500 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28502 bool use_acquire = TARGET_HAVE_LDACQ
28503 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28504 || is_mm_release (mod_s));
28506 bool use_release = TARGET_HAVE_LDACQ
28507 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28508 || is_mm_acquire (mod_s));
28510 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28511 a full barrier is emitted after the store-release. */
28512 if (is_armv8_sync)
28513 use_acquire = false;
28515 /* Checks whether a barrier is needed and emits one accordingly. */
28516 if (!(use_acquire || use_release))
28517 arm_pre_atomic_barrier (mod_s);
28519 label1 = NULL;
28520 if (!is_weak)
28522 label1 = gen_label_rtx ();
28523 emit_label (label1);
28525 label2 = gen_label_rtx ();
28527 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28529 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28530 as required to communicate with arm_expand_compare_and_swap. */
28531 if (TARGET_32BIT)
28533 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28534 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28535 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28536 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28537 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28539 else
28541 emit_move_insn (neg_bval, const1_rtx);
28542 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28543 if (thumb1_cmpneg_operand (oldval, SImode))
28544 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28545 label2, cond));
28546 else
28547 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28550 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28552 /* Weak or strong, we want EQ to be true for success, so that we
28553 match the flags that we got from the compare above. */
28554 if (TARGET_32BIT)
28556 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28557 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28558 emit_insn (gen_rtx_SET (cond, x));
28561 if (!is_weak)
28563 /* Z is set to boolean value of !neg_bval, as required to communicate
28564 with arm_expand_compare_and_swap. */
28565 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28566 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28569 if (!is_mm_relaxed (mod_f))
28570 emit_label (label2);
28572 /* Checks whether a barrier is needed and emits one accordingly. */
28573 if (is_armv8_sync
28574 || !(use_acquire || use_release))
28575 arm_post_atomic_barrier (mod_s);
28577 if (is_mm_relaxed (mod_f))
28578 emit_label (label2);
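/* Illustrative sketch (not part of GCC): a source-level operation that goes
   through arm_expand_compare_and_swap/arm_split_compare_and_swap above.
   On an LDREX/STREX target the strong form becomes roughly
     1: ldrex; cmp; bne 2f; strex; cmp; bne 1b; 2:
   with barriers or acquire/release variants as the memory model requires.  */
static int
cas_user_level_example (int *mem, int *expected, int desired)
{
  return __atomic_compare_exchange_n (mem, expected, desired,
				      0 /* strong */,
				      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}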
28581 /* Split an atomic operation pattern. Operation is given by CODE and is one
28582 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28583 operation). Operation is performed on the content at MEM and on VALUE
28584 following the memory model MODEL_RTX. The content at MEM before and after
28585 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28586 success of the operation is returned in COND. Using a scratch register or
28587 an operand register for these determines what result is returned for that
28588 pattern. */
28590 void
28591 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28592 rtx value, rtx model_rtx, rtx cond)
28594 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28595 machine_mode mode = GET_MODE (mem);
28596 machine_mode wmode = (mode == DImode ? DImode : SImode);
28597 rtx_code_label *label;
28598 bool all_low_regs, bind_old_new;
28599 rtx x;
28601 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28603 bool use_acquire = TARGET_HAVE_LDACQ
28604 && !(is_mm_relaxed (model) || is_mm_consume (model)
28605 || is_mm_release (model));
28607 bool use_release = TARGET_HAVE_LDACQ
28608 && !(is_mm_relaxed (model) || is_mm_consume (model)
28609 || is_mm_acquire (model));
28611 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28612 a full barrier is emitted after the store-release. */
28613 if (is_armv8_sync)
28614 use_acquire = false;
28616 /* Checks whether a barrier is needed and emits one accordingly. */
28617 if (!(use_acquire || use_release))
28618 arm_pre_atomic_barrier (model);
28620 label = gen_label_rtx ();
28621 emit_label (label);
28623 if (new_out)
28624 new_out = gen_lowpart (wmode, new_out);
28625 if (old_out)
28626 old_out = gen_lowpart (wmode, old_out);
28627 else
28628 old_out = new_out;
28629 value = simplify_gen_subreg (wmode, value, mode, 0);
28631 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28633 /* Does the operation require destination and first operand to use the same
28634 register? This is decided by register constraints of relevant insn
28635 patterns in thumb1.md. */
28636 gcc_assert (!new_out || REG_P (new_out));
28637 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28638 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28639 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28640 bind_old_new =
28641 (TARGET_THUMB1
28642 && code != SET
28643 && code != MINUS
28644 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28646 /* We want to return the old value while putting the result of the operation
28647 in the same register as the old value so copy the old value over to the
28648 destination register and use that register for the operation. */
28649 if (old_out && bind_old_new)
28651 emit_move_insn (new_out, old_out);
28652 old_out = new_out;
28655 switch (code)
28657 case SET:
28658 new_out = value;
28659 break;
28661 case NOT:
28662 x = gen_rtx_AND (wmode, old_out, value);
28663 emit_insn (gen_rtx_SET (new_out, x));
28664 x = gen_rtx_NOT (wmode, new_out);
28665 emit_insn (gen_rtx_SET (new_out, x));
28666 break;
28668 case MINUS:
28669 if (CONST_INT_P (value))
28671 value = GEN_INT (-INTVAL (value));
28672 code = PLUS;
28674 /* FALLTHRU */
28676 case PLUS:
28677 if (mode == DImode)
28679 /* DImode plus/minus need to clobber flags. */
28680 /* The adddi3 and subdi3 patterns are incorrectly written so that
28681 they require matching operands, even when we could easily support
28682 three operands. Thankfully, this can be fixed up post-splitting,
28683 as the individual add+adc patterns do accept three operands and
28684 post-reload cprop can make these moves go away. */
28685 emit_move_insn (new_out, old_out);
28686 if (code == PLUS)
28687 x = gen_adddi3 (new_out, new_out, value);
28688 else
28689 x = gen_subdi3 (new_out, new_out, value);
28690 emit_insn (x);
28691 break;
28693 /* FALLTHRU */
28695 default:
28696 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28697 emit_insn (gen_rtx_SET (new_out, x));
28698 break;
28701 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28702 use_release);
28704 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28705 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28707 /* Checks whether a barrier is needed and emits one accordingly. */
28708 if (is_armv8_sync
28709 || !(use_acquire || use_release))
28710 arm_post_atomic_barrier (model);
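/* Schematically, for CODE == PLUS on an SImode location the loop emitted
   above looks like this (register names are only illustrative), bracketed
   by whatever acquire/release or barrier handling MODEL requires:

	retry:	ldrex	old, [mem]
		add	new, old, value
		strex	tmp, new, [mem]
		cmp	tmp, #0
		bne	retry
   */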
28713 #define MAX_VECT_LEN 16
28715 struct expand_vec_perm_d
28717 rtx target, op0, op1;
28718 auto_vec_perm_indices perm;
28719 machine_mode vmode;
28720 bool one_vector_p;
28721 bool testing_p;
28724 /* Generate a variable permutation. */
28726 static void
28727 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28729 machine_mode vmode = GET_MODE (target);
28730 bool one_vector_p = rtx_equal_p (op0, op1);
28732 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28733 gcc_checking_assert (GET_MODE (op0) == vmode);
28734 gcc_checking_assert (GET_MODE (op1) == vmode);
28735 gcc_checking_assert (GET_MODE (sel) == vmode);
28736 gcc_checking_assert (TARGET_NEON);
28738 if (one_vector_p)
28740 if (vmode == V8QImode)
28741 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28742 else
28743 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28745 else
28747 rtx pair;
28749 if (vmode == V8QImode)
28751 pair = gen_reg_rtx (V16QImode);
28752 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28753 pair = gen_lowpart (TImode, pair);
28754 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28756 else
28758 pair = gen_reg_rtx (OImode);
28759 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28760 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28765 void
28766 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28768 machine_mode vmode = GET_MODE (target);
28769 unsigned int nelt = GET_MODE_NUNITS (vmode);
28770 bool one_vector_p = rtx_equal_p (op0, op1);
28771 rtx mask;
28773 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28774 numbering of elements for big-endian, we must reverse the order. */
28775 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28777 /* The VTBL instruction does not use a modulo index, so we must take care
28778 of that ourselves. */
28779 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28780 mask = gen_const_vec_duplicate (vmode, mask);
28781 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28783 arm_expand_vec_perm_1 (target, op0, op1, sel);
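/* For example, with V8QImode and a single input vector the selector is
   ANDed with nelt - 1 (7) above, since VTBL indexing is not modulo the lane
   count, and a one-register vtbl is then emitted by arm_expand_vec_perm_1;
   the two-vector case combines op0/op1 into a register pair and uses the
   two-register vtbl form.  */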
28786 /* Map lane ordering between architectural lane order, and GCC lane order,
28787 taking into account ABI. See comment above output_move_neon for details. */
28789 static int
28790 neon_endian_lane_map (machine_mode mode, int lane)
28792 if (BYTES_BIG_ENDIAN)
28794 int nelems = GET_MODE_NUNITS (mode);
28795 /* Reverse lane order. */
28796 lane = (nelems - 1 - lane);
28797 /* Reverse D register order, to match ABI. */
28798 if (GET_MODE_SIZE (mode) == 16)
28799 lane = lane ^ (nelems / 2);
28801 return lane;
28804 /* Some permutations index into pairs of vectors, this is a helper function
28805 to map indexes into those pairs of vectors. */
28807 static int
28808 neon_pair_endian_lane_map (machine_mode mode, int lane)
28810 int nelem = GET_MODE_NUNITS (mode);
28811 if (BYTES_BIG_ENDIAN)
28812 lane =
28813 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28814 return lane;
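/* Worked example: for V4SImode on a big-endian target, nelems is 4 and the
   mode is 16 bytes wide, so neon_endian_lane_map sends lanes 0,1,2,3 to
   1,0,3,2 (reverse the lane order, then swap the two D registers).  On a
   little-endian target both of these mappings are the identity.  */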
28817 /* Generate or test for an insn that supports a constant permutation. */
28819 /* Recognize patterns for the VUZP insns. */
28821 static bool
28822 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28824 unsigned int i, odd, mask, nelt = d->perm.length ();
28825 rtx out0, out1, in0, in1;
28826 rtx (*gen)(rtx, rtx, rtx, rtx);
28827 int first_elem;
28828 int swap_nelt;
28830 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28831 return false;
28833 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28834 big endian pattern on 64 bit vectors, so we correct for that. */
28835 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28836 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28838 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28840 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28841 odd = 0;
28842 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28843 odd = 1;
28844 else
28845 return false;
28846 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28848 for (i = 0; i < nelt; i++)
28850 unsigned elt =
28851 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28852 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28853 return false;
28856 /* Success! */
28857 if (d->testing_p)
28858 return true;
28860 switch (d->vmode)
28862 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28863 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28864 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28865 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28866 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28867 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28868 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28869 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28870 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28871 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28872 default:
28873 gcc_unreachable ();
28876 in0 = d->op0;
28877 in1 = d->op1;
28878 if (swap_nelt != 0)
28879 std::swap (in0, in1);
28881 out0 = d->target;
28882 out1 = gen_reg_rtx (d->vmode);
28883 if (odd)
28884 std::swap (out0, out1);
28886 emit_insn (gen (out0, in0, in1, out1));
28887 return true;
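/* Example (little-endian): for V4SImode with two input vectors the selector
   { 0, 2, 4, 6 } passes the checks above with odd == 0 and is emitted as a
   VUZP taking the even-indexed elements; { 1, 3, 5, 7 } takes the
   odd-indexed ones.  */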
28890 /* Recognize patterns for the VZIP insns. */
28892 static bool
28893 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28895 unsigned int i, high, mask, nelt = d->perm.length ();
28896 rtx out0, out1, in0, in1;
28897 rtx (*gen)(rtx, rtx, rtx, rtx);
28898 int first_elem;
28899 bool is_swapped;
28901 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28902 return false;
28904 is_swapped = BYTES_BIG_ENDIAN;
28906 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28908 high = nelt / 2;
28909 if (first_elem == neon_endian_lane_map (d->vmode, high))
28911 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28912 high = 0;
28913 else
28914 return false;
28915 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28917 for (i = 0; i < nelt / 2; i++)
28919 unsigned elt =
28920 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28921 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28922 != elt)
28923 return false;
28924 elt =
28925 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28926 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28927 != elt)
28928 return false;
28931 /* Success! */
28932 if (d->testing_p)
28933 return true;
28935 switch (d->vmode)
28937 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28938 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28939 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28940 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28941 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28942 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28943 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28944 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28945 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28946 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28947 default:
28948 gcc_unreachable ();
28951 in0 = d->op0;
28952 in1 = d->op1;
28953 if (is_swapped)
28954 std::swap (in0, in1);
28956 out0 = d->target;
28957 out1 = gen_reg_rtx (d->vmode);
28958 if (high)
28959 std::swap (out0, out1);
28961 emit_insn (gen (out0, in0, in1, out1));
28962 return true;
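/* Example (little-endian): for V4SImode with two input vectors the selector
   { 0, 4, 1, 5 } is matched with high == 0 and becomes a VZIP interleaving
   the low halves of the two operands, while { 2, 6, 3, 7 } interleaves the
   high halves.  */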
28965 /* Recognize patterns for the VREV insns. */
28967 static bool
28968 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28970 unsigned int i, j, diff, nelt = d->perm.length ();
28971 rtx (*gen)(rtx, rtx);
28973 if (!d->one_vector_p)
28974 return false;
28976 diff = d->perm[0];
28977 switch (diff)
28979 case 7:
28980 switch (d->vmode)
28982 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28983 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28984 default:
28985 return false;
28987 break;
28988 case 3:
28989 switch (d->vmode)
28991 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28992 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28993 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28994 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28995 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28996 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28997 default:
28998 return false;
29000 break;
29001 case 1:
29002 switch (d->vmode)
29004 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29005 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29006 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29007 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29008 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29009 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29010 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29011 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29012 default:
29013 return false;
29015 break;
29016 default:
29017 return false;
29020 for (i = 0; i < nelt ; i += diff + 1)
29021 for (j = 0; j <= diff; j += 1)
29023 /* This is guaranteed to be true as the value of diff
29024 is 7, 3 or 1 and we should have enough elements in the
29025 queue to generate this. Getting a vector mask with a
29026 value of diff other than these values implies that
29027 something is wrong by the time we get here. */
29028 gcc_assert (i + j < nelt);
29029 if (d->perm[i + j] != i + diff - j)
29030 return false;
29033 /* Success! */
29034 if (d->testing_p)
29035 return true;
29037 emit_insn (gen (d->target, d->op0));
29038 return true;
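/* Example: for V8QImode the single-vector selector { 3, 2, 1, 0, 7, 6, 5, 4 }
   gives diff == 3 and is emitted as a VREV32 (byte reversal within each
   32-bit word), while { 1, 0, 3, 2, 5, 4, 7, 6 } gives diff == 1 and becomes
   a VREV16.  */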
29041 /* Recognize patterns for the VTRN insns. */
29043 static bool
29044 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29046 unsigned int i, odd, mask, nelt = d->perm.length ();
29047 rtx out0, out1, in0, in1;
29048 rtx (*gen)(rtx, rtx, rtx, rtx);
29050 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29051 return false;
29053 /* Note that these are little-endian tests. Adjust for big-endian later. */
29054 if (d->perm[0] == 0)
29055 odd = 0;
29056 else if (d->perm[0] == 1)
29057 odd = 1;
29058 else
29059 return false;
29060 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29062 for (i = 0; i < nelt; i += 2)
29064 if (d->perm[i] != i + odd)
29065 return false;
29066 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29067 return false;
29070 /* Success! */
29071 if (d->testing_p)
29072 return true;
29074 switch (d->vmode)
29076 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29077 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29078 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29079 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29080 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29081 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29082 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29083 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29084 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29085 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29086 default:
29087 gcc_unreachable ();
29090 in0 = d->op0;
29091 in1 = d->op1;
29092 if (BYTES_BIG_ENDIAN)
29094 std::swap (in0, in1);
29095 odd = !odd;
29098 out0 = d->target;
29099 out1 = gen_reg_rtx (d->vmode);
29100 if (odd)
29101 std::swap (out0, out1);
29103 emit_insn (gen (out0, in0, in1, out1));
29104 return true;
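/* Example (little-endian): for V4SImode with two input vectors the selector
   { 0, 4, 2, 6 } is matched with odd == 0 and becomes a VTRN taking the
   even-numbered lanes; { 1, 5, 3, 7 } takes the odd-numbered lanes.  */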
29107 /* Recognize patterns for the VEXT insns. */
29109 static bool
29110 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29112 unsigned int i, nelt = d->perm.length ();
29113 rtx (*gen) (rtx, rtx, rtx, rtx);
29114 rtx offset;
29116 unsigned int location;
29118 unsigned int next = d->perm[0] + 1;
29120 /* TODO: Handle GCC's numbering of elements for big-endian. */
29121 if (BYTES_BIG_ENDIAN)
29122 return false;
29124 /* Check if the extracted indexes are increasing by one. */
29125 for (i = 1; i < nelt; next++, i++)
29127 /* If we hit the most significant element of the 2nd vector in
29128 the previous iteration, no need to test further. */
29129 if (next == 2 * nelt)
29130 return false;
29132 /* If we are operating on only one vector: it could be a
29133 rotation. If there are only two elements of size < 64, let
29134 arm_evpc_neon_vrev catch it. */
29135 if (d->one_vector_p && (next == nelt))
29137 if ((nelt == 2) && (d->vmode != V2DImode))
29138 return false;
29139 else
29140 next = 0;
29143 if (d->perm[i] != next)
29144 return false;
29147 location = d->perm[0];
29149 switch (d->vmode)
29151 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29152 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29153 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29154 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29155 case E_V2SImode: gen = gen_neon_vextv2si; break;
29156 case E_V4SImode: gen = gen_neon_vextv4si; break;
29157 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29158 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29159 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29160 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29161 case E_V2DImode: gen = gen_neon_vextv2di; break;
29162 default:
29163 return false;
29166 /* Success! */
29167 if (d->testing_p)
29168 return true;
29170 offset = GEN_INT (location);
29171 emit_insn (gen (d->target, d->op0, d->op1, offset));
29172 return true;
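/* Example (little-endian): for V4SImode with two input vectors the selector
   { 1, 2, 3, 4 } increases by one throughout, so it is matched here as a
   VEXT with element offset 1, giving { op0[1], op0[2], op0[3], op1[0] }.  */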
29175 /* The NEON VTBL instruction is a fully variable permutation that's even
29176 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29177 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29178 can do slightly better by expanding this as a constant where we don't
29179 have to apply a mask. */
29181 static bool
29182 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29184 rtx rperm[MAX_VECT_LEN], sel;
29185 machine_mode vmode = d->vmode;
29186 unsigned int i, nelt = d->perm.length ();
29188 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29189 numbering of elements for big-endian, we must reverse the order. */
29190 if (BYTES_BIG_ENDIAN)
29191 return false;
29193 if (d->testing_p)
29194 return true;
29196 /* Generic code will try constant permutation twice. Once with the
29197 original mode and again with the elements lowered to QImode.
29198 So wait and don't do the selector expansion ourselves. */
29199 if (vmode != V8QImode && vmode != V16QImode)
29200 return false;
29202 for (i = 0; i < nelt; ++i)
29203 rperm[i] = GEN_INT (d->perm[i]);
29204 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29205 sel = force_reg (vmode, sel);
29207 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29208 return true;
29211 static bool
29212 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29214 /* Check if the input mask matches vext before reordering the
29215 operands. */
29216 if (TARGET_NEON)
29217 if (arm_evpc_neon_vext (d))
29218 return true;
29220 /* The pattern matching functions above are written to look for a small
29221 number to begin the sequence (0, 1, N/2). If we begin with an index
29222 from the second operand, we can swap the operands. */
29223 unsigned int nelt = d->perm.length ();
29224 if (d->perm[0] >= nelt)
29226 for (unsigned int i = 0; i < nelt; ++i)
29227 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29229 std::swap (d->op0, d->op1);
29232 if (TARGET_NEON)
29234 if (arm_evpc_neon_vuzp (d))
29235 return true;
29236 if (arm_evpc_neon_vzip (d))
29237 return true;
29238 if (arm_evpc_neon_vrev (d))
29239 return true;
29240 if (arm_evpc_neon_vtrn (d))
29241 return true;
29242 return arm_evpc_neon_vtbl (d);
29244 return false;
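/* For instance, with V4SImode a selector of { 4, 0, 5, 1 } starts with an
   index into the second operand; the loop above rewrites it to { 0, 4, 1, 5 }
   and swaps op0/op1, after which arm_evpc_neon_vzip recognizes it.  */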
29247 /* Expand a vec_perm_const pattern. */
29249 bool
29250 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29252 struct expand_vec_perm_d d;
29253 int i, nelt, which;
29255 d.target = target;
29256 d.op0 = op0;
29257 d.op1 = op1;
29259 d.vmode = GET_MODE (target);
29260 gcc_assert (VECTOR_MODE_P (d.vmode));
29261 d.testing_p = false;
29263 nelt = GET_MODE_NUNITS (d.vmode);
29264 d.perm.reserve (nelt);
29265 for (i = which = 0; i < nelt; ++i)
29267 rtx e = XVECEXP (sel, 0, i);
29268 int ei = INTVAL (e) & (2 * nelt - 1);
29269 which |= (ei < nelt ? 1 : 2);
29270 d.perm.quick_push (ei);
29273 switch (which)
29275 default:
29276 gcc_unreachable();
29278 case 3:
29279 d.one_vector_p = false;
29280 if (!rtx_equal_p (op0, op1))
29281 break;
29283 /* The elements of PERM do not suggest that only the first operand
29284 is used, but both operands are identical. Allow easier matching
29285 of the permutation by folding the permutation into the single
29286 input vector. */
29287 /* FALLTHRU */
29288 case 2:
29289 for (i = 0; i < nelt; ++i)
29290 d.perm[i] &= nelt - 1;
29291 d.op0 = op1;
29292 d.one_vector_p = true;
29293 break;
29295 case 1:
29296 d.op1 = op0;
29297 d.one_vector_p = true;
29298 break;
29301 return arm_expand_vec_perm_const_1 (&d);
29304 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29306 static bool
29307 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29309 struct expand_vec_perm_d d;
29310 unsigned int i, nelt, which;
29311 bool ret;
29313 d.vmode = vmode;
29314 d.testing_p = true;
29315 d.perm.safe_splice (sel);
29317 /* Categorize the set of elements in the selector. */
29318 nelt = GET_MODE_NUNITS (d.vmode);
29319 for (i = which = 0; i < nelt; ++i)
29321 unsigned int e = d.perm[i];
29322 gcc_assert (e < 2 * nelt);
29323 which |= (e < nelt ? 1 : 2);
29326 /* For all elements from second vector, fold the elements to first. */
29327 if (which == 2)
29328 for (i = 0; i < nelt; ++i)
29329 d.perm[i] -= nelt;
29331 /* Check whether the mask can be applied to the vector type. */
29332 d.one_vector_p = (which != 3);
29334 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29335 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29336 if (!d.one_vector_p)
29337 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29339 start_sequence ();
29340 ret = arm_expand_vec_perm_const_1 (&d);
29341 end_sequence ();
29343 return ret;
29346 bool
29347 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29349 /* If we are soft float and we either have ldrd or the mode fits in a
29350 word, then all auto increment forms are ok. */
29351 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29352 return true;
29354 switch (code)
29356 /* Post-increment and pre-decrement are supported for all
29357 instruction forms except for vector forms. */
29358 case ARM_POST_INC:
29359 case ARM_PRE_DEC:
29360 if (VECTOR_MODE_P (mode))
29362 if (code != ARM_PRE_DEC)
29363 return true;
29364 else
29365 return false;
29368 return true;
29370 case ARM_POST_DEC:
29371 case ARM_PRE_INC:
29372 /* Without LDRD, and with a mode size greater than
29373 word size, there is no point in auto-incrementing
29374 because ldm and stm will not have these forms. */
29375 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29376 return false;
29378 /* Vector and floating point modes do not support
29379 these auto increment forms. */
29380 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29381 return false;
29383 return true;
29385 default:
29386 return false;
29390 return false;
29393 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29394 on ARM, since we know that shifts by negative amounts are no-ops.
29395 Additionally, the default expansion code is not available or suitable
29396 for post-reload insn splits (this can occur when the register allocator
29397 chooses not to do a shift in NEON).
29399 This function is used in both initial expand and post-reload splits, and
29400 handles all kinds of 64-bit shifts.
29402 Input requirements:
29403 - It is safe for the input and output to be the same register, but
29404 early-clobber rules apply for the shift amount and scratch registers.
29405 - Shift by register requires both scratch registers. In all other cases
29406 the scratch registers may be NULL.
29407 - Ashiftrt by a register also clobbers the CC register. */
29408 void
29409 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29410 rtx amount, rtx scratch1, rtx scratch2)
29412 rtx out_high = gen_highpart (SImode, out);
29413 rtx out_low = gen_lowpart (SImode, out);
29414 rtx in_high = gen_highpart (SImode, in);
29415 rtx in_low = gen_lowpart (SImode, in);
29417 /* Terminology:
29418 in = the register pair containing the input value.
29419 out = the destination register pair.
29420 up = the high- or low-part of each pair.
29421 down = the opposite part to "up".
29422 In a shift, we can consider bits to shift from "up"-stream to
29423 "down"-stream, so in a left-shift "up" is the low-part and "down"
29424 is the high-part of each register pair. */
29426 rtx out_up = code == ASHIFT ? out_low : out_high;
29427 rtx out_down = code == ASHIFT ? out_high : out_low;
29428 rtx in_up = code == ASHIFT ? in_low : in_high;
29429 rtx in_down = code == ASHIFT ? in_high : in_low;
29431 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29432 gcc_assert (out
29433 && (REG_P (out) || GET_CODE (out) == SUBREG)
29434 && GET_MODE (out) == DImode);
29435 gcc_assert (in
29436 && (REG_P (in) || GET_CODE (in) == SUBREG)
29437 && GET_MODE (in) == DImode);
29438 gcc_assert (amount
29439 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29440 && GET_MODE (amount) == SImode)
29441 || CONST_INT_P (amount)));
29442 gcc_assert (scratch1 == NULL
29443 || (GET_CODE (scratch1) == SCRATCH)
29444 || (GET_MODE (scratch1) == SImode
29445 && REG_P (scratch1)));
29446 gcc_assert (scratch2 == NULL
29447 || (GET_CODE (scratch2) == SCRATCH)
29448 || (GET_MODE (scratch2) == SImode
29449 && REG_P (scratch2)));
29450 gcc_assert (!REG_P (out) || !REG_P (amount)
29451 || !HARD_REGISTER_P (out)
29452 || (REGNO (out) != REGNO (amount)
29453 && REGNO (out) + 1 != REGNO (amount)));
29455 /* Macros to make following code more readable. */
29456 #define SUB_32(DEST,SRC) \
29457 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29458 #define RSB_32(DEST,SRC) \
29459 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29460 #define SUB_S_32(DEST,SRC) \
29461 gen_addsi3_compare0 ((DEST), (SRC), \
29462 GEN_INT (-32))
29463 #define SET(DEST,SRC) \
29464 gen_rtx_SET ((DEST), (SRC))
29465 #define SHIFT(CODE,SRC,AMOUNT) \
29466 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29467 #define LSHIFT(CODE,SRC,AMOUNT) \
29468 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29469 SImode, (SRC), (AMOUNT))
29470 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29471 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29472 SImode, (SRC), (AMOUNT))
29473 #define ORR(A,B) \
29474 gen_rtx_IOR (SImode, (A), (B))
29475 #define BRANCH(COND,LABEL) \
29476 gen_arm_cond_branch ((LABEL), \
29477 gen_rtx_ ## COND (CCmode, cc_reg, \
29478 const0_rtx), \
29479 cc_reg)
29481 /* Shifts by register and shifts by constant are handled separately. */
29482 if (CONST_INT_P (amount))
29484 /* We have a shift-by-constant. */
29486 /* First, handle out-of-range shift amounts.
29487 In both cases we try to match the result an ARM instruction in a
29488 shift-by-register would give. This helps reduce execution
29489 differences between optimization levels, but it won't stop other
29490 parts of the compiler doing different things. This is "undefined
29491 behavior", in any case. */
29492 if (INTVAL (amount) <= 0)
29493 emit_insn (gen_movdi (out, in));
29494 else if (INTVAL (amount) >= 64)
29496 if (code == ASHIFTRT)
29498 rtx const31_rtx = GEN_INT (31);
29499 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29500 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29502 else
29503 emit_insn (gen_movdi (out, const0_rtx));
29506 /* Now handle valid shifts. */
29507 else if (INTVAL (amount) < 32)
29509 /* Shifts by a constant less than 32. */
29510 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29512 /* Clearing the out register in DImode first avoids lots
29513 of spilling and results in less stack usage.
29514 Later this redundant insn is completely removed.
29515 Do that only if "in" and "out" are different registers. */
29516 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29517 emit_insn (SET (out, const0_rtx));
29518 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29519 emit_insn (SET (out_down,
29520 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29521 out_down)));
29522 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29524 else
29526 /* Shifts by a constant greater than 31. */
29527 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29529 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29530 emit_insn (SET (out, const0_rtx));
29531 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29532 if (code == ASHIFTRT)
29533 emit_insn (gen_ashrsi3 (out_up, in_up,
29534 GEN_INT (31)));
29535 else
29536 emit_insn (SET (out_up, const0_rtx));
29539 else
29541 /* We have a shift-by-register. */
29542 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29544 /* This alternative requires the scratch registers. */
29545 gcc_assert (scratch1 && REG_P (scratch1));
29546 gcc_assert (scratch2 && REG_P (scratch2));
29548 /* We will need the values "amount-32" and "32-amount" later.
29549 Swapping them around now allows the later code to be more general. */
29550 switch (code)
29552 case ASHIFT:
29553 emit_insn (SUB_32 (scratch1, amount));
29554 emit_insn (RSB_32 (scratch2, amount));
29555 break;
29556 case ASHIFTRT:
29557 emit_insn (RSB_32 (scratch1, amount));
29558 /* Also set CC = amount > 32. */
29559 emit_insn (SUB_S_32 (scratch2, amount));
29560 break;
29561 case LSHIFTRT:
29562 emit_insn (RSB_32 (scratch1, amount));
29563 emit_insn (SUB_32 (scratch2, amount));
29564 break;
29565 default:
29566 gcc_unreachable ();
29569 /* Emit code like this:
29571 arithmetic-left:
29572 out_down = in_down << amount;
29573 out_down = (in_up << (amount - 32)) | out_down;
29574 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29575 out_up = in_up << amount;
29577 arithmetic-right:
29578 out_down = in_down >> amount;
29579 out_down = (in_up << (32 - amount)) | out_down;
29580 if (amount < 32)
29581 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29582 out_up = in_up << amount;
29584 logical-right:
29585 out_down = in_down >> amount;
29586 out_down = (in_up << (32 - amount)) | out_down;
29587 if (amount < 32)
29588 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29589 out_up = in_up << amount;
29591 The ARM and Thumb2 variants are the same but implemented slightly
29592 differently. If this were only called during expand we could just
29593 use the Thumb2 case and let combine do the right thing, but this
29594 can also be called from post-reload splitters. */
29596 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29598 if (!TARGET_THUMB2)
29600 /* Emit code for ARM mode. */
29601 emit_insn (SET (out_down,
29602 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29603 if (code == ASHIFTRT)
29605 rtx_code_label *done_label = gen_label_rtx ();
29606 emit_jump_insn (BRANCH (LT, done_label));
29607 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29608 out_down)));
29609 emit_label (done_label);
29611 else
29612 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29613 out_down)));
29615 else
29617 /* Emit code for Thumb2 mode.
29618 Thumb2 can't do shift and or in one insn. */
29619 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29620 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29622 if (code == ASHIFTRT)
29624 rtx_code_label *done_label = gen_label_rtx ();
29625 emit_jump_insn (BRANCH (LT, done_label));
29626 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29627 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29628 emit_label (done_label);
29630 else
29632 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29633 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29637 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29640 #undef SUB_32
29641 #undef RSB_32
29642 #undef SUB_S_32
29643 #undef SET
29644 #undef SHIFT
29645 #undef LSHIFT
29646 #undef REV_LSHIFT
29647 #undef ORR
29648 #undef BRANCH
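/* As a concrete illustration of the constant case: a 64-bit logical right
   shift by 5 is emitted roughly as

	lsr	out_low, in_low, #5
	orr	out_low, out_low, in_high, lsl #27
	lsr	out_high, in_high, #5

   where the names follow the *_low/*_high variables above and the exact
   instruction selection is only indicative.  */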
29651 /* Returns true if the pattern is a valid symbolic address, which is either a
29652 symbol_ref or (symbol_ref + addend).
29654 According to the ARM ELF ABI, the initial addend of REL-type relocations
29655 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29656 literal field of the instruction as a 16-bit signed value in the range
29657 -32768 <= A < 32768. */
29659 bool
29660 arm_valid_symbolic_address_p (rtx addr)
29662 rtx xop0, xop1 = NULL_RTX;
29663 rtx tmp = addr;
29665 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29666 return true;
29668 /* (const (plus: symbol_ref const_int)) */
29669 if (GET_CODE (addr) == CONST)
29670 tmp = XEXP (addr, 0);
29672 if (GET_CODE (tmp) == PLUS)
29674 xop0 = XEXP (tmp, 0);
29675 xop1 = XEXP (tmp, 1);
29677 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29678 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29681 return false;
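/* For example, (symbol_ref "x") and (const (plus (symbol_ref "x")
   (const_int 4))) are accepted, while an addend such as 0x10000 is rejected
   because it cannot be encoded in the signed 16-bit addend of a REL-type
   MOVW/MOVT relocation.  */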
29684 /* Returns true if *COMPARISON is a valid comparison operation, and puts
29685 the operands into a form that is valid for it. */
29686 bool
29687 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29689 enum rtx_code code = GET_CODE (*comparison);
29690 int code_int;
29691 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29692 ? GET_MODE (*op2) : GET_MODE (*op1);
29694 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29696 if (code == UNEQ || code == LTGT)
29697 return false;
29699 code_int = (int)code;
29700 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29701 PUT_CODE (*comparison, (enum rtx_code)code_int);
29703 switch (mode)
29705 case E_SImode:
29706 if (!arm_add_operand (*op1, mode))
29707 *op1 = force_reg (mode, *op1);
29708 if (!arm_add_operand (*op2, mode))
29709 *op2 = force_reg (mode, *op2);
29710 return true;
29712 case E_DImode:
29713 if (!cmpdi_operand (*op1, mode))
29714 *op1 = force_reg (mode, *op1);
29715 if (!cmpdi_operand (*op2, mode))
29716 *op2 = force_reg (mode, *op2);
29717 return true;
29719 case E_HFmode:
29720 if (!TARGET_VFP_FP16INST)
29721 break;
29722 /* FP16 comparisons are done in SF mode. */
29723 mode = SFmode;
29724 *op1 = convert_to_mode (mode, *op1, 1);
29725 *op2 = convert_to_mode (mode, *op2, 1);
29726 /* Fall through. */
29727 case E_SFmode:
29728 case E_DFmode:
29729 if (!vfp_compare_operand (*op1, mode))
29730 *op1 = force_reg (mode, *op1);
29731 if (!vfp_compare_operand (*op2, mode))
29732 *op2 = force_reg (mode, *op2);
29733 return true;
29734 default:
29735 break;
29738 return false;
29742 /* Maximum number of instructions to set block of memory. */
29743 static int
29744 arm_block_set_max_insns (void)
29746 if (optimize_function_for_size_p (cfun))
29747 return 4;
29748 else
29749 return current_tune->max_insns_inline_memset;
29752 /* Return TRUE if it's profitable to set block of memory for
29753 non-vectorized case. VAL is the value to set the memory
29754 with. LENGTH is the number of bytes to set. ALIGN is the
29755 alignment of the destination memory in bytes. UNALIGNED_P
29756 is TRUE if we can only set the memory with instructions
29757 meeting alignment requirements. USE_STRD_P is TRUE if we
29758 can use strd to set the memory. */
29759 static bool
29760 arm_block_set_non_vect_profit_p (rtx val,
29761 unsigned HOST_WIDE_INT length,
29762 unsigned HOST_WIDE_INT align,
29763 bool unaligned_p, bool use_strd_p)
29765 int num = 0;
29766 /* For a leftover of 0-7 bytes, we can set the memory block using
29767 strb/strh/str with the minimum number of instructions. */
29768 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29770 if (unaligned_p)
29772 num = arm_const_inline_cost (SET, val);
29773 num += length / align + length % align;
29775 else if (use_strd_p)
29777 num = arm_const_double_inline_cost (val);
29778 num += (length >> 3) + leftover[length & 7];
29780 else
29782 num = arm_const_inline_cost (SET, val);
29783 num += (length >> 2) + leftover[length & 3];
29786 /* We may be able to combine last pair STRH/STRB into a single STR
29787 by shifting one byte back. */
29788 if (unaligned_access && length > 3 && (length & 3) == 3)
29789 num--;
29791 return (num <= arm_block_set_max_insns ());
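/* Worked example: setting 15 bytes without strd costs the instructions that
   load the constant, plus (15 >> 2) = 3 word stores, plus leftover[3] = 2
   for the trailing strh/strb; with unaligned access available that trailing
   pair is counted as a single str, so one instruction is subtracted.  The
   total is then compared against arm_block_set_max_insns ().  */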
29794 /* Return TRUE if it's profitable to set block of memory for
29795 vectorized case. LENGTH is the number of bytes to set.
29796 ALIGN is the alignment of destination memory in bytes.
29797 MODE is the vector mode used to set the memory. */
29798 static bool
29799 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29800 unsigned HOST_WIDE_INT align,
29801 machine_mode mode)
29803 int num;
29804 bool unaligned_p = ((align & 3) != 0);
29805 unsigned int nelt = GET_MODE_NUNITS (mode);
29807 /* Instruction loading constant value. */
29808 num = 1;
29809 /* Instructions storing the memory. */
29810 num += (length + nelt - 1) / nelt;
29811 /* Instructions adjusting the address expression. We only need to
29812 adjust the address expression if it's 4-byte aligned and the leftover
29813 bytes can only be stored by a mis-aligned store instruction. */
29814 if (!unaligned_p && (length & 3) != 0)
29815 num++;
29817 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29818 if (!unaligned_p && mode == V16QImode)
29819 num--;
29821 return (num <= arm_block_set_max_insns ());
29824 /* Set a block of memory using vectorization instructions for the
29825 unaligned case. We fill the first LENGTH bytes of the memory
29826 area starting from DSTBASE with byte constant VALUE. ALIGN is
29827 the alignment requirement of memory. Return TRUE if succeeded. */
29828 static bool
29829 arm_block_set_unaligned_vect (rtx dstbase,
29830 unsigned HOST_WIDE_INT length,
29831 unsigned HOST_WIDE_INT value,
29832 unsigned HOST_WIDE_INT align)
29834 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29835 rtx dst, mem;
29836 rtx val_vec, reg;
29837 rtx (*gen_func) (rtx, rtx);
29838 machine_mode mode;
29839 unsigned HOST_WIDE_INT v = value;
29840 unsigned int offset = 0;
29841 gcc_assert ((align & 0x3) != 0);
29842 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29843 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29844 if (length >= nelt_v16)
29846 mode = V16QImode;
29847 gen_func = gen_movmisalignv16qi;
29849 else
29851 mode = V8QImode;
29852 gen_func = gen_movmisalignv8qi;
29854 nelt_mode = GET_MODE_NUNITS (mode);
29855 gcc_assert (length >= nelt_mode);
29856 /* Skip if it isn't profitable. */
29857 if (!arm_block_set_vect_profit_p (length, align, mode))
29858 return false;
29860 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29861 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29863 v = sext_hwi (v, BITS_PER_WORD);
29865 reg = gen_reg_rtx (mode);
29866 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29867 /* Emit instruction loading the constant value. */
29868 emit_move_insn (reg, val_vec);
29870 /* Handle nelt_mode bytes in a vector. */
29871 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29873 emit_insn ((*gen_func) (mem, reg));
29874 if (i + 2 * nelt_mode <= length)
29876 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29877 offset += nelt_mode;
29878 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29882 /* If at least nelt_v8 bytes are left over, we must be in
29883 V16QI mode. */
29884 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29886 /* Handle (8, 16) bytes leftover. */
29887 if (i + nelt_v8 < length)
29889 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29890 offset += length - i;
29891 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29893 /* We are shifting bytes back, set the alignment accordingly. */
29894 if ((length & 1) != 0 && align >= 2)
29895 set_mem_align (mem, BITS_PER_UNIT);
29897 emit_insn (gen_movmisalignv16qi (mem, reg));
29899 /* Handle (0, 8] bytes leftover. */
29900 else if (i < length && i + nelt_v8 >= length)
29902 if (mode == V16QImode)
29903 reg = gen_lowpart (V8QImode, reg);
29905 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29906 + (nelt_mode - nelt_v8))));
29907 offset += (length - i) + (nelt_mode - nelt_v8);
29908 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29910 /* We are shifting bytes back, set the alignment accordingly. */
29911 if ((length & 1) != 0 && align >= 2)
29912 set_mem_align (mem, BITS_PER_UNIT);
29914 emit_insn (gen_movmisalignv8qi (mem, reg));
29917 return true;
29920 /* Set a block of memory using vectorization instructions for the
29921 aligned case. We fill the first LENGTH bytes of the memory area
29922 starting from DSTBASE with byte constant VALUE. ALIGN is the
29923 alignment requirement of memory. Return TRUE if succeeded. */
29924 static bool
29925 arm_block_set_aligned_vect (rtx dstbase,
29926 unsigned HOST_WIDE_INT length,
29927 unsigned HOST_WIDE_INT value,
29928 unsigned HOST_WIDE_INT align)
29930 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
29931 rtx dst, addr, mem;
29932 rtx val_vec, reg;
29933 machine_mode mode;
29934 unsigned HOST_WIDE_INT v = value;
29935 unsigned int offset = 0;
29937 gcc_assert ((align & 0x3) == 0);
29938 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29939 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29940 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29941 mode = V16QImode;
29942 else
29943 mode = V8QImode;
29945 nelt_mode = GET_MODE_NUNITS (mode);
29946 gcc_assert (length >= nelt_mode);
29947 /* Skip if it isn't profitable. */
29948 if (!arm_block_set_vect_profit_p (length, align, mode))
29949 return false;
29951 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29953 v = sext_hwi (v, BITS_PER_WORD);
29955 reg = gen_reg_rtx (mode);
29956 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29957 /* Emit instruction loading the constant value. */
29958 emit_move_insn (reg, val_vec);
29960 i = 0;
29961 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29962 if (mode == V16QImode)
29964 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29965 emit_insn (gen_movmisalignv16qi (mem, reg));
29966 i += nelt_mode;
29967 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29968 if (i + nelt_v8 < length && i + nelt_v16 > length)
29970 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29971 offset += length - nelt_mode;
29972 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29973 /* We are shifting bytes back, set the alignment accordingly. */
29974 if ((length & 0x3) == 0)
29975 set_mem_align (mem, BITS_PER_UNIT * 4);
29976 else if ((length & 0x1) == 0)
29977 set_mem_align (mem, BITS_PER_UNIT * 2);
29978 else
29979 set_mem_align (mem, BITS_PER_UNIT);
29981 emit_insn (gen_movmisalignv16qi (mem, reg));
29982 return true;
29984 /* Fall through for bytes leftover. */
29985 mode = V8QImode;
29986 nelt_mode = GET_MODE_NUNITS (mode);
29987 reg = gen_lowpart (V8QImode, reg);
29990 /* Handle 8 bytes in a vector. */
29991 for (; (i + nelt_mode <= length); i += nelt_mode)
29993 addr = plus_constant (Pmode, dst, i);
29994 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29995 emit_move_insn (mem, reg);
29998 /* Handle single word leftover by shifting 4 bytes back. We can
29999 use aligned access for this case. */
30000 if (i + UNITS_PER_WORD == length)
30002 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30003 offset += i - UNITS_PER_WORD;
30004 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30005 /* We are shifting 4 bytes back, set the alignment accordingly. */
30006 if (align > UNITS_PER_WORD)
30007 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30009 emit_move_insn (mem, reg);
30011 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30012 We have to use unaligned access for this case. */
30013 else if (i < length)
30015 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30016 offset += length - nelt_mode;
30017 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30018 /* We are shifting bytes back, set the alignment accordingly. */
30019 if ((length & 1) == 0)
30020 set_mem_align (mem, BITS_PER_UNIT * 2);
30021 else
30022 set_mem_align (mem, BITS_PER_UNIT);
30024 emit_insn (gen_movmisalignv8qi (mem, reg));
30027 return true;
30030 /* Set a block of memory using plain strh/strb instructions, only
30031 using instructions allowed by ALIGN on the processor. We fill the
30032 first LENGTH bytes of the memory area starting from DSTBASE
30033 with byte constant VALUE. ALIGN is the alignment requirement
30034 of memory. */
30035 static bool
30036 arm_block_set_unaligned_non_vect (rtx dstbase,
30037 unsigned HOST_WIDE_INT length,
30038 unsigned HOST_WIDE_INT value,
30039 unsigned HOST_WIDE_INT align)
30041 unsigned int i;
30042 rtx dst, addr, mem;
30043 rtx val_exp, val_reg, reg;
30044 machine_mode mode;
30045 HOST_WIDE_INT v = value;
30047 gcc_assert (align == 1 || align == 2);
30049 if (align == 2)
30050 v |= (value << BITS_PER_UNIT);
30052 v = sext_hwi (v, BITS_PER_WORD);
30053 val_exp = GEN_INT (v);
30054 /* Skip if it isn't profitable. */
30055 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30056 align, true, false))
30057 return false;
30059 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30060 mode = (align == 2 ? HImode : QImode);
30061 val_reg = force_reg (SImode, val_exp);
30062 reg = gen_lowpart (mode, val_reg);
30064 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30066 addr = plus_constant (Pmode, dst, i);
30067 mem = adjust_automodify_address (dstbase, mode, addr, i);
30068 emit_move_insn (mem, reg);
30071 /* Handle single byte leftover. */
30072 if (i + 1 == length)
30074 reg = gen_lowpart (QImode, val_reg);
30075 addr = plus_constant (Pmode, dst, i);
30076 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30077 emit_move_insn (mem, reg);
30078 i++;
30081 gcc_assert (i == length);
30082 return true;
30085 /* Set a block of memory using plain strd/str/strh/strb instructions,
30086 to permit unaligned copies on processors which support unaligned
30087 semantics for those instructions. We fill the first LENGTH bytes
30088 of the memory area starting from DSTBASE with byte constant VALUE.
30089 ALIGN is the alignment requirement of memory. */
30090 static bool
30091 arm_block_set_aligned_non_vect (rtx dstbase,
30092 unsigned HOST_WIDE_INT length,
30093 unsigned HOST_WIDE_INT value,
30094 unsigned HOST_WIDE_INT align)
30096 unsigned int i;
30097 rtx dst, addr, mem;
30098 rtx val_exp, val_reg, reg;
30099 unsigned HOST_WIDE_INT v;
30100 bool use_strd_p;
30102 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30103 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30105 v = (value | (value << 8) | (value << 16) | (value << 24));
30106 if (length < UNITS_PER_WORD)
30107 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30109 if (use_strd_p)
30110 v |= (v << BITS_PER_WORD);
30111 else
30112 v = sext_hwi (v, BITS_PER_WORD);
30114 val_exp = GEN_INT (v);
30115 /* Skip if it isn't profitable. */
30116 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30117 align, false, use_strd_p))
30119 if (!use_strd_p)
30120 return false;
30122 /* Try without strd. */
30123 v = (v >> BITS_PER_WORD);
30124 v = sext_hwi (v, BITS_PER_WORD);
30125 val_exp = GEN_INT (v);
30126 use_strd_p = false;
30127 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30128 align, false, use_strd_p))
30129 return false;
30132 i = 0;
30133 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30134 /* Handle double words using strd if possible. */
30135 if (use_strd_p)
30137 val_reg = force_reg (DImode, val_exp);
30138 reg = val_reg;
30139 for (; (i + 8 <= length); i += 8)
30141 addr = plus_constant (Pmode, dst, i);
30142 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30143 emit_move_insn (mem, reg);
30146 else
30147 val_reg = force_reg (SImode, val_exp);
30149 /* Handle words. */
30150 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30151 for (; (i + 4 <= length); i += 4)
30153 addr = plus_constant (Pmode, dst, i);
30154 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30155 if ((align & 3) == 0)
30156 emit_move_insn (mem, reg);
30157 else
30158 emit_insn (gen_unaligned_storesi (mem, reg));
30161 /* Merge last pair of STRH and STRB into a STR if possible. */
30162 if (unaligned_access && i > 0 && (i + 3) == length)
30164 addr = plus_constant (Pmode, dst, i - 1);
30165 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30166 /* We are shifting one byte back, set the alignment accordingly. */
30167 if ((align & 1) == 0)
30168 set_mem_align (mem, BITS_PER_UNIT);
30170 /* Most likely this is an unaligned access, and we can't tell at
30171 compilation time. */
30172 emit_insn (gen_unaligned_storesi (mem, reg));
30173 return true;
30176 /* Handle half word leftover. */
30177 if (i + 2 <= length)
30179 reg = gen_lowpart (HImode, val_reg);
30180 addr = plus_constant (Pmode, dst, i);
30181 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30182 if ((align & 1) == 0)
30183 emit_move_insn (mem, reg);
30184 else
30185 emit_insn (gen_unaligned_storehi (mem, reg));
30187 i += 2;
30190 /* Handle single byte leftover. */
30191 if (i + 1 == length)
30193 reg = gen_lowpart (QImode, val_reg);
30194 addr = plus_constant (Pmode, dst, i);
30195 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30196 emit_move_insn (mem, reg);
30199 return true;
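/* For instance, with VALUE == 0xab the word pattern built above is
   0xabababab; when strd is usable it is widened to the DImode constant
   0xabababababababab so that each strd stores eight copies of the byte at
   once.  */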
30202 /* Set a block of memory using vectorization instructions for both
30203 aligned and unaligned cases. We fill the first LENGTH bytes of
30204 the memory area starting from DSTBASE with byte constant VALUE.
30205 ALIGN is the alignment requirement of memory. */
30206 static bool
30207 arm_block_set_vect (rtx dstbase,
30208 unsigned HOST_WIDE_INT length,
30209 unsigned HOST_WIDE_INT value,
30210 unsigned HOST_WIDE_INT align)
30212 /* Check whether we need to use unaligned store instruction. */
30213 if (((align & 3) != 0 || (length & 3) != 0)
30214 /* Check whether unaligned store instruction is available. */
30215 && (!unaligned_access || BYTES_BIG_ENDIAN))
30216 return false;
30218 if ((align & 3) == 0)
30219 return arm_block_set_aligned_vect (dstbase, length, value, align);
30220 else
30221 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30224 /* Expand a string store operation. First we try to do that by using
30225 vectorization instructions, then try with ARM unaligned access and
30226 double-word store if profitable. OPERANDS[0] is the destination,
30227 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30228 initialize the memory, OPERANDS[3] is the known alignment of the
30229 destination. */
30230 bool
30231 arm_gen_setmem (rtx *operands)
30233 rtx dstbase = operands[0];
30234 unsigned HOST_WIDE_INT length;
30235 unsigned HOST_WIDE_INT value;
30236 unsigned HOST_WIDE_INT align;
30238 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30239 return false;
30241 length = UINTVAL (operands[1]);
30242 if (length > 64)
30243 return false;
30245 value = (UINTVAL (operands[2]) & 0xFF);
30246 align = UINTVAL (operands[3]);
30247 if (TARGET_NEON && length >= 8
30248 && current_tune->string_ops_prefer_neon
30249 && arm_block_set_vect (dstbase, length, value, align))
30250 return true;
30252 if (!unaligned_access && (align & 3) != 0)
30253 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30255 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30259 static bool
30260 arm_macro_fusion_p (void)
30262 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30265 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30266 for MOVW / MOVT macro fusion. */
30268 static bool
30269 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30271 /* We are trying to fuse
30272 movw imm / movt imm
30273 instructions as a group that gets scheduled together. */
30275 rtx set_dest = SET_DEST (curr_set);
30277 if (GET_MODE (set_dest) != SImode)
30278 return false;
30280 /* We are trying to match:
30281 prev (movw) == (set (reg r0) (const_int imm16))
30282 curr (movt) == (set (zero_extract (reg r0)
30283 (const_int 16)
30284 (const_int 16))
30285 (const_int imm16_1))
30287 prev (movw) == (set (reg r1)
30288 (high (symbol_ref ("SYM"))))
30289 curr (movt) == (set (reg r0)
30290 (lo_sum (reg r1)
30291 (symbol_ref ("SYM")))) */
30293 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30295 if (CONST_INT_P (SET_SRC (curr_set))
30296 && CONST_INT_P (SET_SRC (prev_set))
30297 && REG_P (XEXP (set_dest, 0))
30298 && REG_P (SET_DEST (prev_set))
30299 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30300 return true;
30303 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30304 && REG_P (SET_DEST (curr_set))
30305 && REG_P (SET_DEST (prev_set))
30306 && GET_CODE (SET_SRC (prev_set)) == HIGH
30307 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30308 return true;
30310 return false;
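/* In assembly terms the fusible shapes correspond to pairs such as

	movw	r0, #:lower16:sym
	movt	r0, #:upper16:sym

   or a movw/movt pair building a 32-bit immediate; keeping the two
   instructions adjacent lets cores that fuse them treat the pair as a
   single operation.  (Register and symbol names here are illustrative.)  */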
30313 static bool
30314 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30316 rtx prev_set = single_set (prev);
30317 rtx curr_set = single_set (curr);
30319 if (!prev_set
30320 || !curr_set)
30321 return false;
30323 if (any_condjump_p (curr))
30324 return false;
30326 if (!arm_macro_fusion_p ())
30327 return false;
30329 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30330 && aarch_crypto_can_dual_issue (prev, curr))
30331 return true;
30333 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30334 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30335 return true;
30337 return false;
30340 /* Return true iff the instruction fusion described by OP is enabled. */
30341 bool
30342 arm_fusion_enabled_p (tune_params::fuse_ops op)
30344 return current_tune->fusible_ops & op;
30347 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30348 scheduled for speculative execution. Reject the long-running division
30349 and square-root instructions. */
30351 static bool
30352 arm_sched_can_speculate_insn (rtx_insn *insn)
30354 switch (get_attr_type (insn))
30356 case TYPE_SDIV:
30357 case TYPE_UDIV:
30358 case TYPE_FDIVS:
30359 case TYPE_FDIVD:
30360 case TYPE_FSQRTS:
30361 case TYPE_FSQRTD:
30362 case TYPE_NEON_FP_SQRT_S:
30363 case TYPE_NEON_FP_SQRT_D:
30364 case TYPE_NEON_FP_SQRT_S_Q:
30365 case TYPE_NEON_FP_SQRT_D_Q:
30366 case TYPE_NEON_FP_DIV_S:
30367 case TYPE_NEON_FP_DIV_D:
30368 case TYPE_NEON_FP_DIV_S_Q:
30369 case TYPE_NEON_FP_DIV_D_Q:
30370 return false;
30371 default:
30372 return true;
30376 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30378 static unsigned HOST_WIDE_INT
30379 arm_asan_shadow_offset (void)
30381 return HOST_WIDE_INT_1U << 29;
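/* With the usual AddressSanitizer mapping this means the shadow byte for
   address ADDR lives at (ADDR >> 3) + 0x20000000.  */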
30385 /* This is a temporary fix for PR60655. Ideally we need
30386 to handle most of these cases in the generic part but
30387 currently we reject minus (..) (sym_ref). We try to
30388 ameliorate the case with minus (sym_ref1) (sym_ref2)
30389 where they are in the same section. */
30391 static bool
30392 arm_const_not_ok_for_debug_p (rtx p)
30394 tree decl_op0 = NULL;
30395 tree decl_op1 = NULL;
30397 if (GET_CODE (p) == UNSPEC)
30398 return true;
30399 if (GET_CODE (p) == MINUS)
30401 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30403 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30404 if (decl_op1
30405 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30406 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30408 if ((VAR_P (decl_op1)
30409 || TREE_CODE (decl_op1) == CONST_DECL)
30410 && (VAR_P (decl_op0)
30411 || TREE_CODE (decl_op0) == CONST_DECL))
30412 return (get_variable_section (decl_op1, false)
30413 != get_variable_section (decl_op0, false));
30415 if (TREE_CODE (decl_op1) == LABEL_DECL
30416 && TREE_CODE (decl_op0) == LABEL_DECL)
30417 return (DECL_CONTEXT (decl_op1)
30418 != DECL_CONTEXT (decl_op0));
30421 return true;
30425 return false;
30428 /* Return TRUE if X is a reference to a value in a constant pool. */
30429 extern bool
30430 arm_is_constant_pool_ref (rtx x)
30432 return (MEM_P (x)
30433 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30434 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30437 /* Remember the last target of arm_set_current_function. */
30438 static GTY(()) tree arm_previous_fndecl;
30440 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30442 void
30443 save_restore_target_globals (tree new_tree)
30445 /* If we have a previous state, use it. */
30446 if (TREE_TARGET_GLOBALS (new_tree))
30447 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30448 else if (new_tree == target_option_default_node)
30449 restore_target_globals (&default_target_globals);
30450 else
30452 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30453 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30456 arm_option_params_internal ();
30459 /* Invalidate arm_previous_fndecl. */
30461 void
30462 arm_reset_previous_fndecl (void)
30464 arm_previous_fndecl = NULL_TREE;
30467 /* Establish appropriate back-end context for processing the function
30468 FNDECL. The argument might be NULL to indicate processing at top
30469 level, outside of any function scope. */
30471 static void
30472 arm_set_current_function (tree fndecl)
30474 if (!fndecl || fndecl == arm_previous_fndecl)
30475 return;
30477 tree old_tree = (arm_previous_fndecl
30478 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30479 : NULL_TREE);
30481 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30483 /* If current function has no attributes but previous one did,
30484 use the default node. */
30485 if (! new_tree && old_tree)
30486 new_tree = target_option_default_node;
30488 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30489 the default have been handled by save_restore_target_globals from
30490 arm_pragma_target_parse. */
30491 if (old_tree == new_tree)
30492 return;
30494 arm_previous_fndecl = fndecl;
30496 /* First set the target options. */
30497 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30499 save_restore_target_globals (new_tree);
30502 /* Implement TARGET_OPTION_PRINT. */
30504 static void
30505 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30507 int flags = ptr->x_target_flags;
30508 const char *fpu_name;
30510 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30511 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30513 fprintf (file, "%*sselected isa %s\n", indent, "",
30514 TARGET_THUMB2_P (flags) ? "thumb2" :
30515 TARGET_THUMB_P (flags) ? "thumb1" :
30516 "arm");
30518 if (ptr->x_arm_arch_string)
30519 fprintf (file, "%*sselected architecture %s\n", indent, "",
30520 ptr->x_arm_arch_string);
30522 if (ptr->x_arm_cpu_string)
30523 fprintf (file, "%*sselected CPU %s\n", indent, "",
30524 ptr->x_arm_cpu_string);
30526 if (ptr->x_arm_tune_string)
30527 fprintf (file, "%*sselected tune %s\n", indent, "",
30528 ptr->x_arm_tune_string);
30530 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30533 /* Hook to determine if one function can safely inline another. */
30535 static bool
30536 arm_can_inline_p (tree caller, tree callee)
30538 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30539 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30540 bool can_inline = true;
30542 struct cl_target_option *caller_opts
30543 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30544 : target_option_default_node);
30546 struct cl_target_option *callee_opts
30547 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30548 : target_option_default_node);
30550 if (callee_opts == caller_opts)
30551 return true;
30553 /* Callee's ISA features should be a subset of the caller's. */
30554 struct arm_build_target caller_target;
30555 struct arm_build_target callee_target;
30556 caller_target.isa = sbitmap_alloc (isa_num_bits);
30557 callee_target.isa = sbitmap_alloc (isa_num_bits);
30559 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30560 false);
30561 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30562 false);
30563 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30564 can_inline = false;
30566 sbitmap_free (caller_target.isa);
30567 sbitmap_free (callee_target.isa);
30569 /* OK to inline between different modes.
30570 A function with mode-specific instructions, e.g. using asm,
30571 must be explicitly protected with noinline.  */
30572 return can_inline;
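/* A sketch of the subset rule above in user terms (the attribute strings
   are only illustrative):

	__attribute__ ((target ("fpu=neon-vfpv4"))) void callee (void);
	__attribute__ ((target ("fpu=vfpv4"))) void caller (void);

   Inlining callee into caller is rejected because the callee's ISA feature
   set (which includes Neon) is not a subset of the caller's, whereas
   inlining in the opposite direction would be allowed.  */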
30575 /* Hook to fix function's alignment affected by target attribute. */
30577 static void
30578 arm_relayout_function (tree fndecl)
30580 if (DECL_USER_ALIGN (fndecl))
30581 return;
30583 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30585 if (!callee_tree)
30586 callee_tree = target_option_default_node;
30588 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30589 SET_DECL_ALIGN
30590 (fndecl,
30591 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30594 /* Inner function to process the attribute ((target (...))): take one
30595 argument and set the current options from it.  If we have a list,
30596 recursively process each element of the list. */
30598 static bool
30599 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30601 if (TREE_CODE (args) == TREE_LIST)
30603 bool ret = true;
30605 for (; args; args = TREE_CHAIN (args))
30606 if (TREE_VALUE (args)
30607 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30608 ret = false;
30609 return ret;
30612 else if (TREE_CODE (args) != STRING_CST)
30614 error ("attribute %<target%> argument not a string");
30615 return false;
30618 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30619 char *q;
30621 while ((q = strtok (argstr, ",")) != NULL)
30623 while (ISSPACE (*q)) ++q;
30625 argstr = NULL;
30626 if (!strncmp (q, "thumb", 5))
30627 opts->x_target_flags |= MASK_THUMB;
30629 else if (!strncmp (q, "arm", 3))
30630 opts->x_target_flags &= ~MASK_THUMB;
30632 else if (!strncmp (q, "fpu=", 4))
30634 int fpu_index;
30635 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30636 &fpu_index, CL_TARGET))
30638 error ("invalid fpu for attribute(target(\"%s\"))", q);
30639 return false;
30641 if (fpu_index == TARGET_FPU_auto)
30643 /* This doesn't really make sense until we support
30644 general dynamic selection of the architecture and all
30645 sub-features. */
30646 sorry ("auto fpu selection not currently permitted here");
30647 return false;
30649 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30651 else
30653 error ("attribute(target(\"%s\")) is unknown", q);
30654 return false;
30658 return true;
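/* The strings recognised above are "thumb", "arm" and "fpu=<name>",
   optionally combined with commas.  A hypothetical use (vfpv3-d16 is one
   of the -mfpu names):

	__attribute__ ((target ("thumb,fpu=vfpv3-d16"))) int f (int);

   compiles f as Thumb code using the VFPv3-D16 FPU, overriding the
   -marm/-mthumb and -mfpu options used for the rest of the translation
   unit.  */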
30661 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30663 tree
30664 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30665 struct gcc_options *opts_set)
30667 struct cl_target_option cl_opts;
30669 if (!arm_valid_target_attribute_rec (args, opts))
30670 return NULL_TREE;
30672 cl_target_option_save (&cl_opts, opts);
30673 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30674 arm_option_check_internal (opts);
30675 /* Do any overrides, such as global options arch=xxx. */
30676 arm_option_override_internal (opts, opts_set);
30678 return build_target_option_node (opts);
30681 static void
30682 add_attribute (const char * mode, tree *attributes)
30684 size_t len = strlen (mode);
30685 tree value = build_string (len, mode);
30687 TREE_TYPE (value) = build_array_type (char_type_node,
30688 build_index_type (size_int (len)));
30690 *attributes = tree_cons (get_identifier ("target"),
30691 build_tree_list (NULL_TREE, value),
30692 *attributes);
30695 /* For testing.  Insert thumb or arm modes alternately on functions. */
30697 static void
30698 arm_insert_attributes (tree fndecl, tree * attributes)
30700 const char *mode;
30702 if (! TARGET_FLIP_THUMB)
30703 return;
30705 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30706 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30707 return;
30709 /* Nested definitions must inherit mode. */
30710 if (current_function_decl)
30712 mode = TARGET_THUMB ? "thumb" : "arm";
30713 add_attribute (mode, attributes);
30714 return;
30717 /* If there is already a setting don't change it. */
30718 if (lookup_attribute ("target", *attributes) != NULL)
30719 return;
30721 mode = thumb_flipper ? "thumb" : "arm";
30722 add_attribute (mode, attributes);
30724 thumb_flipper = !thumb_flipper;
30727 /* Hook to validate attribute((target("string"))). */
30729 static bool
30730 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30731 tree args, int ARG_UNUSED (flags))
30733 bool ret = true;
30734 struct gcc_options func_options;
30735 tree cur_tree, new_optimize;
30736 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30738 /* Get the optimization options of the current function. */
30739 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30741 /* If the function changed the optimization levels as well as setting target
30742 options, start with the optimizations specified. */
30743 if (!func_optimize)
30744 func_optimize = optimization_default_node;
30746 /* Init func_options. */
30747 memset (&func_options, 0, sizeof (func_options));
30748 init_options_struct (&func_options, NULL);
30749 lang_hooks.init_options_struct (&func_options);
30751 /* Initialize func_options to the defaults. */
30752 cl_optimization_restore (&func_options,
30753 TREE_OPTIMIZATION (func_optimize));
30755 cl_target_option_restore (&func_options,
30756 TREE_TARGET_OPTION (target_option_default_node));
30758 /* Set func_options flags with new target mode. */
30759 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30760 &global_options_set);
30762 if (cur_tree == NULL_TREE)
30763 ret = false;
30765 new_optimize = build_optimization_node (&func_options);
30767 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30769 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30771 finalize_options_struct (&func_options);
30773 return ret;
30776 /* Match an ISA feature bitmap to a named FPU. We always use the
30777 first entry that exactly matches the feature set, so that we
30778 effectively canonicalize the FPU name for the assembler. */
30779 static const char*
30780 arm_identify_fpu_from_isa (sbitmap isa)
30782 auto_sbitmap fpubits (isa_num_bits);
30783 auto_sbitmap cand_fpubits (isa_num_bits);
30785 bitmap_and (fpubits, isa, isa_all_fpubits);
30787 /* If there are no ISA feature bits relating to the FPU, we must be
30788 doing soft-float. */
30789 if (bitmap_empty_p (fpubits))
30790 return "softvfp";
30792 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30794 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30795 if (bitmap_equal_p (fpubits, cand_fpubits))
30796 return all_fpus[i].name;
30798 /* We must find an entry, or things have gone wrong. */
30799 gcc_unreachable ();
30802 void
30803 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30806 fprintf (stream, "\t.syntax unified\n");
30808 if (TARGET_THUMB)
30810 if (is_called_in_ARM_mode (decl)
30811 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30812 && cfun->is_thunk))
30813 fprintf (stream, "\t.code 32\n");
30814 else if (TARGET_THUMB1)
30815 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30816 else
30817 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30819 else
30820 fprintf (stream, "\t.arm\n");
30822 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30823 (TARGET_SOFT_FLOAT
30824 ? "softvfp"
30825 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30827 if (TARGET_POKE_FUNCTION_NAME)
30828 arm_poke_function_name (stream, (const char *) name);
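/* For a soft-float Thumb-2 function the directives emitted above look
   roughly like this (illustrative output only):

	.syntax unified
	.thumb
	.thumb_func
	.fpu softvfp

   An ARM-mode function gets ".arm" instead of the two Thumb directives,
   and a hard-float target names its actual FPU after ".fpu".  */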
30831 /* If MEM is in the form of [base+offset], extract the two parts
30832 of the address and store them in BASE and OFFSET; otherwise return
30833 false after clearing BASE and OFFSET.  */
30835 static bool
30836 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30838 rtx addr;
30840 gcc_assert (MEM_P (mem));
30842 addr = XEXP (mem, 0);
30844 /* Strip off const from addresses like (const (addr)). */
30845 if (GET_CODE (addr) == CONST)
30846 addr = XEXP (addr, 0);
30848 if (GET_CODE (addr) == REG)
30850 *base = addr;
30851 *offset = const0_rtx;
30852 return true;
30855 if (GET_CODE (addr) == PLUS
30856 && GET_CODE (XEXP (addr, 0)) == REG
30857 && CONST_INT_P (XEXP (addr, 1)))
30859 *base = XEXP (addr, 0);
30860 *offset = XEXP (addr, 1);
30861 return true;
30864 *base = NULL_RTX;
30865 *offset = NULL_RTX;
30867 return false;
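/* Schematically, a memory operand such as

	(mem:SI (plus:SI (reg:SI r1) (const_int 8)))

   yields *BASE = (reg:SI r1) and *OFFSET = (const_int 8), a plain
   (mem:SI (reg:SI r1)) yields the register and const0_rtx, and any other
   address shape (auto-increment, for instance) makes us clear both
   outputs and return false.  */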
30870 /* If INSN is a load or store whose address has the form [base+offset],
30871 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
30872 to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
30873 otherwise return FALSE. */
30875 static bool
30876 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30878 rtx x, dest, src;
30880 gcc_assert (INSN_P (insn));
30881 x = PATTERN (insn);
30882 if (GET_CODE (x) != SET)
30883 return false;
30885 src = SET_SRC (x);
30886 dest = SET_DEST (x);
30887 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30889 *is_load = false;
30890 extract_base_offset_in_addr (dest, base, offset);
30892 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30894 *is_load = true;
30895 extract_base_offset_in_addr (src, base, offset);
30897 else
30898 return false;
30900 return (*base != NULL_RTX && *offset != NULL_RTX);
30903 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30905 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30906 and PRI are only calculated for these instructions.  For other instructions,
30907 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
30908 of instruction fusion can be supported by returning different priorities.
30910 It's important that irrelevant instructions get the largest FUSION_PRI. */
30912 static void
30913 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30914 int *fusion_pri, int *pri)
30916 int tmp, off_val;
30917 bool is_load;
30918 rtx base, offset;
30920 gcc_assert (INSN_P (insn));
30922 tmp = max_pri - 1;
30923 if (!fusion_load_store (insn, &base, &offset, &is_load))
30925 *pri = tmp;
30926 *fusion_pri = tmp;
30927 return;
30930 /* Load goes first. */
30931 if (is_load)
30932 *fusion_pri = tmp - 1;
30933 else
30934 *fusion_pri = tmp - 2;
30936 tmp /= 2;
30938 /* INSN with smaller base register goes first. */
30939 tmp -= ((REGNO (base) & 0xff) << 20);
30941 /* INSN with smaller offset goes first. */
30942 off_val = (int)(INTVAL (offset));
30943 if (off_val >= 0)
30944 tmp -= (off_val & 0xfffff);
30945 else
30946 tmp += ((- off_val) & 0xfffff);
30948 *pri = tmp;
30949 return;
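/* A worked example of the priority computation above, assuming MAX_PRI is
   100: for two loads through the same base register

	ldr	r0, [r3, #4]
	ldr	r1, [r3, #8]

   both get *FUSION_PRI = 98 (loads are ordered before stores), while *PRI
   starts from 49 and is decreased by terms derived from the base register
   number and the offset, so the access at offset #4 keeps a slightly
   higher priority than the one at offset #8 and the two tend to be
   scheduled back to back for possible fusion.  */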
30953 /* Construct and return a PARALLEL RTX vector with elements numbering the
30954 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30955 the vector - from the perspective of the architecture. This does not
30956 line up with GCC's perspective on lane numbers, so we end up with
30957 different masks depending on our target endian-ness. The diagram
30958 below may help. We must draw the distinction when building masks
30959 which select one half of the vector. An instruction selecting
30960 architectural low lanes for a big-endian target must be described using
30961 a mask selecting GCC high lanes.
30963 Big-Endian Little-Endian
30965 GCC 0 1 2 3 3 2 1 0
30966 | x | x | x | x | | x | x | x | x |
30967 Architecture 3 2 1 0 3 2 1 0
30969 Low Mask: { 2, 3 } { 0, 1 }
30970 High Mask: { 0, 1 } { 2, 3 }
30974 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30976 int nunits = GET_MODE_NUNITS (mode);
30977 rtvec v = rtvec_alloc (nunits / 2);
30978 int high_base = nunits / 2;
30979 int low_base = 0;
30980 int base;
30981 rtx t1;
30982 int i;
30984 if (BYTES_BIG_ENDIAN)
30985 base = high ? low_base : high_base;
30986 else
30987 base = high ? high_base : low_base;
30989 for (i = 0; i < nunits / 2; i++)
30990 RTVEC_ELT (v, i) = GEN_INT (base + i);
30992 t1 = gen_rtx_PARALLEL (mode, v);
30993 return t1;
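/* For example, for V4SImode this returns (parallel [2 3]) when HIGH is
   true on a little-endian target but (parallel [0 1]) for the same
   request on a big-endian target, matching the diagram above: the
   constants are the GCC lane numbers of the architectural high half.  */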
30996 /* Check OP for validity as a PARALLEL RTX vector with elements
30997 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30998 from the perspective of the architecture. See the diagram above
30999 arm_simd_vect_par_cnst_half for more details. */
31001 bool
31002 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31003 bool high)
31005 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31006 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31007 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31008 int i = 0;
31010 if (!VECTOR_MODE_P (mode))
31011 return false;
31013 if (count_op != count_ideal)
31014 return false;
31016 for (i = 0; i < count_ideal; i++)
31018 rtx elt_op = XVECEXP (op, 0, i);
31019 rtx elt_ideal = XVECEXP (ideal, 0, i);
31021 if (!CONST_INT_P (elt_op)
31022 || INTVAL (elt_ideal) != INTVAL (elt_op))
31023 return false;
31025 return true;
31028 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31029 in Thumb1. */
31030 static bool
31031 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31032 const_tree)
31034 /* For now, we punt and not handle this for TARGET_THUMB1. */
31035 if (vcall_offset && TARGET_THUMB1)
31036 return false;
31038 /* Otherwise ok. */
31039 return true;
31042 /* Generate RTL for a conditional branch with rtx comparison CODE in
31043 mode CC_MODE. The destination of the unlikely conditional branch
31044 is LABEL_REF. */
31046 void
31047 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31048 rtx label_ref)
31050 rtx x;
31051 x = gen_rtx_fmt_ee (code, VOIDmode,
31052 gen_rtx_REG (cc_mode, CC_REGNUM),
31053 const0_rtx);
31055 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31056 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31057 pc_rtx);
31058 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31061 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31063 For pure-code sections there is no letter code for this attribute, so
31064 output all the section flags numerically when this is needed. */
31066 static bool
31067 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31070 if (flags & SECTION_ARM_PURECODE)
31072 *num = 0x20000000;
31074 if (!(flags & SECTION_DEBUG))
31075 *num |= 0x2;
31076 if (flags & SECTION_EXCLUDE)
31077 *num |= 0x80000000;
31078 if (flags & SECTION_WRITE)
31079 *num |= 0x1;
31080 if (flags & SECTION_CODE)
31081 *num |= 0x4;
31082 if (flags & SECTION_MERGE)
31083 *num |= 0x10;
31084 if (flags & SECTION_STRINGS)
31085 *num |= 0x20;
31086 if (flags & SECTION_TLS)
31087 *num |= 0x400;
31088 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31089 *num |= 0x200;
31091 return true;
31094 return false;
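/* A worked example of the encoding above: an allocated, executable
   pure-code section (SECTION_CODE set, SECTION_DEBUG clear) yields
   *NUM = 0x20000000 | 0x2 | 0x4 = 0x20000006, i.e. the ELF flags
   SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR, which cannot all be
   expressed with the usual section flag letters.  */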
31097 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31099 If pure-code is passed as an option, make sure all functions are in
31100 sections that have the SHF_ARM_PURECODE attribute. */
31102 static section *
31103 arm_function_section (tree decl, enum node_frequency freq,
31104 bool startup, bool exit)
31106 const char * section_name;
31107 section * sec;
31109 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31110 return default_function_section (decl, freq, startup, exit);
31112 if (!target_pure_code)
31113 return default_function_section (decl, freq, startup, exit);
31116 section_name = DECL_SECTION_NAME (decl);
31118 /* If a function is not in a named section then it falls under the 'default'
31119 text section, also known as '.text'. We can preserve previous behavior as
31120 the default text section already has the SHF_ARM_PURECODE section
31121 attribute. */
31122 if (!section_name)
31124 section *default_sec = default_function_section (decl, freq, startup,
31125 exit);
31127 /* If default_sec is not null, then it must be a special section like for
31128 example .text.startup. We set the pure-code attribute and return the
31129 same section to preserve existing behavior. */
31130 if (default_sec)
31131 default_sec->common.flags |= SECTION_ARM_PURECODE;
31132 return default_sec;
31135 /* Otherwise look whether a section has already been created with
31136 'section_name'. */
31137 sec = get_named_section (decl, section_name, 0);
31138 if (!sec)
31139 /* If that is not the case passing NULL as the section's name to
31140 'get_named_section' will create a section with the declaration's
31141 section name. */
31142 sec = get_named_section (decl, NULL, 0);
31144 /* Set the SHF_ARM_PURECODE attribute. */
31145 sec->common.flags |= SECTION_ARM_PURECODE;
31147 return sec;
31150 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31152 If DECL is a function declaration and pure-code is passed as an option
31153 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is
31154 the section's name and RELOC indicates whether the declaration's
31155 initializer may contain runtime relocations. */
31157 static unsigned int
31158 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31160 unsigned int flags = default_section_type_flags (decl, name, reloc);
31162 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31163 flags |= SECTION_ARM_PURECODE;
31165 return flags;
31168 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31170 static void
31171 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31172 rtx op0, rtx op1,
31173 rtx *quot_p, rtx *rem_p)
31175 if (mode == SImode)
31176 gcc_assert (!TARGET_IDIV);
31178 scalar_int_mode libval_mode
31179 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31181 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31182 libval_mode,
31183 op0, GET_MODE (op0),
31184 op1, GET_MODE (op1));
31186 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31187 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31188 GET_MODE_SIZE (mode));
31190 gcc_assert (quotient);
31191 gcc_assert (remainder);
31193 *quot_p = quotient;
31194 *rem_p = remainder;
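/* As an illustration, for MODE == SImode the AEABI helper (for example
   __aeabi_idivmod) returns the quotient/remainder pair packed into a
   single DImode value, so LIBVAL_MODE is DImode and the two
   simplify_gen_subreg calls above extract the quotient at byte offset 0
   and the remainder at byte offset GET_MODE_SIZE (SImode), i.e. 4.  */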
31197 /* This function checks for the availability of the coprocessor builtin passed
31198 in BUILTIN for the current target. Returns true if it is available and
31199 false otherwise. If a BUILTIN is passed for which this function has not
31200 been implemented it will cause an internal compiler error (gcc_unreachable). */
31202 bool
31203 arm_coproc_builtin_available (enum unspecv builtin)
31205 /* None of these builtins are available in Thumb mode if the target only
31206 supports Thumb-1. */
31207 if (TARGET_THUMB1)
31208 return false;
31210 switch (builtin)
31212 case VUNSPEC_CDP:
31213 case VUNSPEC_LDC:
31214 case VUNSPEC_LDCL:
31215 case VUNSPEC_STC:
31216 case VUNSPEC_STCL:
31217 case VUNSPEC_MCR:
31218 case VUNSPEC_MRC:
31219 if (arm_arch4)
31220 return true;
31221 break;
31222 case VUNSPEC_CDP2:
31223 case VUNSPEC_LDC2:
31224 case VUNSPEC_LDC2L:
31225 case VUNSPEC_STC2:
31226 case VUNSPEC_STC2L:
31227 case VUNSPEC_MCR2:
31228 case VUNSPEC_MRC2:
31229 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31230 ARMv8-{A,M}. */
31231 if (arm_arch5)
31232 return true;
31233 break;
31234 case VUNSPEC_MCRR:
31235 case VUNSPEC_MRRC:
31236 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31237 ARMv8-{A,M}. */
31238 if (arm_arch6 || arm_arch5te)
31239 return true;
31240 break;
31241 case VUNSPEC_MCRR2:
31242 case VUNSPEC_MRRC2:
31243 if (arm_arch6)
31244 return true;
31245 break;
31246 default:
31247 gcc_unreachable ();
31249 return false;
31252 /* This function returns true if OP is a valid memory operand for the ldc and
31253 stc coprocessor instructions and false otherwise. */
31255 bool
31256 arm_coproc_ldc_stc_legitimate_address (rtx op)
31258 HOST_WIDE_INT range;
31259 /* Has to be a memory operand. */
31260 if (!MEM_P (op))
31261 return false;
31263 op = XEXP (op, 0);
31265 /* We accept registers. */
31266 if (REG_P (op))
31267 return true;
31269 switch (GET_CODE (op))
31271 case PLUS:
31273 /* Or registers with an offset. */
31274 if (!REG_P (XEXP (op, 0)))
31275 return false;
31277 op = XEXP (op, 1);
31279 /* The offset must be an immediate though. */
31280 if (!CONST_INT_P (op))
31281 return false;
31283 range = INTVAL (op);
31285 /* Within the range of [-1020,1020]. */
31286 if (!IN_RANGE (range, -1020, 1020))
31287 return false;
31289 /* And a multiple of 4. */
31290 return (range % 4) == 0;
31292 case PRE_INC:
31293 case POST_INC:
31294 case PRE_DEC:
31295 case POST_DEC:
31296 return REG_P (XEXP (op, 0));
31297 default:
31298 gcc_unreachable ();
31300 return false;
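/* Schematically, the operands the checks above accept or reject look like:

	(mem (reg r0))					accepted
	(mem (plus (reg r0) (const_int -1020)))	accepted: in range, multiple of 4
	(mem (plus (reg r0) (const_int 6)))		rejected: not a multiple of 4
	(mem (post_inc (reg r0)))			accepted

   (OP is reduced to the address part before the switch runs, so the mem
   wrappers are only shown for context.)  */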
31303 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31305 In VFPv1, VFP registers could only be accessed in the mode they were
31306 set, so subregs would be invalid there. However, we don't support
31307 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31309 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31310 VFP registers in little-endian order. We can't describe that accurately to
31311 GCC, so avoid taking subregs of such values.
31313 The only exception is going from a 128-bit to a 64-bit type. In that
31314 case the data layout happens to be consistent for big-endian, so we
31315 explicitly allow that case. */
31317 static bool
31318 arm_can_change_mode_class (machine_mode from, machine_mode to,
31319 reg_class_t rclass)
31321 if (TARGET_BIG_END
31322 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31323 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31324 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31325 && reg_classes_intersect_p (VFP_REGS, rclass))
31326 return false;
31327 return true;
31330 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31331 strcpy from constants will be faster. */
31333 static HOST_WIDE_INT
31334 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31336 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31337 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31338 return MAX (align, BITS_PER_WORD * factor);
31339 return align;
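/* Concretely, assuming BITS_PER_WORD is 32 here: a string constant is
   normally raised to at least 32-bit alignment, and to 64-bit alignment
   when compiling ARM (not Thumb) code tuned for XScale; the boost is
   skipped entirely when optimizing for size.  */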
31342 #if CHECKING_P
31343 namespace selftest {
31345 /* Scan the static data tables generated by parsecpu.awk looking for
31346 potential issues with the data. We primarily check for
31347 inconsistencies in the option extensions at present (extensions
31348 that duplicate others but aren't marked as aliases). Furthermore,
31349 for correct canonicalization later options must never be a subset
31350 of an earlier option. Any extension should also only specify other
31351 feature bits and never an architecture bit. The architecture is inferred
31352 from the declaration of the extension. */
31353 static void
31354 arm_test_cpu_arch_data (void)
31356 const arch_option *arch;
31357 const cpu_option *cpu;
31358 auto_sbitmap target_isa (isa_num_bits);
31359 auto_sbitmap isa1 (isa_num_bits);
31360 auto_sbitmap isa2 (isa_num_bits);
31362 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31364 const cpu_arch_extension *ext1, *ext2;
31366 if (arch->common.extensions == NULL)
31367 continue;
31369 arm_initialize_isa (target_isa, arch->common.isa_bits);
31371 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31373 if (ext1->alias)
31374 continue;
31376 arm_initialize_isa (isa1, ext1->isa_bits);
31377 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31379 if (ext2->alias || ext1->remove != ext2->remove)
31380 continue;
31382 arm_initialize_isa (isa2, ext2->isa_bits);
31383 /* If the option is a subset of the parent option, it doesn't
31384 add anything and so isn't useful. */
31385 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31387 /* If the extension specifies any architectural bits then
31388 disallow it. Extensions should only specify feature bits. */
31389 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31394 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31396 const cpu_arch_extension *ext1, *ext2;
31398 if (cpu->common.extensions == NULL)
31399 continue;
31401 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31403 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31405 if (ext1->alias)
31406 continue;
31408 arm_initialize_isa (isa1, ext1->isa_bits);
31409 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31411 if (ext2->alias || ext1->remove != ext2->remove)
31412 continue;
31414 arm_initialize_isa (isa2, ext2->isa_bits);
31415 /* If the option is a subset of the parent option, it doesn't
31416 add anything and so isn't useful. */
31417 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31419 /* If the extension specifies any architectural bits then
31420 disallow it. Extensions should only specify feature bits. */
31421 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31427 /* Scan the static data tables generated by parsecpu.awk looking for
31428 potential issues with the data. Here we check for consistency between the
31429 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31430 a feature bit that is not defined by any FPU flag. */
31431 static void
31432 arm_test_fpu_data (void)
31434 auto_sbitmap isa_all_fpubits (isa_num_bits);
31435 auto_sbitmap fpubits (isa_num_bits);
31436 auto_sbitmap tmpset (isa_num_bits);
31438 static const enum isa_feature fpu_bitlist[]
31439 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31440 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31442 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31444 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31445 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31446 bitmap_clear (isa_all_fpubits);
31447 bitmap_copy (isa_all_fpubits, tmpset);
31450 if (!bitmap_empty_p (isa_all_fpubits))
31452 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31453 " group that are not defined by any FPU.\n"
31454 " Check your arm-cpus.in.\n");
31455 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31459 static void
31460 arm_run_selftests (void)
31462 arm_test_cpu_arch_data ();
31463 arm_test_fpu_data ();
31465 } /* Namespace selftest. */
31467 #undef TARGET_RUN_TARGET_SELFTESTS
31468 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31469 #endif /* CHECKING_P */
31471 struct gcc_target targetm = TARGET_INITIALIZER;
31473 #include "gt-arm.h"