/* Output routines for GCC for ARM.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "cfghooks.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "intl.h"
#include "libfuncs.h"
#include "params.h"
#include "opts.h"
#include "dumpfile.h"
#include "target-globals.h"
#include "builtins.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "optabs-libfuncs.h"
#include "gimplify.h"
#include "gimple.h"
#include "selftest.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

struct four_ints
{
  int i[4];
};
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_entry, false },
  { "cmse_nonsecure_call", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_call, true },
  { NULL, 0, 0, false, false, false, NULL, false }
};
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
622 #endif /* ARM_UNWIND_INFO */
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
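/* That is, the block spans 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023.  */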
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
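/* An IT instruction can conditionalise at most four following instructions;
   with -mrestrict-it (arm_restrict_it) each IT block is limited to one.  */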
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
811 extern FILE * asm_out_file;
813 /* True if we are currently building a constant table. */
814 int making_const_table;
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
837 /* Active target architecture and tuning. */
839 struct arm_build_target arm_active_target;
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
914 /* Nonzero if tuning for XScale */
915 int arm_tune_xscale = 0;
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack, it's intended to help work around
927 problems in GLD which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
944 /* Nonzero if we should use Neon to handle 64-bits operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
954 enum arm_pcs arm_pcs_default;
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
974 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
975 int arm_arch_dotprod = 0;
977 /* Nonzero if chip supports the ARMv8-M security extensions. */
978 int arm_arch_cmse = 0;
980 /* Nonzero if the core has a very small, high-latency, multiply unit. */
981 int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)
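/* Work (scratch) registers for Thumb-2: the low registers, minus the hard
   frame pointer, stack pointer, program counter and PIC offset register.  */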
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {				\
    num_slots,			\
    l1_size,			\
    l1_line_size		\
  }

/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
1042 const struct cpu_cost_table cortexa9_extra_costs =
1044 /* ALU */
1046 0, /* arith. */
1047 0, /* logical. */
1048 0, /* shift. */
1049 COSTS_N_INSNS (1), /* shift_reg. */
1050 COSTS_N_INSNS (1), /* arith_shift. */
1051 COSTS_N_INSNS (2), /* arith_shift_reg. */
1052 0, /* log_shift. */
1053 COSTS_N_INSNS (1), /* log_shift_reg. */
1054 COSTS_N_INSNS (1), /* extend. */
1055 COSTS_N_INSNS (2), /* extend_arith. */
1056 COSTS_N_INSNS (1), /* bfi. */
1057 COSTS_N_INSNS (1), /* bfx. */
1058 0, /* clz. */
1059 0, /* rev. */
1060 0, /* non_exec. */
1061 true /* non_exec_costs_exec. */
1064 /* MULT SImode */
1066 COSTS_N_INSNS (3), /* simple. */
1067 COSTS_N_INSNS (3), /* flag_setting. */
1068 COSTS_N_INSNS (2), /* extend. */
1069 COSTS_N_INSNS (3), /* add. */
1070 COSTS_N_INSNS (2), /* extend_add. */
1071 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1073 /* MULT DImode */
1075 0, /* simple (N/A). */
1076 0, /* flag_setting (N/A). */
1077 COSTS_N_INSNS (4), /* extend. */
1078 0, /* add (N/A). */
1079 COSTS_N_INSNS (4), /* extend_add. */
1080 0 /* idiv (N/A). */
1083 /* LD/ST */
1085 COSTS_N_INSNS (2), /* load. */
1086 COSTS_N_INSNS (2), /* load_sign_extend. */
1087 COSTS_N_INSNS (2), /* ldrd. */
1088 COSTS_N_INSNS (2), /* ldm_1st. */
1089 1, /* ldm_regs_per_insn_1st. */
1090 2, /* ldm_regs_per_insn_subsequent. */
1091 COSTS_N_INSNS (5), /* loadf. */
1092 COSTS_N_INSNS (5), /* loadd. */
1093 COSTS_N_INSNS (1), /* load_unaligned. */
1094 COSTS_N_INSNS (2), /* store. */
1095 COSTS_N_INSNS (2), /* strd. */
1096 COSTS_N_INSNS (2), /* stm_1st. */
1097 1, /* stm_regs_per_insn_1st. */
1098 2, /* stm_regs_per_insn_subsequent. */
1099 COSTS_N_INSNS (1), /* storef. */
1100 COSTS_N_INSNS (1), /* stored. */
1101 COSTS_N_INSNS (1), /* store_unaligned. */
1102 COSTS_N_INSNS (1), /* loadv. */
1103 COSTS_N_INSNS (1) /* storev. */
1106 /* FP SFmode */
1108 COSTS_N_INSNS (14), /* div. */
1109 COSTS_N_INSNS (4), /* mult. */
1110 COSTS_N_INSNS (7), /* mult_addsub. */
1111 COSTS_N_INSNS (30), /* fma. */
1112 COSTS_N_INSNS (3), /* addsub. */
1113 COSTS_N_INSNS (1), /* fpconst. */
1114 COSTS_N_INSNS (1), /* neg. */
1115 COSTS_N_INSNS (3), /* compare. */
1116 COSTS_N_INSNS (3), /* widen. */
1117 COSTS_N_INSNS (3), /* narrow. */
1118 COSTS_N_INSNS (3), /* toint. */
1119 COSTS_N_INSNS (3), /* fromint. */
1120 COSTS_N_INSNS (3) /* roundint. */
1122 /* FP DFmode */
1124 COSTS_N_INSNS (24), /* div. */
1125 COSTS_N_INSNS (5), /* mult. */
1126 COSTS_N_INSNS (8), /* mult_addsub. */
1127 COSTS_N_INSNS (30), /* fma. */
1128 COSTS_N_INSNS (3), /* addsub. */
1129 COSTS_N_INSNS (1), /* fpconst. */
1130 COSTS_N_INSNS (1), /* neg. */
1131 COSTS_N_INSNS (3), /* compare. */
1132 COSTS_N_INSNS (3), /* widen. */
1133 COSTS_N_INSNS (3), /* narrow. */
1134 COSTS_N_INSNS (3), /* toint. */
1135 COSTS_N_INSNS (3), /* fromint. */
1136 COSTS_N_INSNS (3) /* roundint. */
1139 /* Vector */
1141 COSTS_N_INSNS (1) /* alu. */
1145 const struct cpu_cost_table cortexa8_extra_costs =
1147 /* ALU */
1149 0, /* arith. */
1150 0, /* logical. */
1151 COSTS_N_INSNS (1), /* shift. */
1152 0, /* shift_reg. */
1153 COSTS_N_INSNS (1), /* arith_shift. */
1154 0, /* arith_shift_reg. */
1155 COSTS_N_INSNS (1), /* log_shift. */
1156 0, /* log_shift_reg. */
1157 0, /* extend. */
1158 0, /* extend_arith. */
1159 0, /* bfi. */
1160 0, /* bfx. */
1161 0, /* clz. */
1162 0, /* rev. */
1163 0, /* non_exec. */
1164 true /* non_exec_costs_exec. */
1167 /* MULT SImode */
1169 COSTS_N_INSNS (1), /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1176 /* MULT DImode */
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (2), /* extend. */
1181 0, /* add (N/A). */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1186 /* LD/ST */
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (1), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (1), /* loadf. */
1195 COSTS_N_INSNS (1), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (1), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (1), /* storef. */
1203 COSTS_N_INSNS (1), /* stored. */
1204 COSTS_N_INSNS (1), /* store_unaligned. */
1205 COSTS_N_INSNS (1), /* loadv. */
1206 COSTS_N_INSNS (1) /* storev. */
1209 /* FP SFmode */
1211 COSTS_N_INSNS (36), /* div. */
1212 COSTS_N_INSNS (11), /* mult. */
1213 COSTS_N_INSNS (20), /* mult_addsub. */
1214 COSTS_N_INSNS (30), /* fma. */
1215 COSTS_N_INSNS (9), /* addsub. */
1216 COSTS_N_INSNS (3), /* fpconst. */
1217 COSTS_N_INSNS (3), /* neg. */
1218 COSTS_N_INSNS (6), /* compare. */
1219 COSTS_N_INSNS (4), /* widen. */
1220 COSTS_N_INSNS (4), /* narrow. */
1221 COSTS_N_INSNS (8), /* toint. */
1222 COSTS_N_INSNS (8), /* fromint. */
1223 COSTS_N_INSNS (8) /* roundint. */
1225 /* FP DFmode */
1227 COSTS_N_INSNS (64), /* div. */
1228 COSTS_N_INSNS (16), /* mult. */
1229 COSTS_N_INSNS (25), /* mult_addsub. */
1230 COSTS_N_INSNS (30), /* fma. */
1231 COSTS_N_INSNS (9), /* addsub. */
1232 COSTS_N_INSNS (3), /* fpconst. */
1233 COSTS_N_INSNS (3), /* neg. */
1234 COSTS_N_INSNS (6), /* compare. */
1235 COSTS_N_INSNS (6), /* widen. */
1236 COSTS_N_INSNS (6), /* narrow. */
1237 COSTS_N_INSNS (8), /* toint. */
1238 COSTS_N_INSNS (8), /* fromint. */
1239 COSTS_N_INSNS (8) /* roundint. */
1242 /* Vector */
1244 COSTS_N_INSNS (1) /* alu. */
1248 const struct cpu_cost_table cortexa5_extra_costs =
1250 /* ALU */
1252 0, /* arith. */
1253 0, /* logical. */
1254 COSTS_N_INSNS (1), /* shift. */
1255 COSTS_N_INSNS (1), /* shift_reg. */
1256 COSTS_N_INSNS (1), /* arith_shift. */
1257 COSTS_N_INSNS (1), /* arith_shift_reg. */
1258 COSTS_N_INSNS (1), /* log_shift. */
1259 COSTS_N_INSNS (1), /* log_shift_reg. */
1260 COSTS_N_INSNS (1), /* extend. */
1261 COSTS_N_INSNS (1), /* extend_arith. */
1262 COSTS_N_INSNS (1), /* bfi. */
1263 COSTS_N_INSNS (1), /* bfx. */
1264 COSTS_N_INSNS (1), /* clz. */
1265 COSTS_N_INSNS (1), /* rev. */
1266 0, /* non_exec. */
1267 true /* non_exec_costs_exec. */
1271 /* MULT SImode */
1273 0, /* simple. */
1274 COSTS_N_INSNS (1), /* flag_setting. */
1275 COSTS_N_INSNS (1), /* extend. */
1276 COSTS_N_INSNS (1), /* add. */
1277 COSTS_N_INSNS (1), /* extend_add. */
1278 COSTS_N_INSNS (7) /* idiv. */
1280 /* MULT DImode */
1282 0, /* simple (N/A). */
1283 0, /* flag_setting (N/A). */
1284 COSTS_N_INSNS (1), /* extend. */
1285 0, /* add. */
1286 COSTS_N_INSNS (2), /* extend_add. */
1287 0 /* idiv (N/A). */
1290 /* LD/ST */
1292 COSTS_N_INSNS (1), /* load. */
1293 COSTS_N_INSNS (1), /* load_sign_extend. */
1294 COSTS_N_INSNS (6), /* ldrd. */
1295 COSTS_N_INSNS (1), /* ldm_1st. */
1296 1, /* ldm_regs_per_insn_1st. */
1297 2, /* ldm_regs_per_insn_subsequent. */
1298 COSTS_N_INSNS (2), /* loadf. */
1299 COSTS_N_INSNS (4), /* loadd. */
1300 COSTS_N_INSNS (1), /* load_unaligned. */
1301 COSTS_N_INSNS (1), /* store. */
1302 COSTS_N_INSNS (3), /* strd. */
1303 COSTS_N_INSNS (1), /* stm_1st. */
1304 1, /* stm_regs_per_insn_1st. */
1305 2, /* stm_regs_per_insn_subsequent. */
1306 COSTS_N_INSNS (2), /* storef. */
1307 COSTS_N_INSNS (2), /* stored. */
1308 COSTS_N_INSNS (1), /* store_unaligned. */
1309 COSTS_N_INSNS (1), /* loadv. */
1310 COSTS_N_INSNS (1) /* storev. */
1313 /* FP SFmode */
1315 COSTS_N_INSNS (15), /* div. */
1316 COSTS_N_INSNS (3), /* mult. */
1317 COSTS_N_INSNS (7), /* mult_addsub. */
1318 COSTS_N_INSNS (7), /* fma. */
1319 COSTS_N_INSNS (3), /* addsub. */
1320 COSTS_N_INSNS (3), /* fpconst. */
1321 COSTS_N_INSNS (3), /* neg. */
1322 COSTS_N_INSNS (3), /* compare. */
1323 COSTS_N_INSNS (3), /* widen. */
1324 COSTS_N_INSNS (3), /* narrow. */
1325 COSTS_N_INSNS (3), /* toint. */
1326 COSTS_N_INSNS (3), /* fromint. */
1327 COSTS_N_INSNS (3) /* roundint. */
1329 /* FP DFmode */
1331 COSTS_N_INSNS (30), /* div. */
1332 COSTS_N_INSNS (6), /* mult. */
1333 COSTS_N_INSNS (10), /* mult_addsub. */
1334 COSTS_N_INSNS (7), /* fma. */
1335 COSTS_N_INSNS (3), /* addsub. */
1336 COSTS_N_INSNS (3), /* fpconst. */
1337 COSTS_N_INSNS (3), /* neg. */
1338 COSTS_N_INSNS (3), /* compare. */
1339 COSTS_N_INSNS (3), /* widen. */
1340 COSTS_N_INSNS (3), /* narrow. */
1341 COSTS_N_INSNS (3), /* toint. */
1342 COSTS_N_INSNS (3), /* fromint. */
1343 COSTS_N_INSNS (3) /* roundint. */
1346 /* Vector */
1348 COSTS_N_INSNS (1) /* alu. */
1353 const struct cpu_cost_table cortexa7_extra_costs =
1355 /* ALU */
1357 0, /* arith. */
1358 0, /* logical. */
1359 COSTS_N_INSNS (1), /* shift. */
1360 COSTS_N_INSNS (1), /* shift_reg. */
1361 COSTS_N_INSNS (1), /* arith_shift. */
1362 COSTS_N_INSNS (1), /* arith_shift_reg. */
1363 COSTS_N_INSNS (1), /* log_shift. */
1364 COSTS_N_INSNS (1), /* log_shift_reg. */
1365 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (1), /* extend_arith. */
1367 COSTS_N_INSNS (1), /* bfi. */
1368 COSTS_N_INSNS (1), /* bfx. */
1369 COSTS_N_INSNS (1), /* clz. */
1370 COSTS_N_INSNS (1), /* rev. */
1371 0, /* non_exec. */
1372 true /* non_exec_costs_exec. */
1376 /* MULT SImode */
1378 0, /* simple. */
1379 COSTS_N_INSNS (1), /* flag_setting. */
1380 COSTS_N_INSNS (1), /* extend. */
1381 COSTS_N_INSNS (1), /* add. */
1382 COSTS_N_INSNS (1), /* extend_add. */
1383 COSTS_N_INSNS (7) /* idiv. */
1385 /* MULT DImode */
1387 0, /* simple (N/A). */
1388 0, /* flag_setting (N/A). */
1389 COSTS_N_INSNS (1), /* extend. */
1390 0, /* add. */
1391 COSTS_N_INSNS (2), /* extend_add. */
1392 0 /* idiv (N/A). */
1395 /* LD/ST */
1397 COSTS_N_INSNS (1), /* load. */
1398 COSTS_N_INSNS (1), /* load_sign_extend. */
1399 COSTS_N_INSNS (3), /* ldrd. */
1400 COSTS_N_INSNS (1), /* ldm_1st. */
1401 1, /* ldm_regs_per_insn_1st. */
1402 2, /* ldm_regs_per_insn_subsequent. */
1403 COSTS_N_INSNS (2), /* loadf. */
1404 COSTS_N_INSNS (2), /* loadd. */
1405 COSTS_N_INSNS (1), /* load_unaligned. */
1406 COSTS_N_INSNS (1), /* store. */
1407 COSTS_N_INSNS (3), /* strd. */
1408 COSTS_N_INSNS (1), /* stm_1st. */
1409 1, /* stm_regs_per_insn_1st. */
1410 2, /* stm_regs_per_insn_subsequent. */
1411 COSTS_N_INSNS (2), /* storef. */
1412 COSTS_N_INSNS (2), /* stored. */
1413 COSTS_N_INSNS (1), /* store_unaligned. */
1414 COSTS_N_INSNS (1), /* loadv. */
1415 COSTS_N_INSNS (1) /* storev. */
1418 /* FP SFmode */
1420 COSTS_N_INSNS (15), /* div. */
1421 COSTS_N_INSNS (3), /* mult. */
1422 COSTS_N_INSNS (7), /* mult_addsub. */
1423 COSTS_N_INSNS (7), /* fma. */
1424 COSTS_N_INSNS (3), /* addsub. */
1425 COSTS_N_INSNS (3), /* fpconst. */
1426 COSTS_N_INSNS (3), /* neg. */
1427 COSTS_N_INSNS (3), /* compare. */
1428 COSTS_N_INSNS (3), /* widen. */
1429 COSTS_N_INSNS (3), /* narrow. */
1430 COSTS_N_INSNS (3), /* toint. */
1431 COSTS_N_INSNS (3), /* fromint. */
1432 COSTS_N_INSNS (3) /* roundint. */
1434 /* FP DFmode */
1436 COSTS_N_INSNS (30), /* div. */
1437 COSTS_N_INSNS (6), /* mult. */
1438 COSTS_N_INSNS (10), /* mult_addsub. */
1439 COSTS_N_INSNS (7), /* fma. */
1440 COSTS_N_INSNS (3), /* addsub. */
1441 COSTS_N_INSNS (3), /* fpconst. */
1442 COSTS_N_INSNS (3), /* neg. */
1443 COSTS_N_INSNS (3), /* compare. */
1444 COSTS_N_INSNS (3), /* widen. */
1445 COSTS_N_INSNS (3), /* narrow. */
1446 COSTS_N_INSNS (3), /* toint. */
1447 COSTS_N_INSNS (3), /* fromint. */
1448 COSTS_N_INSNS (3) /* roundint. */
1451 /* Vector */
1453 COSTS_N_INSNS (1) /* alu. */
1457 const struct cpu_cost_table cortexa12_extra_costs =
1459 /* ALU */
1461 0, /* arith. */
1462 0, /* logical. */
1463 0, /* shift. */
1464 COSTS_N_INSNS (1), /* shift_reg. */
1465 COSTS_N_INSNS (1), /* arith_shift. */
1466 COSTS_N_INSNS (1), /* arith_shift_reg. */
1467 COSTS_N_INSNS (1), /* log_shift. */
1468 COSTS_N_INSNS (1), /* log_shift_reg. */
1469 0, /* extend. */
1470 COSTS_N_INSNS (1), /* extend_arith. */
1471 0, /* bfi. */
1472 COSTS_N_INSNS (1), /* bfx. */
1473 COSTS_N_INSNS (1), /* clz. */
1474 COSTS_N_INSNS (1), /* rev. */
1475 0, /* non_exec. */
1476 true /* non_exec_costs_exec. */
1478 /* MULT SImode */
1481 COSTS_N_INSNS (2), /* simple. */
1482 COSTS_N_INSNS (3), /* flag_setting. */
1483 COSTS_N_INSNS (2), /* extend. */
1484 COSTS_N_INSNS (3), /* add. */
1485 COSTS_N_INSNS (2), /* extend_add. */
1486 COSTS_N_INSNS (18) /* idiv. */
1488 /* MULT DImode */
1490 0, /* simple (N/A). */
1491 0, /* flag_setting (N/A). */
1492 COSTS_N_INSNS (3), /* extend. */
1493 0, /* add (N/A). */
1494 COSTS_N_INSNS (3), /* extend_add. */
1495 0 /* idiv (N/A). */
1498 /* LD/ST */
1500 COSTS_N_INSNS (3), /* load. */
1501 COSTS_N_INSNS (3), /* load_sign_extend. */
1502 COSTS_N_INSNS (3), /* ldrd. */
1503 COSTS_N_INSNS (3), /* ldm_1st. */
1504 1, /* ldm_regs_per_insn_1st. */
1505 2, /* ldm_regs_per_insn_subsequent. */
1506 COSTS_N_INSNS (3), /* loadf. */
1507 COSTS_N_INSNS (3), /* loadd. */
1508 0, /* load_unaligned. */
1509 0, /* store. */
1510 0, /* strd. */
1511 0, /* stm_1st. */
1512 1, /* stm_regs_per_insn_1st. */
1513 2, /* stm_regs_per_insn_subsequent. */
1514 COSTS_N_INSNS (2), /* storef. */
1515 COSTS_N_INSNS (2), /* stored. */
1516 0, /* store_unaligned. */
1517 COSTS_N_INSNS (1), /* loadv. */
1518 COSTS_N_INSNS (1) /* storev. */
1521 /* FP SFmode */
1523 COSTS_N_INSNS (17), /* div. */
1524 COSTS_N_INSNS (4), /* mult. */
1525 COSTS_N_INSNS (8), /* mult_addsub. */
1526 COSTS_N_INSNS (8), /* fma. */
1527 COSTS_N_INSNS (4), /* addsub. */
1528 COSTS_N_INSNS (2), /* fpconst. */
1529 COSTS_N_INSNS (2), /* neg. */
1530 COSTS_N_INSNS (2), /* compare. */
1531 COSTS_N_INSNS (4), /* widen. */
1532 COSTS_N_INSNS (4), /* narrow. */
1533 COSTS_N_INSNS (4), /* toint. */
1534 COSTS_N_INSNS (4), /* fromint. */
1535 COSTS_N_INSNS (4) /* roundint. */
1537 /* FP DFmode */
1539 COSTS_N_INSNS (31), /* div. */
1540 COSTS_N_INSNS (4), /* mult. */
1541 COSTS_N_INSNS (8), /* mult_addsub. */
1542 COSTS_N_INSNS (8), /* fma. */
1543 COSTS_N_INSNS (4), /* addsub. */
1544 COSTS_N_INSNS (2), /* fpconst. */
1545 COSTS_N_INSNS (2), /* neg. */
1546 COSTS_N_INSNS (2), /* compare. */
1547 COSTS_N_INSNS (4), /* widen. */
1548 COSTS_N_INSNS (4), /* narrow. */
1549 COSTS_N_INSNS (4), /* toint. */
1550 COSTS_N_INSNS (4), /* fromint. */
1551 COSTS_N_INSNS (4) /* roundint. */
1554 /* Vector */
1556 COSTS_N_INSNS (1) /* alu. */
1560 const struct cpu_cost_table cortexa15_extra_costs =
1562 /* ALU */
1564 0, /* arith. */
1565 0, /* logical. */
1566 0, /* shift. */
1567 0, /* shift_reg. */
1568 COSTS_N_INSNS (1), /* arith_shift. */
1569 COSTS_N_INSNS (1), /* arith_shift_reg. */
1570 COSTS_N_INSNS (1), /* log_shift. */
1571 COSTS_N_INSNS (1), /* log_shift_reg. */
1572 0, /* extend. */
1573 COSTS_N_INSNS (1), /* extend_arith. */
1574 COSTS_N_INSNS (1), /* bfi. */
1575 0, /* bfx. */
1576 0, /* clz. */
1577 0, /* rev. */
1578 0, /* non_exec. */
1579 true /* non_exec_costs_exec. */
1581 /* MULT SImode */
1584 COSTS_N_INSNS (2), /* simple. */
1585 COSTS_N_INSNS (3), /* flag_setting. */
1586 COSTS_N_INSNS (2), /* extend. */
1587 COSTS_N_INSNS (2), /* add. */
1588 COSTS_N_INSNS (2), /* extend_add. */
1589 COSTS_N_INSNS (18) /* idiv. */
1591 /* MULT DImode */
1593 0, /* simple (N/A). */
1594 0, /* flag_setting (N/A). */
1595 COSTS_N_INSNS (3), /* extend. */
1596 0, /* add (N/A). */
1597 COSTS_N_INSNS (3), /* extend_add. */
1598 0 /* idiv (N/A). */
1601 /* LD/ST */
1603 COSTS_N_INSNS (3), /* load. */
1604 COSTS_N_INSNS (3), /* load_sign_extend. */
1605 COSTS_N_INSNS (3), /* ldrd. */
1606 COSTS_N_INSNS (4), /* ldm_1st. */
1607 1, /* ldm_regs_per_insn_1st. */
1608 2, /* ldm_regs_per_insn_subsequent. */
1609 COSTS_N_INSNS (4), /* loadf. */
1610 COSTS_N_INSNS (4), /* loadd. */
1611 0, /* load_unaligned. */
1612 0, /* store. */
1613 0, /* strd. */
1614 COSTS_N_INSNS (1), /* stm_1st. */
1615 1, /* stm_regs_per_insn_1st. */
1616 2, /* stm_regs_per_insn_subsequent. */
1617 0, /* storef. */
1618 0, /* stored. */
1619 0, /* store_unaligned. */
1620 COSTS_N_INSNS (1), /* loadv. */
1621 COSTS_N_INSNS (1) /* storev. */
1624 /* FP SFmode */
1626 COSTS_N_INSNS (17), /* div. */
1627 COSTS_N_INSNS (4), /* mult. */
1628 COSTS_N_INSNS (8), /* mult_addsub. */
1629 COSTS_N_INSNS (8), /* fma. */
1630 COSTS_N_INSNS (4), /* addsub. */
1631 COSTS_N_INSNS (2), /* fpconst. */
1632 COSTS_N_INSNS (2), /* neg. */
1633 COSTS_N_INSNS (5), /* compare. */
1634 COSTS_N_INSNS (4), /* widen. */
1635 COSTS_N_INSNS (4), /* narrow. */
1636 COSTS_N_INSNS (4), /* toint. */
1637 COSTS_N_INSNS (4), /* fromint. */
1638 COSTS_N_INSNS (4) /* roundint. */
1640 /* FP DFmode */
1642 COSTS_N_INSNS (31), /* div. */
1643 COSTS_N_INSNS (4), /* mult. */
1644 COSTS_N_INSNS (8), /* mult_addsub. */
1645 COSTS_N_INSNS (8), /* fma. */
1646 COSTS_N_INSNS (4), /* addsub. */
1647 COSTS_N_INSNS (2), /* fpconst. */
1648 COSTS_N_INSNS (2), /* neg. */
1649 COSTS_N_INSNS (2), /* compare. */
1650 COSTS_N_INSNS (4), /* widen. */
1651 COSTS_N_INSNS (4), /* narrow. */
1652 COSTS_N_INSNS (4), /* toint. */
1653 COSTS_N_INSNS (4), /* fromint. */
1654 COSTS_N_INSNS (4) /* roundint. */
1657 /* Vector */
1659 COSTS_N_INSNS (1) /* alu. */
1663 const struct cpu_cost_table v7m_extra_costs =
1665 /* ALU */
1667 0, /* arith. */
1668 0, /* logical. */
1669 0, /* shift. */
1670 0, /* shift_reg. */
1671 0, /* arith_shift. */
1672 COSTS_N_INSNS (1), /* arith_shift_reg. */
1673 0, /* log_shift. */
1674 COSTS_N_INSNS (1), /* log_shift_reg. */
1675 0, /* extend. */
1676 COSTS_N_INSNS (1), /* extend_arith. */
1677 0, /* bfi. */
1678 0, /* bfx. */
1679 0, /* clz. */
1680 0, /* rev. */
1681 COSTS_N_INSNS (1), /* non_exec. */
1682 false /* non_exec_costs_exec. */
1685 /* MULT SImode */
1687 COSTS_N_INSNS (1), /* simple. */
1688 COSTS_N_INSNS (1), /* flag_setting. */
1689 COSTS_N_INSNS (2), /* extend. */
1690 COSTS_N_INSNS (1), /* add. */
1691 COSTS_N_INSNS (3), /* extend_add. */
1692 COSTS_N_INSNS (8) /* idiv. */
1694 /* MULT DImode */
1696 0, /* simple (N/A). */
1697 0, /* flag_setting (N/A). */
1698 COSTS_N_INSNS (2), /* extend. */
1699 0, /* add (N/A). */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 0 /* idiv (N/A). */
1704 /* LD/ST */
1706 COSTS_N_INSNS (2), /* load. */
1707 0, /* load_sign_extend. */
1708 COSTS_N_INSNS (3), /* ldrd. */
1709 COSTS_N_INSNS (2), /* ldm_1st. */
1710 1, /* ldm_regs_per_insn_1st. */
1711 1, /* ldm_regs_per_insn_subsequent. */
1712 COSTS_N_INSNS (2), /* loadf. */
1713 COSTS_N_INSNS (3), /* loadd. */
1714 COSTS_N_INSNS (1), /* load_unaligned. */
1715 COSTS_N_INSNS (2), /* store. */
1716 COSTS_N_INSNS (3), /* strd. */
1717 COSTS_N_INSNS (2), /* stm_1st. */
1718 1, /* stm_regs_per_insn_1st. */
1719 1, /* stm_regs_per_insn_subsequent. */
1720 COSTS_N_INSNS (2), /* storef. */
1721 COSTS_N_INSNS (3), /* stored. */
1722 COSTS_N_INSNS (1), /* store_unaligned. */
1723 COSTS_N_INSNS (1), /* loadv. */
1724 COSTS_N_INSNS (1) /* storev. */
1727 /* FP SFmode */
1729 COSTS_N_INSNS (7), /* div. */
1730 COSTS_N_INSNS (2), /* mult. */
1731 COSTS_N_INSNS (5), /* mult_addsub. */
1732 COSTS_N_INSNS (3), /* fma. */
1733 COSTS_N_INSNS (1), /* addsub. */
1734 0, /* fpconst. */
1735 0, /* neg. */
1736 0, /* compare. */
1737 0, /* widen. */
1738 0, /* narrow. */
1739 0, /* toint. */
1740 0, /* fromint. */
1741 0 /* roundint. */
1743 /* FP DFmode */
1745 COSTS_N_INSNS (15), /* div. */
1746 COSTS_N_INSNS (5), /* mult. */
1747 COSTS_N_INSNS (7), /* mult_addsub. */
1748 COSTS_N_INSNS (7), /* fma. */
1749 COSTS_N_INSNS (3), /* addsub. */
1750 0, /* fpconst. */
1751 0, /* neg. */
1752 0, /* compare. */
1753 0, /* widen. */
1754 0, /* narrow. */
1755 0, /* toint. */
1756 0, /* fromint. */
1757 0 /* roundint. */
1760 /* Vector */
1762 COSTS_N_INSNS (1) /* alu. */
1766 const struct tune_params arm_slowmul_tune =
1768 &generic_extra_costs, /* Insn extra costs. */
1769 NULL, /* Sched adj cost. */
1770 arm_default_branch_cost,
1771 &arm_default_vec_cost,
1772 3, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 tune_params::PREF_CONST_POOL_TRUE,
1778 tune_params::PREF_LDRD_FALSE,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER,
1782 tune_params::PREF_NEON_64_FALSE,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE,
1784 tune_params::FUSE_NOTHING,
1785 tune_params::SCHED_AUTOPREF_OFF
1788 const struct tune_params arm_fastmul_tune =
1790 &generic_extra_costs, /* Insn extra costs. */
1791 NULL, /* Sched adj cost. */
1792 arm_default_branch_cost,
1793 &arm_default_vec_cost,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL,
1799 tune_params::PREF_CONST_POOL_TRUE,
1800 tune_params::PREF_LDRD_FALSE,
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER,
1804 tune_params::PREF_NEON_64_FALSE,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE,
1806 tune_params::FUSE_NOTHING,
1807 tune_params::SCHED_AUTOPREF_OFF
1810 /* StrongARM has early execution of branches, so a sequence that is worth
1811 skipping is shorter. Set max_insns_skipped to a lower value. */
1813 const struct tune_params arm_strongarm_tune =
1815 &generic_extra_costs, /* Insn extra costs. */
1816 NULL, /* Sched adj cost. */
1817 arm_default_branch_cost,
1818 &arm_default_vec_cost,
1819 1, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 tune_params::PREF_CONST_POOL_TRUE,
1825 tune_params::PREF_LDRD_FALSE,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER,
1829 tune_params::PREF_NEON_64_FALSE,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE,
1831 tune_params::FUSE_NOTHING,
1832 tune_params::SCHED_AUTOPREF_OFF
1835 const struct tune_params arm_xscale_tune =
1837 &generic_extra_costs, /* Insn extra costs. */
1838 xscale_sched_adjust_cost,
1839 arm_default_branch_cost,
1840 &arm_default_vec_cost,
1841 2, /* Constant limit. */
1842 3, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 tune_params::PREF_CONST_POOL_TRUE,
1847 tune_params::PREF_LDRD_FALSE,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER,
1851 tune_params::PREF_NEON_64_FALSE,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE,
1853 tune_params::FUSE_NOTHING,
1854 tune_params::SCHED_AUTOPREF_OFF
1857 const struct tune_params arm_9e_tune =
1859 &generic_extra_costs, /* Insn extra costs. */
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 1, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_TRUE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_marvell_pj4_tune =
1881 &generic_extra_costs, /* Insn extra costs. */
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_TRUE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_v6t2_tune =
1903 &generic_extra_costs, /* Insn extra costs. */
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 1, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1924 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1925 const struct tune_params arm_cortex_tune =
1927 &generic_extra_costs,
1928 NULL, /* Sched adj cost. */
1929 arm_default_branch_cost,
1930 &arm_default_vec_cost,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL,
1936 tune_params::PREF_CONST_POOL_FALSE,
1937 tune_params::PREF_LDRD_FALSE,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER,
1941 tune_params::PREF_NEON_64_FALSE,
1942 tune_params::PREF_NEON_STRINGOPS_FALSE,
1943 tune_params::FUSE_NOTHING,
1944 tune_params::SCHED_AUTOPREF_OFF
1947 const struct tune_params arm_cortex_a8_tune =
1949 &cortexa8_extra_costs,
1950 NULL, /* Sched adj cost. */
1951 arm_default_branch_cost,
1952 &arm_default_vec_cost,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL,
1958 tune_params::PREF_CONST_POOL_FALSE,
1959 tune_params::PREF_LDRD_FALSE,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER,
1963 tune_params::PREF_NEON_64_FALSE,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE,
1965 tune_params::FUSE_NOTHING,
1966 tune_params::SCHED_AUTOPREF_OFF
1969 const struct tune_params arm_cortex_a7_tune =
1971 &cortexa7_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 arm_default_branch_cost,
1974 &arm_default_vec_cost,
1975 1, /* Constant limit. */
1976 5, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 2, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 tune_params::PREF_CONST_POOL_FALSE,
1981 tune_params::PREF_LDRD_FALSE,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_NEITHER,
1985 tune_params::PREF_NEON_64_FALSE,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE,
1987 tune_params::FUSE_NOTHING,
1988 tune_params::SCHED_AUTOPREF_OFF
1991 const struct tune_params arm_cortex_a15_tune =
1993 &cortexa15_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 3, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_TRUE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE,
2009 tune_params::FUSE_NOTHING,
2010 tune_params::SCHED_AUTOPREF_FULL
2013 const struct tune_params arm_cortex_a35_tune =
2015 &cortexa53_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 arm_default_branch_cost,
2018 &arm_default_vec_cost,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 1, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL,
2024 tune_params::PREF_CONST_POOL_FALSE,
2025 tune_params::PREF_LDRD_FALSE,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER,
2029 tune_params::PREF_NEON_64_FALSE,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2032 tune_params::SCHED_AUTOPREF_OFF
2035 const struct tune_params arm_cortex_a53_tune =
2037 &cortexa53_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_64_FALSE,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2054 tune_params::SCHED_AUTOPREF_OFF
2057 const struct tune_params arm_cortex_a57_tune =
2059 &cortexa57_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_TRUE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2076 tune_params::SCHED_AUTOPREF_FULL
2079 const struct tune_params arm_exynosm1_tune =
2081 &exynosm1_extra_costs,
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 3, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_TRUE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_OFF
2101 const struct tune_params arm_xgene1_tune =
2103 &xgene1_extra_costs,
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 32, /* Memset max inline. */
2110 4, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_64_FALSE,
2118 tune_params::PREF_NEON_STRINGOPS_FALSE,
2119 tune_params::FUSE_NOTHING,
2120 tune_params::SCHED_AUTOPREF_OFF
2123 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2124 less appealing. Set max_insns_skipped to a low value. */
2126 const struct tune_params arm_cortex_a5_tune =
2128 &cortexa5_extra_costs,
2129 NULL, /* Sched adj cost. */
2130 arm_cortex_a5_branch_cost,
2131 &arm_default_vec_cost,
2132 1, /* Constant limit. */
2133 1, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_NOT_BENEFICIAL,
2137 tune_params::PREF_CONST_POOL_FALSE,
2138 tune_params::PREF_LDRD_FALSE,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER,
2142 tune_params::PREF_NEON_64_FALSE,
2143 tune_params::PREF_NEON_STRINGOPS_TRUE,
2144 tune_params::FUSE_NOTHING,
2145 tune_params::SCHED_AUTOPREF_OFF
2148 const struct tune_params arm_cortex_a9_tune =
2150 &cortexa9_extra_costs,
2151 cortex_a9_sched_adjust_cost,
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 5, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_BENEFICIAL(4,32,32),
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_FALSE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_NEITHER,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2170 const struct tune_params arm_cortex_a12_tune =
2172 &cortexa12_extra_costs,
2173 NULL, /* Sched adj cost. */
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_TRUE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2189 tune_params::SCHED_AUTOPREF_OFF
2192 const struct tune_params arm_cortex_a73_tune =
2194 &cortexa57_extra_costs,
2195 NULL, /* Sched adj cost. */
2196 arm_default_branch_cost,
2197 &arm_default_vec_cost, /* Vectorizer costs. */
2198 1, /* Constant limit. */
2199 2, /* Max cond insns. */
2200 8, /* Memset max inline. */
2201 2, /* Issue rate. */
2202 ARM_PREFETCH_NOT_BENEFICIAL,
2203 tune_params::PREF_CONST_POOL_FALSE,
2204 tune_params::PREF_LDRD_TRUE,
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2207 tune_params::DISPARAGE_FLAGS_ALL,
2208 tune_params::PREF_NEON_64_FALSE,
2209 tune_params::PREF_NEON_STRINGOPS_TRUE,
2210 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2211 tune_params::SCHED_AUTOPREF_FULL
2214 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a
2215 single cycle, so materialising a constant with the pair costs two cycles.
2216 An LDR from the constant pool also takes two cycles to execute, but mildly
2217 increases pipelining opportunity (consecutive loads/stores can be pipelined
2218 together, saving one cycle), and may also improve icache utilisation.
2219 Hence we prefer the constant pool for such processors. */
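/* A minimal sketch of the two alternatives (illustrative assembly, not
   generated output):

     movw  r0, #0x5678        @ two single-cycle instructions
     movt  r0, #0x1234        @ r0 now holds 0x12345678

   versus a single literal-pool load:

     ldr   r0, .Lconst        @ two cycles, but may pipeline with
                              @ neighbouring loads/stores

   The tuning entries below therefore keep PREF_CONST_POOL_TRUE.  */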
2221 const struct tune_params arm_v7m_tune =
2223 &v7m_extra_costs,
2224 NULL, /* Sched adj cost. */
2225 arm_cortex_m_branch_cost,
2226 &arm_default_vec_cost,
2227 1, /* Constant limit. */
2228 2, /* Max cond insns. */
2229 8, /* Memset max inline. */
2230 1, /* Issue rate. */
2231 ARM_PREFETCH_NOT_BENEFICIAL,
2232 tune_params::PREF_CONST_POOL_TRUE,
2233 tune_params::PREF_LDRD_FALSE,
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2235 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2236 tune_params::DISPARAGE_FLAGS_NEITHER,
2237 tune_params::PREF_NEON_64_FALSE,
2238 tune_params::PREF_NEON_STRINGOPS_FALSE,
2239 tune_params::FUSE_NOTHING,
2240 tune_params::SCHED_AUTOPREF_OFF
2243 /* Cortex-M7 tuning. */
2245 const struct tune_params arm_cortex_m7_tune =
2247 &v7m_extra_costs,
2248 NULL, /* Sched adj cost. */
2249 arm_cortex_m7_branch_cost,
2250 &arm_default_vec_cost,
2251 0, /* Constant limit. */
2252 1, /* Max cond insns. */
2253 8, /* Memset max inline. */
2254 2, /* Issue rate. */
2255 ARM_PREFETCH_NOT_BENEFICIAL,
2256 tune_params::PREF_CONST_POOL_TRUE,
2257 tune_params::PREF_LDRD_FALSE,
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2260 tune_params::DISPARAGE_FLAGS_NEITHER,
2261 tune_params::PREF_NEON_64_FALSE,
2262 tune_params::PREF_NEON_STRINGOPS_FALSE,
2263 tune_params::FUSE_NOTHING,
2264 tune_params::SCHED_AUTOPREF_OFF
2267 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2268 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2269 cortex-m23. */
2270 const struct tune_params arm_v6m_tune =
2272 &generic_extra_costs, /* Insn extra costs. */
2273 NULL, /* Sched adj cost. */
2274 arm_default_branch_cost,
2275 &arm_default_vec_cost, /* Vectorizer costs. */
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 1, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL,
2281 tune_params::PREF_CONST_POOL_FALSE,
2282 tune_params::PREF_LDRD_FALSE,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER,
2286 tune_params::PREF_NEON_64_FALSE,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2292 const struct tune_params arm_fa726te_tune =
2294 &generic_extra_costs, /* Insn extra costs. */
2295 fa726te_sched_adjust_cost,
2296 arm_default_branch_cost,
2297 &arm_default_vec_cost,
2298 1, /* Constant limit. */
2299 5, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2314 /* Auto-generated CPU, FPU and architecture tables. */
2315 #include "arm-cpu-data.h"
2317 /* The name of the preprocessor macro to define for this architecture. PROFILE
2318 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2319 is thus chosen to be big enough to hold the longest architecture name. */
2321 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
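/* For example, after substitution this becomes "__ARM_ARCH_8A__" when
   compiling for an ARMv8-A target; the exact string depends on the
   selected architecture.  */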
2323 /* Supported TLS relocations. */
2325 enum tls_reloc {
2326 TLS_GD32,
2327 TLS_LDM32,
2328 TLS_LDO32,
2329 TLS_IE32,
2330 TLS_LE32,
2331 TLS_DESCSEQ /* GNU scheme */
2334 /* The maximum number of insns to be used when loading a constant. */
2335 inline static int
2336 arm_constant_limit (bool size_p)
2338 return size_p ? 1 : current_tune->constant_limit;
2341 /* Emit an insn that's a simple single-set. Both the operands must be known
2342 to be valid. */
2343 inline static rtx_insn *
2344 emit_set_insn (rtx x, rtx y)
2346 return emit_insn (gen_rtx_SET (x, y));
2349 /* Return the number of bits set in VALUE. */
2350 static unsigned
2351 bit_count (unsigned long value)
2353 unsigned long count = 0;
2355 while (value)
2357 count++;
2358 value &= value - 1; /* Clear the least-significant set bit. */
2361 return count;
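/* For example, bit_count (0x2c) iterates 0x2c -> 0x28 -> 0x20 -> 0 and
   returns 3, one iteration per set bit.  */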
2364 /* Return the number of bits set in BMAP. */
2365 static unsigned
2366 bitmap_popcount (const sbitmap bmap)
2368 unsigned int count = 0;
2369 unsigned int n = 0;
2370 sbitmap_iterator sbi;
2372 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2373 count++;
2374 return count;
2377 typedef struct
2379 machine_mode mode;
2380 const char *name;
2381 } arm_fixed_mode_set;
2383 /* A small helper for setting the fixed-point libfuncs. */
2385 static void
2386 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2387 const char *funcname, const char *modename,
2388 int num_suffix)
2390 char buffer[50];
2392 if (num_suffix == 0)
2393 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2394 else
2395 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2397 set_optab_libfunc (optable, mode, buffer);
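/* For example, a (hypothetical) call such as

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);

   registers the libcall name "__gnu_addqq3"; a NUM_SUFFIX of 0 simply
   omits the trailing digit.  Shown only to illustrate the naming scheme
   above.  */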
2400 static void
2401 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2402 machine_mode from, const char *funcname,
2403 const char *toname, const char *fromname)
2405 char buffer[50];
2406 const char *maybe_suffix_2 = "";
2408 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2409 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2410 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2411 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2412 maybe_suffix_2 = "2";
2414 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2415 maybe_suffix_2);
2417 set_conv_libfunc (optable, to, from, buffer);
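/* For example, a conversion from QQmode to HQmode is registered as
   "__gnu_fractqqhq2" (both are signed fract modes, so the "2" suffix is
   added), whereas QQmode to SImode becomes "__gnu_fractqqsi".  The names
   are shown only to illustrate the scheme.  */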
2420 /* Set up library functions unique to ARM. */
2422 static void
2423 arm_init_libfuncs (void)
2425 /* For Linux, we have access to kernel support for atomic operations. */
2426 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2427 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2429 /* There are no special library functions unless we are using the
2430 ARM BPABI. */
2431 if (!TARGET_BPABI)
2432 return;
2434 /* The functions below are described in Section 4 of the "Run-Time
2435 ABI for the ARM architecture", Version 1.0. */
2437 /* Double-precision floating-point arithmetic. Table 2. */
2438 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2439 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2440 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2441 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2442 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2444 /* Double-precision comparisons. Table 3. */
2445 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2446 set_optab_libfunc (ne_optab, DFmode, NULL);
2447 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2448 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2449 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2450 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2451 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2453 /* Single-precision floating-point arithmetic. Table 4. */
2454 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2455 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2456 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2457 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2458 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2460 /* Single-precision comparisons. Table 5. */
2461 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2462 set_optab_libfunc (ne_optab, SFmode, NULL);
2463 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2464 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2465 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2466 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2467 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2469 /* Floating-point to integer conversions. Table 6. */
2470 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2471 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2472 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2473 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2474 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2475 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2476 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2477 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2479 /* Conversions between floating types. Table 7. */
2480 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2481 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2483 /* Integer to floating-point conversions. Table 8. */
2484 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2486 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2487 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2488 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2490 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2491 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2493 /* Long long. Table 9. */
2494 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2495 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2496 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2497 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2498 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2499 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2500 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2501 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2503 /* Integer (32/32->32) division. \S 4.3.1. */
2504 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2505 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2507 /* The divmod functions are designed so that they can be used for
2508 plain division, even though they return both the quotient and the
2509 remainder. The quotient is returned in the usual location (i.e.,
2510 r0 for SImode, {r0, r1} for DImode), just as would be expected
2511 for an ordinary division routine. Because the AAPCS calling
2512 conventions specify that all of { r0, r1, r2, r3 } are
2513 call-clobbered registers, there is no need to tell the compiler
2514 explicitly that those registers are clobbered by these
2515 routines. */
2516 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2517 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2519 /* For SImode division the ABI provides div-without-mod routines,
2520 which are faster. */
2521 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2522 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
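/* As a rough illustration (not actual compiler output), a simple SImode
   division such as

     int quot (int x, int y) { return x / y; }

   can therefore be compiled into a call to __aeabi_idiv, with the
   quotient returned in r0 like any other int result.  */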
2524 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2525 divmod libcalls instead. */
2526 set_optab_libfunc (smod_optab, DImode, NULL);
2527 set_optab_libfunc (umod_optab, DImode, NULL);
2528 set_optab_libfunc (smod_optab, SImode, NULL);
2529 set_optab_libfunc (umod_optab, SImode, NULL);
2531 /* Half-precision float operations. The compiler handles all operations
2532 with NULL libfuncs by converting to SFmode. */
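/* Conceptually (a sketch, not the exact expansion), an HFmode addition
   is performed as

     __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b))

   i.e. the operands are widened to SFmode, the arithmetic is done there,
   and the result is narrowed back; the alternative-format helpers are
   used for ARM_FP16_FORMAT_ALTERNATIVE.  */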
2533 switch (arm_fp16_format)
2535 case ARM_FP16_FORMAT_IEEE:
2536 case ARM_FP16_FORMAT_ALTERNATIVE:
2538 /* Conversions. */
2539 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_f2h_ieee"
2542 : "__gnu_f2h_alternative"));
2543 set_conv_libfunc (sext_optab, SFmode, HFmode,
2544 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2545 ? "__gnu_h2f_ieee"
2546 : "__gnu_h2f_alternative"));
2548 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2549 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2550 ? "__gnu_d2h_ieee"
2551 : "__gnu_d2h_alternative"));
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, HFmode, NULL);
2555 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2556 set_optab_libfunc (smul_optab, HFmode, NULL);
2557 set_optab_libfunc (neg_optab, HFmode, NULL);
2558 set_optab_libfunc (sub_optab, HFmode, NULL);
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, HFmode, NULL);
2562 set_optab_libfunc (ne_optab, HFmode, NULL);
2563 set_optab_libfunc (lt_optab, HFmode, NULL);
2564 set_optab_libfunc (le_optab, HFmode, NULL);
2565 set_optab_libfunc (ge_optab, HFmode, NULL);
2566 set_optab_libfunc (gt_optab, HFmode, NULL);
2567 set_optab_libfunc (unord_optab, HFmode, NULL);
2568 break;
2570 default:
2571 break;
2574 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2576 const arm_fixed_mode_set fixed_arith_modes[] =
2578 { E_QQmode, "qq" },
2579 { E_UQQmode, "uqq" },
2580 { E_HQmode, "hq" },
2581 { E_UHQmode, "uhq" },
2582 { E_SQmode, "sq" },
2583 { E_USQmode, "usq" },
2584 { E_DQmode, "dq" },
2585 { E_UDQmode, "udq" },
2586 { E_TQmode, "tq" },
2587 { E_UTQmode, "utq" },
2588 { E_HAmode, "ha" },
2589 { E_UHAmode, "uha" },
2590 { E_SAmode, "sa" },
2591 { E_USAmode, "usa" },
2592 { E_DAmode, "da" },
2593 { E_UDAmode, "uda" },
2594 { E_TAmode, "ta" },
2595 { E_UTAmode, "uta" }
2597 const arm_fixed_mode_set fixed_conv_modes[] =
2599 { E_QQmode, "qq" },
2600 { E_UQQmode, "uqq" },
2601 { E_HQmode, "hq" },
2602 { E_UHQmode, "uhq" },
2603 { E_SQmode, "sq" },
2604 { E_USQmode, "usq" },
2605 { E_DQmode, "dq" },
2606 { E_UDQmode, "udq" },
2607 { E_TQmode, "tq" },
2608 { E_UTQmode, "utq" },
2609 { E_HAmode, "ha" },
2610 { E_UHAmode, "uha" },
2611 { E_SAmode, "sa" },
2612 { E_USAmode, "usa" },
2613 { E_DAmode, "da" },
2614 { E_UDAmode, "uda" },
2615 { E_TAmode, "ta" },
2616 { E_UTAmode, "uta" },
2617 { E_QImode, "qi" },
2618 { E_HImode, "hi" },
2619 { E_SImode, "si" },
2620 { E_DImode, "di" },
2621 { E_TImode, "ti" },
2622 { E_SFmode, "sf" },
2623 { E_DFmode, "df" }
2625 unsigned int i, j;
2627 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2629 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2630 "add", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2632 "ssadd", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2634 "usadd", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2636 "sub", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2638 "sssub", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2640 "ussub", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2642 "mul", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2644 "ssmul", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2646 "usmul", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2648 "div", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2650 "udiv", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2652 "ssdiv", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2654 "usdiv", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2656 "neg", fixed_arith_modes[i].name, 2);
2657 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2658 "ssneg", fixed_arith_modes[i].name, 2);
2659 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2660 "usneg", fixed_arith_modes[i].name, 2);
2661 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2662 "ashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2664 "ashr", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2666 "lshr", fixed_arith_modes[i].name, 3);
2667 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2668 "ssashl", fixed_arith_modes[i].name, 3);
2669 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2670 "usashl", fixed_arith_modes[i].name, 3);
2671 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2672 "cmp", fixed_arith_modes[i].name, 2);
2675 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2676 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2678 if (i == j
2679 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2680 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2681 continue;
2683 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "fract",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2687 arm_set_fixed_conv_libfunc (satfract_optab,
2688 fixed_conv_modes[i].mode,
2689 fixed_conv_modes[j].mode, "satfract",
2690 fixed_conv_modes[i].name,
2691 fixed_conv_modes[j].name);
2692 arm_set_fixed_conv_libfunc (fractuns_optab,
2693 fixed_conv_modes[i].mode,
2694 fixed_conv_modes[j].mode, "fractuns",
2695 fixed_conv_modes[i].name,
2696 fixed_conv_modes[j].name);
2697 arm_set_fixed_conv_libfunc (satfractuns_optab,
2698 fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "satfractuns",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2705 if (TARGET_AAPCS_BASED)
2706 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2709 /* On AAPCS systems, this is the "struct __va_list". */
2710 static GTY(()) tree va_list_type;
2712 /* Return the type to use as __builtin_va_list. */
2713 static tree
2714 arm_build_builtin_va_list (void)
2716 tree va_list_name;
2717 tree ap_field;
2719 if (!TARGET_AAPCS_BASED)
2720 return std_build_builtin_va_list ();
2722 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2723 defined as:
2725 struct __va_list
2727 void *__ap;
2730 The C Library ABI further reinforces this definition in \S
2731 4.1.
2733 We must follow this definition exactly. The structure tag
2734 name is visible in C++ mangled names, and thus forms a part
2735 of the ABI. The field name may be used by people who
2736 #include <stdarg.h>. */
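/* Expressed as plain C, the required layout is simply

     struct __va_list { void *__ap; };

   with __builtin_va_list being that structure type; the code below
   builds the same thing through the tree machinery.  */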
2737 /* Create the type. */
2738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2739 /* Give it the required name. */
2740 va_list_name = build_decl (BUILTINS_LOCATION,
2741 TYPE_DECL,
2742 get_identifier ("__va_list"),
2743 va_list_type);
2744 DECL_ARTIFICIAL (va_list_name) = 1;
2745 TYPE_NAME (va_list_type) = va_list_name;
2746 TYPE_STUB_DECL (va_list_type) = va_list_name;
2747 /* Create the __ap field. */
2748 ap_field = build_decl (BUILTINS_LOCATION,
2749 FIELD_DECL,
2750 get_identifier ("__ap"),
2751 ptr_type_node);
2752 DECL_ARTIFICIAL (ap_field) = 1;
2753 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2754 TYPE_FIELDS (va_list_type) = ap_field;
2755 /* Compute its layout. */
2756 layout_type (va_list_type);
2758 return va_list_type;
2761 /* Return an expression of type "void *" pointing to the next
2762 available argument in a variable-argument list. VALIST is the
2763 user-level va_list object, of type __builtin_va_list. */
2764 static tree
2765 arm_extract_valist_ptr (tree valist)
2767 if (TREE_TYPE (valist) == error_mark_node)
2768 return error_mark_node;
2770 /* On an AAPCS target, the pointer is stored within "struct
2771 __va_list". */
2772 if (TARGET_AAPCS_BASED)
2774 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2775 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2776 valist, ap_field, NULL_TREE);
2779 return valist;
2782 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2783 static void
2784 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2786 valist = arm_extract_valist_ptr (valist);
2787 std_expand_builtin_va_start (valist, nextarg);
2790 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2791 static tree
2792 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2793 gimple_seq *post_p)
2795 valist = arm_extract_valist_ptr (valist);
2796 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2799 /* Check any incompatible options that the user has specified. */
2800 static void
2801 arm_option_check_internal (struct gcc_options *opts)
2803 int flags = opts->x_target_flags;
2805 /* iWMMXt and NEON are incompatible. */
2806 if (TARGET_IWMMXT
2807 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2808 error ("iWMMXt and NEON are incompatible");
2810 /* Make sure that the processor choice does not conflict with any of the
2811 other command line choices. */
2812 if (TARGET_ARM_P (flags)
2813 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2814 error ("target CPU does not support ARM mode");
2816 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2817 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2818 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2820 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2821 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2823 /* If this target is normally configured to use APCS frames, warn if they
2824 are turned off and debugging is turned on. */
2825 if (TARGET_ARM_P (flags)
2826 && write_symbols != NO_DEBUG
2827 && !TARGET_APCS_FRAME
2828 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2829 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2831 /* iWMMXt unsupported under Thumb mode. */
2832 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2833 error ("iWMMXt unsupported under Thumb mode");
2835 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2836 error ("can not use -mtp=cp15 with 16-bit Thumb");
2838 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2840 error ("RTP PIC is incompatible with Thumb");
2841 flag_pic = 0;
2844 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2845 with MOVT. */
2846 if ((target_pure_code || target_slow_flash_data)
2847 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2849 const char *flag = (target_pure_code ? "-mpure-code" :
2850 "-mslow-flash-data");
2851 error ("%s only supports non-pic code on M-profile targets with the "
2852 "MOVT instruction", flag);
2857 /* Recompute the global settings depending on target attribute options. */
2859 static void
2860 arm_option_params_internal (void)
2862 /* If we are not using the default (ARM mode) section anchor offset
2863 ranges, then set the correct ranges now. */
2864 if (TARGET_THUMB1)
2866 /* Thumb-1 LDR instructions cannot have negative offsets.
2867 Permissible positive offset ranges are 5-bit (for byte loads),
2868 6-bit (for halfword loads), or 7-bit (for word loads).
2869 Empirical results suggest a 7-bit anchor range gives the best
2870 overall code size. */
2871 targetm.min_anchor_offset = 0;
2872 targetm.max_anchor_offset = 127;
2874 else if (TARGET_THUMB2)
2876 /* The minimum is set such that the total size of the block
2877 for a particular anchor is 248 + 1 + 4095 bytes, which is
2878 divisible by eight, ensuring natural spacing of anchors. */
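/* That is, 248 + 1 + 4095 = 4344 bytes in total, and 4344 = 8 * 543.  */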
2879 targetm.min_anchor_offset = -248;
2880 targetm.max_anchor_offset = 4095;
2882 else
2884 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2885 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2888 /* Increase the number of conditional instructions with -Os. */
2889 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2891 /* For THUMB2, we limit the conditional sequence to one IT block. */
2892 if (TARGET_THUMB2)
2893 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
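/* An IT block can predicate at most four following instructions, which
   is the limit MAX_INSN_PER_IT_BLOCK reflects.  */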
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
2911 /* Implement targetm.override_options_after_change. */
2913 static void
2914 arm_override_options_after_change (void)
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2920 arm_override_options_after_change_1 (&global_options);
2923 /* Implement TARGET_OPTION_SAVE. */
2924 static void
2925 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2927 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2928 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2929 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2932 /* Implement TARGET_OPTION_RESTORE. */
2933 static void
2934 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2936 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2937 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2938 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2939 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2940 false);
2943 /* Reset options between modes that the user has specified. */
2944 static void
2945 arm_option_override_internal (struct gcc_options *opts,
2946 struct gcc_options *opts_set)
2948 arm_override_options_after_change_1 (opts);
2950 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2952 /* The default is to enable interworking, so this warning message would
2953 be confusing to users who have just compiled with, e.g., -march=armv3. */
2954 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2955 opts->x_target_flags &= ~MASK_INTERWORK;
2958 if (TARGET_THUMB_P (opts->x_target_flags)
2959 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2961 warning (0, "target CPU does not support THUMB instructions");
2962 opts->x_target_flags &= ~MASK_THUMB;
2965 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2967 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2968 opts->x_target_flags &= ~MASK_APCS_FRAME;
2971 /* Callee super interworking implies thumb interworking. Adding
2972 this to the flags here simplifies the logic elsewhere. */
2973 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2974 opts->x_target_flags |= MASK_INTERWORK;
2976 /* Need to remember initial values so combinations of options like
2977 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2978 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2980 if (! opts_set->x_arm_restrict_it)
2981 opts->x_arm_restrict_it = arm_arch8;
2983 /* ARM execution state and M profile don't have [restrict] IT. */
2984 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2985 opts->x_arm_restrict_it = 0;
2987 /* Enable -munaligned-access by default for
2988 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2989 i.e. Thumb2 and ARM state only.
2990 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2991 - ARMv8 architecture-based processors.
2993 Disable -munaligned-access by default for
2994 - all pre-ARMv6 architecture-based processors
2995 - ARMv6-M architecture-based processors
2996 - ARMv8-M Baseline processors. */
2998 if (! opts_set->x_unaligned_access)
3000 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3001 && arm_arch6 && (arm_arch_notm || arm_arch7));
3003 else if (opts->x_unaligned_access == 1
3004 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3006 warning (0, "target CPU does not support unaligned accesses");
3007 opts->x_unaligned_access = 0;
3010 /* Don't warn since it's on by default in -O2. */
3011 if (TARGET_THUMB1_P (opts->x_target_flags))
3012 opts->x_flag_schedule_insns = 0;
3013 else
3014 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3016 /* Disable shrink-wrap when optimizing function for size, since it tends to
3017 generate additional returns. */
3018 if (optimize_function_for_size_p (cfun)
3019 && TARGET_THUMB2_P (opts->x_target_flags))
3020 opts->x_flag_shrink_wrap = false;
3021 else
3022 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3024 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3025 - epilogue_insns - does not accurately model the corresponding insns
3026 emitted in the asm file. In particular, see the comment in thumb_exit
3027 'Find out how many of the (return) argument registers we can corrupt'.
3028 As a consequence, the epilogue may clobber registers without fipa-ra
3029 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3030 TODO: Accurately model clobbers for epilogue_insns and reenable
3031 fipa-ra. */
3032 if (TARGET_THUMB1_P (opts->x_target_flags))
3033 opts->x_flag_ipa_ra = 0;
3034 else
3035 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3037 /* Thumb2 inline assembly code should always use unified syntax.
3038 This will apply to ARM and Thumb1 eventually. */
3039 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3041 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3042 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3043 #endif
3046 static sbitmap isa_all_fpubits;
3047 static sbitmap isa_quirkbits;
3049 /* Configure a build target TARGET from the user-specified options OPTS and
3050 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3051 architecture have been specified, but the two are not identical. */
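/* For example (a behavioural sketch, not an exhaustive description),
   -march=armv7-a combined with -mcpu=cortex-m3 triggers the "conflicts"
   warning below; the -march value then wins for code generation while
   the -mcpu value provides the default tuning.  */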
3052 void
3053 arm_configure_build_target (struct arm_build_target *target,
3054 struct cl_target_option *opts,
3055 struct gcc_options *opts_set,
3056 bool warn_compatible)
3058 const cpu_option *arm_selected_tune = NULL;
3059 const arch_option *arm_selected_arch = NULL;
3060 const cpu_option *arm_selected_cpu = NULL;
3061 const arm_fpu_desc *arm_selected_fpu = NULL;
3062 const char *tune_opts = NULL;
3063 const char *arch_opts = NULL;
3064 const char *cpu_opts = NULL;
3066 bitmap_clear (target->isa);
3067 target->core_name = NULL;
3068 target->arch_name = NULL;
3070 if (opts_set->x_arm_arch_string)
3072 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3073 "-march",
3074 opts->x_arm_arch_string);
3075 arch_opts = strchr (opts->x_arm_arch_string, '+');
3078 if (opts_set->x_arm_cpu_string)
3080 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3081 opts->x_arm_cpu_string);
3082 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3083 arm_selected_tune = arm_selected_cpu;
3084 /* If taking the tuning from -mcpu, we don't need to rescan the
3085 options for tuning. */
3088 if (opts_set->x_arm_tune_string)
3090 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3091 opts->x_arm_tune_string);
3092 tune_opts = strchr (opts->x_arm_tune_string, '+');
3095 if (arm_selected_arch)
3097 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3098 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3099 arch_opts);
3101 if (arm_selected_cpu)
3103 auto_sbitmap cpu_isa (isa_num_bits);
3104 auto_sbitmap isa_delta (isa_num_bits);
3106 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3107 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3108 cpu_opts);
3109 bitmap_xor (isa_delta, cpu_isa, target->isa);
3110 /* Ignore any bits that are quirk bits. */
3111 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3112 /* Ignore (for now) any bits that might be set by -mfpu. */
3113 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3115 if (!bitmap_empty_p (isa_delta))
3117 if (warn_compatible)
3118 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3119 arm_selected_cpu->common.name,
3120 arm_selected_arch->common.name);
3121 /* -march wins for code generation.
3122 -mcpu wins for default tuning. */
3123 if (!arm_selected_tune)
3124 arm_selected_tune = arm_selected_cpu;
3126 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3127 target->arch_name = arm_selected_arch->common.name;
3129 else
3131 /* Architecture and CPU are essentially the same.
3132 Prefer the CPU setting. */
3133 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3134 target->core_name = arm_selected_cpu->common.name;
3135 /* Copy the CPU's capabilities, so that we inherit the
3136 appropriate extensions and quirks. */
3137 bitmap_copy (target->isa, cpu_isa);
3140 else
3142 /* Pick a CPU based on the architecture. */
3143 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3144 target->arch_name = arm_selected_arch->common.name;
3145 /* Note: target->core_name is left unset in this path. */
3148 else if (arm_selected_cpu)
3150 target->core_name = arm_selected_cpu->common.name;
3151 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3152 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3153 cpu_opts);
3154 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3156 /* If the user did not specify a processor or architecture, choose
3157 one for them. */
3158 else
3160 const cpu_option *sel;
3161 auto_sbitmap sought_isa (isa_num_bits);
3162 bitmap_clear (sought_isa);
3163 auto_sbitmap default_isa (isa_num_bits);
3165 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3166 TARGET_CPU_DEFAULT);
3167 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3168 gcc_assert (arm_selected_cpu->common.name);
3170 /* RWE: All of the selection logic below (to the end of this
3171 'if' clause) looks somewhat suspect. It appears to be mostly
3172 there to support forcing thumb support when the default CPU
3173 does not have thumb (somewhat dubious in terms of what the
3174 user might be expecting). I think it should be removed once
3175 support for the pre-thumb era cores is removed. */
3176 sel = arm_selected_cpu;
3177 arm_initialize_isa (default_isa, sel->common.isa_bits);
3178 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3179 cpu_opts);
3181 /* Now check to see if the user has specified any command line
3182 switches that require certain abilities from the cpu. */
3184 if (TARGET_INTERWORK || TARGET_THUMB)
3186 bitmap_set_bit (sought_isa, isa_bit_thumb);
3187 bitmap_set_bit (sought_isa, isa_bit_mode32);
3189 /* There are no ARM processors that support both APCS-26 and
3190 interworking. Therefore we forcibly remove MODE26
3191 from the isa features here (if it was set), so that the
3192 search below will always be able to find a compatible
3193 processor. */
3194 bitmap_clear_bit (default_isa, isa_bit_mode26);
3197 /* If there are such requirements and the default CPU does not
3198 satisfy them, we need to run over the complete list of
3199 cores looking for one that is satisfactory. */
3200 if (!bitmap_empty_p (sought_isa)
3201 && !bitmap_subset_p (sought_isa, default_isa))
3203 auto_sbitmap candidate_isa (isa_num_bits);
3204 /* We're only interested in a CPU with at least the
3205 capabilities of the default CPU and the required
3206 additional features. */
3207 bitmap_ior (default_isa, default_isa, sought_isa);
3209 /* Try to locate a CPU type that supports all of the abilities
3210 of the default CPU, plus the extra abilities requested by
3211 the user. */
3212 for (sel = all_cores; sel->common.name != NULL; sel++)
3214 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3215 /* An exact match? */
3216 if (bitmap_equal_p (default_isa, candidate_isa))
3217 break;
3220 if (sel->common.name == NULL)
3222 unsigned current_bit_count = isa_num_bits;
3223 const cpu_option *best_fit = NULL;
3225 /* Ideally we would like to issue an error message here
3226 saying that it was not possible to find a CPU compatible
3227 with the default CPU, but which also supports the command
3228 line options specified by the programmer, and so they
3229 ought to use the -mcpu=<name> command line option to
3230 override the default CPU type.
3232 If we cannot find a CPU that has exactly the
3233 characteristics of the default CPU and the given
3234 command line options we scan the array again looking
3235 for a best match. The best match must have at least
3236 the capabilities of the perfect match. */
3237 for (sel = all_cores; sel->common.name != NULL; sel++)
3239 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3241 if (bitmap_subset_p (default_isa, candidate_isa))
3243 unsigned count;
3245 bitmap_and_compl (candidate_isa, candidate_isa,
3246 default_isa);
3247 count = bitmap_popcount (candidate_isa);
3249 if (count < current_bit_count)
3251 best_fit = sel;
3252 current_bit_count = count;
3256 gcc_assert (best_fit);
3257 sel = best_fit;
3260 arm_selected_cpu = sel;
3263 /* Now we know the CPU, we can finally initialize the target
3264 structure. */
3265 target->core_name = arm_selected_cpu->common.name;
3266 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3272 gcc_assert (arm_selected_cpu);
3273 gcc_assert (arm_selected_arch);
3275 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3277 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3278 auto_sbitmap fpu_bits (isa_num_bits);
3280 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3281 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3282 bitmap_ior (target->isa, target->isa, fpu_bits);
3285 if (!arm_selected_tune)
3286 arm_selected_tune = arm_selected_cpu;
3287 else /* Validate the features passed to -mtune. */
3288 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3290 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3292 /* Finish initializing the target structure. */
3293 target->arch_pp_name = arm_selected_arch->arch;
3294 target->base_arch = arm_selected_arch->base_arch;
3295 target->profile = arm_selected_arch->profile;
3297 target->tune_flags = tune_data->tune_flags;
3298 target->tune = tune_data->tune;
3299 target->tune_core = tune_data->scheduler;
3302 /* Fix up any incompatible options that the user has specified. */
3303 static void
3304 arm_option_override (void)
3306 static const enum isa_feature fpu_bitlist[]
3307 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3308 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3309 cl_target_option opts;
3311 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3314 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3315 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3317 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3319 if (!global_options_set.x_arm_fpu_index)
3321 bool ok;
3322 int fpu_index;
3324 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3325 CL_TARGET);
3326 gcc_assert (ok);
3327 arm_fpu_index = (enum fpu_type) fpu_index;
3330 cl_target_option_save (&opts, &global_options);
3331 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3332 true);
3334 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3335 SUBTARGET_OVERRIDE_OPTIONS;
3336 #endif
3338 /* Initialize boolean versions of the architectural flags, for use
3339 in the arm.md file and for enabling feature flags. */
3340 arm_option_reconfigure_globals ();
3342 arm_tune = arm_active_target.tune_core;
3343 tune_flags = arm_active_target.tune_flags;
3344 current_tune = arm_active_target.tune;
3346 /* TBD: Dwarf info for apcs frame is not handled yet. */
3347 if (TARGET_APCS_FRAME)
3348 flag_shrink_wrap = false;
3350 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3352 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3353 target_flags |= MASK_APCS_FRAME;
3356 if (TARGET_POKE_FUNCTION_NAME)
3357 target_flags |= MASK_APCS_FRAME;
3359 if (TARGET_APCS_REENT && flag_pic)
3360 error ("-fpic and -mapcs-reent are incompatible");
3362 if (TARGET_APCS_REENT)
3363 warning (0, "APCS reentrant code not supported. Ignored");
3365 /* Set up some tuning parameters. */
3366 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3367 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3368 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3369 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3370 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3371 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3373 /* For arm2/3 there is no need to do any scheduling if we are doing
3374 software floating-point. */
3375 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3376 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3378 /* Override the default structure alignment for AAPCS ABI. */
3379 if (!global_options_set.x_arm_structure_size_boundary)
3381 if (TARGET_AAPCS_BASED)
3382 arm_structure_size_boundary = 8;
3384 else
3386 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3388 if (arm_structure_size_boundary != 8
3389 && arm_structure_size_boundary != 32
3390 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3392 if (ARM_DOUBLEWORD_ALIGN)
3393 warning (0,
3394 "structure size boundary can only be set to 8, 32 or 64");
3395 else
3396 warning (0, "structure size boundary can only be set to 8 or 32");
3397 arm_structure_size_boundary
3398 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3402 if (TARGET_VXWORKS_RTP)
3404 if (!global_options_set.x_arm_pic_data_is_text_relative)
3405 arm_pic_data_is_text_relative = 0;
3407 else if (flag_pic
3408 && !arm_pic_data_is_text_relative
3409 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3410 /* When text & data segments don't have a fixed displacement, the
3411 intended use is with a single, read-only, PIC base register.
3412 Unless the user explicitly requested not to do that, set
3413 it. */
3414 target_flags |= MASK_SINGLE_PIC_BASE;
3416 /* If stack checking is disabled, we can use r10 as the PIC register,
3417 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3418 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3420 if (TARGET_VXWORKS_RTP)
3421 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3422 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3425 if (flag_pic && TARGET_VXWORKS_RTP)
3426 arm_pic_register = 9;
3428 if (arm_pic_register_string != NULL)
3430 int pic_register = decode_reg_name (arm_pic_register_string);
3432 if (!flag_pic)
3433 warning (0, "-mpic-register= is useless without -fpic");
3435 /* Prevent the user from choosing an obviously stupid PIC register. */
3436 else if (pic_register < 0 || call_used_regs[pic_register]
3437 || pic_register == HARD_FRAME_POINTER_REGNUM
3438 || pic_register == STACK_POINTER_REGNUM
3439 || pic_register >= PC_REGNUM
3440 || (TARGET_VXWORKS_RTP
3441 && (unsigned int) pic_register != arm_pic_register))
3442 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3443 else
3444 arm_pic_register = pic_register;
3447 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3448 if (fix_cm3_ldrd == 2)
3450 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3451 fix_cm3_ldrd = 1;
3452 else
3453 fix_cm3_ldrd = 0;
3456 /* Hot/Cold partitioning is not currently supported, since we can't
3457 handle literal pool placement in that case. */
3458 if (flag_reorder_blocks_and_partition)
3460 inform (input_location,
3461 "-freorder-blocks-and-partition not supported on this architecture");
3462 flag_reorder_blocks_and_partition = 0;
3463 flag_reorder_blocks = 1;
3466 if (flag_pic)
3467 /* Hoisting PIC address calculations more aggressively provides a small,
3468 but measurable, size reduction for PIC code. Therefore, we decrease
3469 the bar for unrestricted expression hoisting to the cost of PIC address
3470 calculation, which is 2 instructions. */
3471 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3472 global_options.x_param_values,
3473 global_options_set.x_param_values);
3475 /* ARM EABI defaults to strict volatile bitfields. */
3476 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3477 && abi_version_at_least(2))
3478 flag_strict_volatile_bitfields = 1;
3480 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3481 have deemed it beneficial (signified by setting
3482 prefetch.num_slots to 1 or more). */
3483 if (flag_prefetch_loop_arrays < 0
3484 && HAVE_prefetch
3485 && optimize >= 3
3486 && current_tune->prefetch.num_slots > 0)
3487 flag_prefetch_loop_arrays = 1;
3489 /* Set up parameters to be used in prefetching algorithm. Do not
3490 override the defaults unless we are tuning for a core we have
3491 researched values for. */
3492 if (current_tune->prefetch.num_slots > 0)
3493 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3494 current_tune->prefetch.num_slots,
3495 global_options.x_param_values,
3496 global_options_set.x_param_values);
3497 if (current_tune->prefetch.l1_cache_line_size >= 0)
3498 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3499 current_tune->prefetch.l1_cache_line_size,
3500 global_options.x_param_values,
3501 global_options_set.x_param_values);
3502 if (current_tune->prefetch.l1_cache_size >= 0)
3503 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3504 current_tune->prefetch.l1_cache_size,
3505 global_options.x_param_values,
3506 global_options_set.x_param_values);
3508 /* Use Neon to perform 64-bit operations rather than core
3509 registers. */
3510 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3511 if (use_neon_for_64bits == 1)
3512 prefer_neon_for_64bits = true;
3514 /* Use the alternative scheduling-pressure algorithm by default. */
3515 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3516 global_options.x_param_values,
3517 global_options_set.x_param_values);
3519 /* Look through ready list and all of queue for instructions
3520 relevant for L2 auto-prefetcher. */
3521 int param_sched_autopref_queue_depth;
3523 switch (current_tune->sched_autopref)
3525 case tune_params::SCHED_AUTOPREF_OFF:
3526 param_sched_autopref_queue_depth = -1;
3527 break;
3529 case tune_params::SCHED_AUTOPREF_RANK:
3530 param_sched_autopref_queue_depth = 0;
3531 break;
3533 case tune_params::SCHED_AUTOPREF_FULL:
3534 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3535 break;
3537 default:
3538 gcc_unreachable ();
3541 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3542 param_sched_autopref_queue_depth,
3543 global_options.x_param_values,
3544 global_options_set.x_param_values);
3546 /* Currently, for slow flash data, we just disable literal pools. We also
3547 disable it for pure-code. */
3548 if (target_slow_flash_data || target_pure_code)
3549 arm_disable_literal_pool = true;
3551 /* Disable scheduling fusion by default if the target is not an ARMv7
3552 processor or does not prefer ldrd/strd. */
3553 if (flag_schedule_fusion == 2
3554 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3555 flag_schedule_fusion = 0;
3557 /* Need to remember initial options before they are overridden. */
3558 init_optimize = build_optimization_node (&global_options);
3560 arm_options_perform_arch_sanity_checks ();
3561 arm_option_override_internal (&global_options, &global_options_set);
3562 arm_option_check_internal (&global_options);
3563 arm_option_params_internal ();
3565 /* Create the default target_options structure. */
3566 target_option_default_node = target_option_current_node
3567 = build_target_option_node (&global_options);
3569 /* Register global variables with the garbage collector. */
3570 arm_add_gc_roots ();
3572 /* Init initial mode for testing. */
3573 thumb_flipper = TARGET_THUMB;
3577 /* Reconfigure global status flags from the active_target.isa. */
3578 void
3579 arm_option_reconfigure_globals (void)
3581 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3582 arm_base_arch = arm_active_target.base_arch;
3584 /* Initialize boolean versions of the architectural flags, for use
3585 in the arm.md file. */
3586 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3587 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3588 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3589 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3590 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3591 arm_arch5te = arm_arch5e
3592 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3593 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3594 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3595 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3596 arm_arch6m = arm_arch6 && !arm_arch_notm;
3597 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3598 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3599 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3600 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3601 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3602 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3603 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3604 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3605 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3606 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3607 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3608 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3609 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3610 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3611 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3612 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3613 if (arm_fp16_inst)
3615 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3616 error ("selected fp16 options are incompatible");
3617 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3620 /* And finally, set up some quirks. */
3621 arm_arch_no_volatile_ce
3622 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3623 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3624 isa_bit_quirk_armv6kz);
3626 /* Use the cp15 method if it is available. */
3627 if (target_thread_pointer == TP_AUTO)
3629 if (arm_arch6k && !TARGET_THUMB1)
3630 target_thread_pointer = TP_CP15;
3631 else
3632 target_thread_pointer = TP_SOFT;
3636 /* Perform some validation between the desired architecture and the rest of the
3637 options. */
3638 void
3639 arm_options_perform_arch_sanity_checks (void)
3641 /* V5 code we generate is completely interworking capable, so we turn off
3642 TARGET_INTERWORK here to avoid many tests later on. */
3644 /* XXX However, we must pass the right pre-processor defines to CPP
3645 or GLD can get confused. This is a hack. */
3646 if (TARGET_INTERWORK)
3647 arm_cpp_interwork = 1;
3649 if (arm_arch5)
3650 target_flags &= ~MASK_INTERWORK;
3652 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3653 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3655 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3656 error ("iwmmxt abi requires an iwmmxt capable cpu");
3658 /* BPABI targets use linker tricks to allow interworking on cores
3659 without thumb support. */
3660 if (TARGET_INTERWORK
3661 && !TARGET_BPABI
3662 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3664 warning (0, "target CPU does not support interworking" );
3665 target_flags &= ~MASK_INTERWORK;
3668 /* If soft-float is specified then don't use FPU. */
3669 if (TARGET_SOFT_FLOAT)
3670 arm_fpu_attr = FPU_NONE;
3671 else
3672 arm_fpu_attr = FPU_VFP;
3674 if (TARGET_AAPCS_BASED)
3676 if (TARGET_CALLER_INTERWORKING)
3677 error ("AAPCS does not support -mcaller-super-interworking");
3678 else
3679 if (TARGET_CALLEE_INTERWORKING)
3680 error ("AAPCS does not support -mcallee-super-interworking");
3683 /* __fp16 support currently assumes the core has ldrh. */
3684 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3685 sorry ("__fp16 and no ldrh");
3687 if (use_cmse && !arm_arch_cmse)
3688 error ("target CPU does not support ARMv8-M Security Extensions");
3690 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3691 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3692 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3693 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3696 if (TARGET_AAPCS_BASED)
3698 if (arm_abi == ARM_ABI_IWMMXT)
3699 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3700 else if (TARGET_HARD_FLOAT_ABI)
3702 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3703 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3704 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3706 else
3707 arm_pcs_default = ARM_PCS_AAPCS;
3709 else
3711 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3712 sorry ("-mfloat-abi=hard and VFP");
3714 if (arm_abi == ARM_ABI_APCS)
3715 arm_pcs_default = ARM_PCS_APCS;
3716 else
3717 arm_pcs_default = ARM_PCS_ATPCS;
3721 static void
3722 arm_add_gc_roots (void)
3724 gcc_obstack_init(&minipool_obstack);
3725 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3728 /* A table of known ARM exception types.
3729 For use with the interrupt function attribute. */
3731 typedef struct
3733 const char *const arg;
3734 const unsigned long return_value;
3736 isr_attribute_arg;
3738 static const isr_attribute_arg isr_attribute_args [] =
3740 { "IRQ", ARM_FT_ISR },
3741 { "irq", ARM_FT_ISR },
3742 { "FIQ", ARM_FT_FIQ },
3743 { "fiq", ARM_FT_FIQ },
3744 { "ABORT", ARM_FT_ISR },
3745 { "abort", ARM_FT_ISR },
3746 { "ABORT", ARM_FT_ISR },
3747 { "abort", ARM_FT_ISR },
3748 { "UNDEF", ARM_FT_EXCEPTION },
3749 { "undef", ARM_FT_EXCEPTION },
3750 { "SWI", ARM_FT_EXCEPTION },
3751 { "swi", ARM_FT_EXCEPTION },
3752 { NULL, ARM_FT_NORMAL }
3755 /* Returns the (interrupt) function type of the current
3756 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3758 static unsigned long
3759 arm_isr_value (tree argument)
3761 const isr_attribute_arg * ptr;
3762 const char * arg;
3764 if (!arm_arch_notm)
3765 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3767 /* No argument - default to IRQ. */
3768 if (argument == NULL_TREE)
3769 return ARM_FT_ISR;
3771 /* Get the value of the argument. */
3772 if (TREE_VALUE (argument) == NULL_TREE
3773 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3774 return ARM_FT_UNKNOWN;
3776 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3778 /* Check it against the list of known arguments. */
3779 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3780 if (streq (arg, ptr->arg))
3781 return ptr->return_value;
3783 /* An unrecognized interrupt type. */
3784 return ARM_FT_UNKNOWN;
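
/* For illustration only (not part of this file): the strings in
   isr_attribute_args above are what users pass to the "interrupt"
   function attribute in their own sources.  The handler names below
   are hypothetical.

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   With no argument the attribute defaults to ARM_FT_ISR, as handled
   in arm_isr_value above.  */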
3787 /* Computes the type of the current function. */
3789 static unsigned long
3790 arm_compute_func_type (void)
3792 unsigned long type = ARM_FT_UNKNOWN;
3793 tree a;
3794 tree attr;
3796 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3798 /* Decide if the current function is volatile. Such functions
3799 never return, and many memory cycles can be saved by not storing
3800 register values that will never be needed again. This optimization
3801 was added to speed up context switching in a kernel application. */
3802 if (optimize > 0
3803 && (TREE_NOTHROW (current_function_decl)
3804 || !(flag_unwind_tables
3805 || (flag_exceptions
3806 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3807 && TREE_THIS_VOLATILE (current_function_decl))
3808 type |= ARM_FT_VOLATILE;
3810 if (cfun->static_chain_decl != NULL)
3811 type |= ARM_FT_NESTED;
3813 attr = DECL_ATTRIBUTES (current_function_decl);
3815 a = lookup_attribute ("naked", attr);
3816 if (a != NULL_TREE)
3817 type |= ARM_FT_NAKED;
3819 a = lookup_attribute ("isr", attr);
3820 if (a == NULL_TREE)
3821 a = lookup_attribute ("interrupt", attr);
3823 if (a == NULL_TREE)
3824 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3825 else
3826 type |= arm_isr_value (TREE_VALUE (a));
3828 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3829 type |= ARM_FT_CMSE_ENTRY;
3831 return type;
3834 /* Returns the type of the current function. */
3836 unsigned long
3837 arm_current_func_type (void)
3839 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3840 cfun->machine->func_type = arm_compute_func_type ();
3842 return cfun->machine->func_type;
3845 bool
3846 arm_allocate_stack_slots_for_args (void)
3848 /* Naked functions should not allocate stack slots for arguments. */
3849 return !IS_NAKED (arm_current_func_type ());
3852 static bool
3853 arm_warn_func_return (tree decl)
3855 /* Naked functions are implemented entirely in assembly, including the
3856 return sequence, so suppress warnings about this. */
3857 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
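
/* A minimal sketch (illustration only, not part of this file) of the kind
   of user code the "naked" attribute handling above is for: the compiler
   emits no prologue, epilogue or return sequence, so the body must be
   written entirely in basic asm.  The function name is hypothetical.  */
#if 0   /* illustration only */
void __attribute__ ((naked))
reset_handler (void)
{
  __asm__ volatile ("bx\tlr");   /* Return straight to the caller.  */
}
#endif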
3861 /* Output assembler code for a block containing the constant parts
3862 of a trampoline, leaving space for the variable parts.
3864 On the ARM, (if r8 is the static chain regnum, and remembering that
3865 referencing pc adds an offset of 8) the trampoline looks like:
3866 ldr r8, [pc, #0]
3867 ldr pc, [pc]
3868 .word static chain value
3869 .word function's address
3870 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3872 static void
3873 arm_asm_trampoline_template (FILE *f)
3875 fprintf (f, "\t.syntax unified\n");
3877 if (TARGET_ARM)
3879 fprintf (f, "\t.arm\n");
3880 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3881 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3883 else if (TARGET_THUMB2)
3885 fprintf (f, "\t.thumb\n");
3886 /* The Thumb-2 trampoline is similar to the arm implementation.
3887 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3888 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3889 STATIC_CHAIN_REGNUM, PC_REGNUM);
3890 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3892 else
3894 ASM_OUTPUT_ALIGN (f, 2);
3895 fprintf (f, "\t.code\t16\n");
3896 fprintf (f, ".Ltrampoline_start:\n");
3897 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3898 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3899 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3900 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3901 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3902 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3904 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3905 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3908 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3910 static void
3911 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3913 rtx fnaddr, mem, a_tramp;
3915 emit_block_move (m_tramp, assemble_trampoline_template (),
3916 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3918 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3919 emit_move_insn (mem, chain_value);
3921 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3922 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3923 emit_move_insn (mem, fnaddr);
3925 a_tramp = XEXP (m_tramp, 0);
3926 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3927 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3928 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3931 /* Thumb trampolines should be entered in thumb mode, so set
3932 the bottom bit of the address. */
3934 static rtx
3935 arm_trampoline_adjust_address (rtx addr)
3937 if (TARGET_THUMB)
3938 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3939 NULL, 0, OPTAB_LIB_WIDEN);
3940 return addr;
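
/* An illustrative sketch (not part of this file) of the user code that
   needs the trampolines above: a GNU C nested function that refers to its
   enclosing frame and has its address taken.  The indirect call then goes
   through the run-time trampoline that arm_trampoline_init fills in.
   All names below are hypothetical.  */
#if 0   /* illustration only */
static int
outer (int x)
{
  int inner (int y) { return x + y; }  /* GNU C nested function.  */
  int (*fp) (int) = inner;             /* Taking its address forces a trampoline.  */
  return fp (1);                       /* Indirect call via the trampoline.  */
}
#endif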
3943 /* Return 1 if it is possible to return using a single instruction.
3944 If SIBLING is non-null, this is a test for a return before a sibling
3945 call. SIBLING is the call insn, so we can examine its register usage. */
3948 use_return_insn (int iscond, rtx sibling)
3950 int regno;
3951 unsigned int func_type;
3952 unsigned long saved_int_regs;
3953 unsigned HOST_WIDE_INT stack_adjust;
3954 arm_stack_offsets *offsets;
3956 /* Never use a return instruction before reload has run. */
3957 if (!reload_completed)
3958 return 0;
3960 func_type = arm_current_func_type ();
3962 /* Naked, volatile and stack alignment functions need special
3963 consideration. */
3964 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3965 return 0;
3967 /* So do interrupt functions that use the frame pointer and Thumb
3968 interrupt functions. */
3969 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3970 return 0;
3972 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3973 && !optimize_function_for_size_p (cfun))
3974 return 0;
3976 offsets = arm_get_frame_offsets ();
3977 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3979 /* As do variadic functions. */
3980 if (crtl->args.pretend_args_size
3981 || cfun->machine->uses_anonymous_args
3982 /* Or if the function calls __builtin_eh_return () */
3983 || crtl->calls_eh_return
3984 /* Or if the function calls alloca */
3985 || cfun->calls_alloca
3986 /* Or if there is a stack adjustment. However, if the stack pointer
3987 is saved on the stack, we can use a pre-incrementing stack load. */
3988 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3989 && stack_adjust == 4))
3990 /* Or if the static chain register was saved above the frame, under the
3991 assumption that the stack pointer isn't saved on the stack. */
3992 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3993 && arm_compute_static_chain_stack_bytes() != 0))
3994 return 0;
3996 saved_int_regs = offsets->saved_regs_mask;
3998 /* Unfortunately, the insn
4000 ldmib sp, {..., sp, ...}
4002 triggers a bug on most SA-110 based devices, such that the stack
4003 pointer won't be correctly restored if the instruction takes a
4004 page fault. We work around this problem by popping r3 along with
4005 the other registers, since that is never slower than executing
4006 another instruction.
4008 We test for !arm_arch5 here, because code for any architecture
4009 less than this could potentially be run on one of the buggy
4010 chips. */
4011 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4013 /* Validate that r3 is a call-clobbered register (always true in
4014 the default abi) ... */
4015 if (!call_used_regs[3])
4016 return 0;
4018 /* ... that it isn't being used for a return value ... */
4019 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4020 return 0;
4022 /* ... or for a tail-call argument ... */
4023 if (sibling)
4025 gcc_assert (CALL_P (sibling));
4027 if (find_regno_fusage (sibling, USE, 3))
4028 return 0;
4031 /* ... and that there are no call-saved registers in r0-r2
4032 (always true in the default ABI). */
4033 if (saved_int_regs & 0x7)
4034 return 0;
4037 /* Can't be done if interworking with Thumb, and any registers have been
4038 stacked. */
4039 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4040 return 0;
4042 /* On StrongARM, conditional returns are expensive if they aren't
4043 taken and multiple registers have been stacked. */
4044 if (iscond && arm_tune_strongarm)
4046 /* Conditional return when just the LR is stored is a simple
4047 conditional-load instruction, that's not expensive. */
4048 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4049 return 0;
4051 if (flag_pic
4052 && arm_pic_register != INVALID_REGNUM
4053 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4054 return 0;
4057 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4058 several instructions if anything needs to be popped. */
4059 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4060 return 0;
4062 /* If there are saved registers but the LR isn't saved, then we need
4063 two instructions for the return. */
4064 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4065 return 0;
4067 /* Can't be done if any of the VFP regs are pushed,
4068 since this also requires an insn. */
4069 if (TARGET_HARD_FLOAT)
4070 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4071 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4072 return 0;
4074 if (TARGET_REALLY_IWMMXT)
4075 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4076 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4077 return 0;
4079 return 1;
4082 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4083 shrink-wrapping if possible. This is the case if we need to emit a
4084 prologue, which we can test by looking at the offsets. */
4085 bool
4086 use_simple_return_p (void)
4088 arm_stack_offsets *offsets;
4090 /* Note this function can be called before or after reload. */
4091 if (!reload_completed)
4092 arm_compute_frame_layout ();
4094 offsets = arm_get_frame_offsets ();
4095 return offsets->outgoing_args != 0;
4098 /* Return TRUE if int I is a valid immediate ARM constant. */
4101 const_ok_for_arm (HOST_WIDE_INT i)
4103 int lowbit;
4105 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4106 be all zero, or all one. */
4107 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4108 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4109 != ((~(unsigned HOST_WIDE_INT) 0)
4110 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4111 return FALSE;
4113 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4115 /* Fast return for 0 and small values. We must do this for zero, since
4116 the code below can't handle that one case. */
4117 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4118 return TRUE;
4120 /* Get the number of trailing zeros. */
4121 lowbit = ffs((int) i) - 1;
4123 /* Only even shifts are allowed in ARM mode so round down to the
4124 nearest even number. */
4125 if (TARGET_ARM)
4126 lowbit &= ~1;
4128 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4129 return TRUE;
4131 if (TARGET_ARM)
4133 /* Allow rotated constants in ARM mode. */
4134 if (lowbit <= 4
4135 && ((i & ~0xc000003f) == 0
4136 || (i & ~0xf000000f) == 0
4137 || (i & ~0xfc000003) == 0))
4138 return TRUE;
4140 else if (TARGET_THUMB2)
4142 HOST_WIDE_INT v;
4144 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4145 v = i & 0xff;
4146 v |= v << 16;
4147 if (i == v || i == (v | (v << 8)))
4148 return TRUE;
4150 /* Allow repeated pattern 0xXY00XY00. */
4151 v = i & 0xff00;
4152 v |= v << 16;
4153 if (i == v)
4154 return TRUE;
4156 else if (TARGET_HAVE_MOVT)
4158 /* Thumb-1 Targets with MOVT. */
4159 if (i > 0xffff)
4160 return FALSE;
4161 else
4162 return TRUE;
4165 return FALSE;
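
/* A self-contained sketch (not the implementation above) of the basic
   ARM-mode encoding rule checked by const_ok_for_arm: a 32-bit constant is
   representable iff it is an 8-bit value rotated right by an even amount.
   The helper name is made up for the illustration; Thumb-2 replicated
   patterns and MOVT are ignored here.  */
#if 0   /* illustration only */
static int
is_arm_mode_immediate (unsigned int x)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Undo a rotate-right by ROT, i.e. rotate left by ROT.  */
      unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
      if (v <= 0xff)
	return 1;   /* e.g. 0xff000000, 0x000003fc, 0xf000000f.  */
    }
  return 0;         /* e.g. 0x0000ffff or 0x00010001.  */
}
#endif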
4168 /* Return true if I is a valid constant for the operation CODE. */
4170 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4172 if (const_ok_for_arm (i))
4173 return 1;
4175 switch (code)
4177 case SET:
4178 /* See if we can use movw. */
4179 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4180 return 1;
4181 else
4182 /* Otherwise, try mvn. */
4183 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4185 case PLUS:
4186 /* See if we can use addw or subw. */
4187 if (TARGET_THUMB2
4188 && ((i & 0xfffff000) == 0
4189 || ((-i) & 0xfffff000) == 0))
4190 return 1;
4191 /* Fall through. */
4192 case COMPARE:
4193 case EQ:
4194 case NE:
4195 case GT:
4196 case LE:
4197 case LT:
4198 case GE:
4199 case GEU:
4200 case LTU:
4201 case GTU:
4202 case LEU:
4203 case UNORDERED:
4204 case ORDERED:
4205 case UNEQ:
4206 case UNGE:
4207 case UNLT:
4208 case UNGT:
4209 case UNLE:
4210 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4212 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4213 case XOR:
4214 return 0;
4216 case IOR:
4217 if (TARGET_THUMB2)
4218 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4219 return 0;
4221 case AND:
4222 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4224 default:
4225 gcc_unreachable ();
4229 /* Return true if I is a valid DImode constant for the operation CODE. */
4231 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4233 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4234 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4235 rtx hi = GEN_INT (hi_val);
4236 rtx lo = GEN_INT (lo_val);
4238 if (TARGET_THUMB1)
4239 return 0;
4241 switch (code)
4243 case AND:
4244 case IOR:
4245 case XOR:
4246 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4247 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4248 case PLUS:
4249 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4251 default:
4252 return 0;
4256 /* Emit a sequence of insns to handle a large constant.
4257 CODE is the code of the operation required, it can be any of SET, PLUS,
4258 IOR, AND, XOR, MINUS;
4259 MODE is the mode in which the operation is being performed;
4260 VAL is the integer to operate on;
4261 SOURCE is the other operand (a register, or a null-pointer for SET);
4262 SUBTARGETS means it is safe to create scratch registers if that will
4263 either produce a simpler sequence, or we will want to cse the values.
4264 Return value is the number of insns emitted. */
4266 /* ??? Tweak this for thumb2. */
4268 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4269 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4271 rtx cond;
4273 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4274 cond = COND_EXEC_TEST (PATTERN (insn));
4275 else
4276 cond = NULL_RTX;
4278 if (subtargets || code == SET
4279 || (REG_P (target) && REG_P (source)
4280 && REGNO (target) != REGNO (source)))
4282 /* After arm_reorg has been called, we can't fix up expensive
4283 constants by pushing them into memory so we must synthesize
4284 them in-line, regardless of the cost. This is only likely to
4285 be more costly on chips that have load delay slots and we are
4286 compiling without running the scheduler (so no splitting
4287 occurred before the final instruction emission).
4289 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4291 if (!cfun->machine->after_arm_reorg
4292 && !cond
4293 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4294 1, 0)
4295 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4296 + (code != SET))))
4298 if (code == SET)
4300 /* Currently SET is the only monadic value for CODE; all
4301 the rest are dyadic. */
4302 if (TARGET_USE_MOVT)
4303 arm_emit_movpair (target, GEN_INT (val));
4304 else
4305 emit_set_insn (target, GEN_INT (val));
4307 return 1;
4309 else
4311 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4313 if (TARGET_USE_MOVT)
4314 arm_emit_movpair (temp, GEN_INT (val));
4315 else
4316 emit_set_insn (temp, GEN_INT (val));
4318 /* For MINUS, the constant is the minuend (we compute VAL - SOURCE);
4319 subtraction of a constant is rewritten as adding its negative. */
4320 if (code == MINUS)
4321 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4322 else
4323 emit_set_insn (target,
4324 gen_rtx_fmt_ee (code, mode, source, temp));
4325 return 2;
4330 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4334 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4335 ARM/THUMB2 immediates and add up to VAL.
4336 The function's return value gives the number of insns required. */
4337 static int
4338 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4339 struct four_ints *return_sequence)
4341 int best_consecutive_zeros = 0;
4342 int i;
4343 int best_start = 0;
4344 int insns1, insns2;
4345 struct four_ints tmp_sequence;
4347 /* If we aren't targeting ARM, the best place to start is always at
4348 the bottom, otherwise look more closely. */
4349 if (TARGET_ARM)
4351 for (i = 0; i < 32; i += 2)
4353 int consecutive_zeros = 0;
4355 if (!(val & (3 << i)))
4357 while ((i < 32) && !(val & (3 << i)))
4359 consecutive_zeros += 2;
4360 i += 2;
4362 if (consecutive_zeros > best_consecutive_zeros)
4364 best_consecutive_zeros = consecutive_zeros;
4365 best_start = i - consecutive_zeros;
4367 i -= 2;
4372 /* So long as it won't require any more insns to do so, it's
4373 desirable to emit a small constant (in bits 0...9) in the last
4374 insn. This way there is more chance that it can be combined with
4375 a later addressing insn to form a pre-indexed load or store
4376 operation. Consider:
4378 *((volatile int *)0xe0000100) = 1;
4379 *((volatile int *)0xe0000110) = 2;
4381 We want this to wind up as:
4383 mov rA, #0xe0000000
4384 mov rB, #1
4385 str rB, [rA, #0x100]
4386 mov rB, #2
4387 str rB, [rA, #0x110]
4389 rather than having to synthesize both large constants from scratch.
4391 Therefore, we calculate how many insns would be required to emit
4392 the constant starting from `best_start', and also starting from
4393 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4394 yield a shorter sequence, we may as well use zero. */
4395 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4396 if (best_start != 0
4397 && ((HOST_WIDE_INT_1U << best_start) < val))
4399 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4400 if (insns2 <= insns1)
4402 *return_sequence = tmp_sequence;
4403 insns1 = insns2;
4407 return insns1;
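
/* A much simplified, standalone sketch (not the algorithm above) of the
   underlying idea: split VAL into 8-bit chunks, each starting at an even
   bit position, so that every chunk is a legal ARM-mode immediate.  Unlike
   optimal_immediate_sequence it ignores wrap-around constants, Thumb-2
   replicated patterns and the best-start heuristic, so it may use more
   instructions.  The helper name is hypothetical.  */
#if 0   /* illustration only */
static int
naive_immediate_split (unsigned int val, unsigned int out[4])
{
  int n = 0;
  while (val != 0)
    {
      int low = __builtin_ctz (val) & ~1;        /* Even rotations only.  */
      unsigned int chunk = val & (0xffu << low); /* Grab up to 8 bits.  */
      out[n++] = chunk;                          /* At most 4 chunks fit in 32 bits.  */
      val &= ~chunk;
    }
  return n;   /* e.g. 0xe0000100 -> { 0x00000100, 0xe0000000 }, i.e. 2 insns.  */
}
#endif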
4410 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4411 static int
4412 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4413 struct four_ints *return_sequence, int i)
4415 int remainder = val & 0xffffffff;
4416 int insns = 0;
4418 /* Try and find a way of doing the job in either two or three
4419 instructions.
4421 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4422 location. We start at position I. This may be the MSB, or
4423 optimal_immediate_sequence may have positioned it at the largest block
4424 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4425 wrapping around to the top of the word when we drop off the bottom.
4426 In the worst case this code should produce no more than four insns.
4428 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4429 constants, shifted to any arbitrary location. We should always start
4430 at the MSB. */
4433 int end;
4434 unsigned int b1, b2, b3, b4;
4435 unsigned HOST_WIDE_INT result;
4436 int loc;
4438 gcc_assert (insns < 4);
4440 if (i <= 0)
4441 i += 32;
4443 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4444 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4446 loc = i;
4447 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4448 /* We can use addw/subw for the last 12 bits. */
4449 result = remainder;
4450 else
4452 /* Use an 8-bit shifted/rotated immediate. */
4453 end = i - 8;
4454 if (end < 0)
4455 end += 32;
4456 result = remainder & ((0x0ff << end)
4457 | ((i < end) ? (0xff >> (32 - end))
4458 : 0));
4459 i -= 8;
4462 else
4464 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4465 arbitrary shifts. */
4466 i -= TARGET_ARM ? 2 : 1;
4467 continue;
4470 /* Next, see if we can do a better job with a thumb2 replicated
4471 constant.
4473 We do it this way around to catch the cases like 0x01F001E0 where
4474 two 8-bit immediates would work, but a replicated constant would
4475 make it worse.
4477 TODO: 16-bit constants that don't clear all the bits, but still win.
4478 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4479 if (TARGET_THUMB2)
4481 b1 = (remainder & 0xff000000) >> 24;
4482 b2 = (remainder & 0x00ff0000) >> 16;
4483 b3 = (remainder & 0x0000ff00) >> 8;
4484 b4 = remainder & 0xff;
4486 if (loc > 24)
4488 /* The 8-bit immediate already found clears b1 (and maybe b2),
4489 but must leave b3 and b4 alone. */
4491 /* First try to find a 32-bit replicated constant that clears
4492 almost everything. We can assume that we can't do it in one,
4493 or else we wouldn't be here. */
4494 unsigned int tmp = b1 & b2 & b3 & b4;
4495 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4496 + (tmp << 24);
4497 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4498 + (tmp == b3) + (tmp == b4);
4499 if (tmp
4500 && (matching_bytes >= 3
4501 || (matching_bytes == 2
4502 && const_ok_for_op (remainder & ~tmp2, code))))
4504 /* At least 3 of the bytes match, and the fourth has at
4505 least as many bits set, or two of the bytes match
4506 and it will only require one more insn to finish. */
4507 result = tmp2;
4508 i = tmp != b1 ? 32
4509 : tmp != b2 ? 24
4510 : tmp != b3 ? 16
4511 : 8;
4514 /* Second, try to find a 16-bit replicated constant that can
4515 leave three of the bytes clear. If b2 or b4 is already
4516 zero, then we can. If the 8-bit immediate from above would not
4517 clear b2 anyway, then we still win. */
4518 else if (b1 == b3 && (!b2 || !b4
4519 || (remainder & 0x00ff0000 & ~result)))
4521 result = remainder & 0xff00ff00;
4522 i = 24;
4525 else if (loc > 16)
4527 /* The 8-bit immediate already found clears b2 (and maybe b3)
4528 and we don't get here unless b1 is already clear, but it will
4529 leave b4 unchanged. */
4531 /* If we can clear b2 and b4 at once, then we win, since the
4532 8-bits couldn't possibly reach that far. */
4533 if (b2 == b4)
4535 result = remainder & 0x00ff00ff;
4536 i = 16;
4541 return_sequence->i[insns++] = result;
4542 remainder &= ~result;
4544 if (code == SET || code == MINUS)
4545 code = PLUS;
4547 while (remainder);
4549 return insns;
4552 /* Emit an instruction with the indicated PATTERN. If COND is
4553 non-NULL, conditionalize the execution of the instruction on COND
4554 being true. */
4556 static void
4557 emit_constant_insn (rtx cond, rtx pattern)
4559 if (cond)
4560 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4561 emit_insn (pattern);
4564 /* As above, but extra parameter GENERATE which, if clear, suppresses
4565 RTL generation. */
4567 static int
4568 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4569 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4570 int subtargets, int generate)
4572 int can_invert = 0;
4573 int can_negate = 0;
4574 int final_invert = 0;
4575 int i;
4576 int set_sign_bit_copies = 0;
4577 int clear_sign_bit_copies = 0;
4578 int clear_zero_bit_copies = 0;
4579 int set_zero_bit_copies = 0;
4580 int insns = 0, neg_insns, inv_insns;
4581 unsigned HOST_WIDE_INT temp1, temp2;
4582 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4583 struct four_ints *immediates;
4584 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4586 /* Find out which operations are safe for a given CODE. Also do a quick
4587 check for degenerate cases; these can occur when DImode operations
4588 are split. */
4589 switch (code)
4591 case SET:
4592 can_invert = 1;
4593 break;
4595 case PLUS:
4596 can_negate = 1;
4597 break;
4599 case IOR:
4600 if (remainder == 0xffffffff)
4602 if (generate)
4603 emit_constant_insn (cond,
4604 gen_rtx_SET (target,
4605 GEN_INT (ARM_SIGN_EXTEND (val))));
4606 return 1;
4609 if (remainder == 0)
4611 if (reload_completed && rtx_equal_p (target, source))
4612 return 0;
4614 if (generate)
4615 emit_constant_insn (cond, gen_rtx_SET (target, source));
4616 return 1;
4618 break;
4620 case AND:
4621 if (remainder == 0)
4623 if (generate)
4624 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4625 return 1;
4627 if (remainder == 0xffffffff)
4629 if (reload_completed && rtx_equal_p (target, source))
4630 return 0;
4631 if (generate)
4632 emit_constant_insn (cond, gen_rtx_SET (target, source));
4633 return 1;
4635 can_invert = 1;
4636 break;
4638 case XOR:
4639 if (remainder == 0)
4641 if (reload_completed && rtx_equal_p (target, source))
4642 return 0;
4643 if (generate)
4644 emit_constant_insn (cond, gen_rtx_SET (target, source));
4645 return 1;
4648 if (remainder == 0xffffffff)
4650 if (generate)
4651 emit_constant_insn (cond,
4652 gen_rtx_SET (target,
4653 gen_rtx_NOT (mode, source)));
4654 return 1;
4656 final_invert = 1;
4657 break;
4659 case MINUS:
4660 /* We treat MINUS as (val - source), since (source - val) is always
4661 passed as (source + (-val)). */
4662 if (remainder == 0)
4664 if (generate)
4665 emit_constant_insn (cond,
4666 gen_rtx_SET (target,
4667 gen_rtx_NEG (mode, source)));
4668 return 1;
4670 if (const_ok_for_arm (val))
4672 if (generate)
4673 emit_constant_insn (cond,
4674 gen_rtx_SET (target,
4675 gen_rtx_MINUS (mode, GEN_INT (val),
4676 source)));
4677 return 1;
4680 break;
4682 default:
4683 gcc_unreachable ();
4686 /* If we can do it in one insn get out quickly. */
4687 if (const_ok_for_op (val, code))
4689 if (generate)
4690 emit_constant_insn (cond,
4691 gen_rtx_SET (target,
4692 (source
4693 ? gen_rtx_fmt_ee (code, mode, source,
4694 GEN_INT (val))
4695 : GEN_INT (val))));
4696 return 1;
4699 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4700 insn. */
4701 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4702 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4704 if (generate)
4706 if (mode == SImode && i == 16)
4707 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4708 smaller insn. */
4709 emit_constant_insn (cond,
4710 gen_zero_extendhisi2
4711 (target, gen_lowpart (HImode, source)));
4712 else
4713 /* Extz only supports SImode, but we can coerce the operands
4714 into that mode. */
4715 emit_constant_insn (cond,
4716 gen_extzv_t2 (gen_lowpart (SImode, target),
4717 gen_lowpart (SImode, source),
4718 GEN_INT (i), const0_rtx));
4721 return 1;
4724 /* Calculate a few attributes that may be useful for specific
4725 optimizations. */
4726 /* Count number of leading zeros. */
4727 for (i = 31; i >= 0; i--)
4729 if ((remainder & (1 << i)) == 0)
4730 clear_sign_bit_copies++;
4731 else
4732 break;
4735 /* Count number of leading 1's. */
4736 for (i = 31; i >= 0; i--)
4738 if ((remainder & (1 << i)) != 0)
4739 set_sign_bit_copies++;
4740 else
4741 break;
4744 /* Count number of trailing zero's. */
4745 for (i = 0; i <= 31; i++)
4747 if ((remainder & (1 << i)) == 0)
4748 clear_zero_bit_copies++;
4749 else
4750 break;
4753 /* Count number of trailing 1's. */
4754 for (i = 0; i <= 31; i++)
4756 if ((remainder & (1 << i)) != 0)
4757 set_zero_bit_copies++;
4758 else
4759 break;
4762 switch (code)
4764 case SET:
4765 /* See if we can do this by sign_extending a constant that is known
4766 to be negative. This is a good way of doing it, since the shift
4767 may well merge into a subsequent insn. */
4768 if (set_sign_bit_copies > 1)
4770 if (const_ok_for_arm
4771 (temp1 = ARM_SIGN_EXTEND (remainder
4772 << (set_sign_bit_copies - 1))))
4774 if (generate)
4776 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4777 emit_constant_insn (cond,
4778 gen_rtx_SET (new_src, GEN_INT (temp1)));
4779 emit_constant_insn (cond,
4780 gen_ashrsi3 (target, new_src,
4781 GEN_INT (set_sign_bit_copies - 1)));
4783 return 2;
4785 /* For an inverted constant, we will need to set the low bits,
4786 these will be shifted out of harm's way. */
4787 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4788 if (const_ok_for_arm (~temp1))
4790 if (generate)
4792 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4793 emit_constant_insn (cond,
4794 gen_rtx_SET (new_src, GEN_INT (temp1)));
4795 emit_constant_insn (cond,
4796 gen_ashrsi3 (target, new_src,
4797 GEN_INT (set_sign_bit_copies - 1)));
4799 return 2;
4803 /* See if we can calculate the value as the difference between two
4804 valid immediates. */
4805 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4807 int topshift = clear_sign_bit_copies & ~1;
4809 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4810 & (0xff000000 >> topshift));
4812 /* If temp1 is zero, then that means the 9 most significant
4813 bits of remainder were 1 and we've caused it to overflow.
4814 When topshift is 0 we don't need to do anything since we
4815 can borrow from 'bit 32'. */
4816 if (temp1 == 0 && topshift != 0)
4817 temp1 = 0x80000000 >> (topshift - 1);
4819 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4821 if (const_ok_for_arm (temp2))
4823 if (generate)
4825 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4826 emit_constant_insn (cond,
4827 gen_rtx_SET (new_src, GEN_INT (temp1)));
4828 emit_constant_insn (cond,
4829 gen_addsi3 (target, new_src,
4830 GEN_INT (-temp2)));
4833 return 2;
4837 /* See if we can generate this by setting the bottom (or the top)
4838 16 bits, and then shifting these into the other half of the
4839 word. We only look for the simplest cases, to do more would cost
4840 too much. Be careful, however, not to generate this when the
4841 alternative would take fewer insns. */
4842 if (val & 0xffff0000)
4844 temp1 = remainder & 0xffff0000;
4845 temp2 = remainder & 0x0000ffff;
4847 /* Overlaps outside this range are best done using other methods. */
4848 for (i = 9; i < 24; i++)
4850 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4851 && !const_ok_for_arm (temp2))
4853 rtx new_src = (subtargets
4854 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4855 : target);
4856 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4857 source, subtargets, generate);
4858 source = new_src;
4859 if (generate)
4860 emit_constant_insn
4861 (cond,
4862 gen_rtx_SET
4863 (target,
4864 gen_rtx_IOR (mode,
4865 gen_rtx_ASHIFT (mode, source,
4866 GEN_INT (i)),
4867 source)));
4868 return insns + 1;
4872 /* Don't duplicate cases already considered. */
4873 for (i = 17; i < 24; i++)
4875 if (((temp1 | (temp1 >> i)) == remainder)
4876 && !const_ok_for_arm (temp1))
4878 rtx new_src = (subtargets
4879 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4880 : target);
4881 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4882 source, subtargets, generate);
4883 source = new_src;
4884 if (generate)
4885 emit_constant_insn
4886 (cond,
4887 gen_rtx_SET (target,
4888 gen_rtx_IOR
4889 (mode,
4890 gen_rtx_LSHIFTRT (mode, source,
4891 GEN_INT (i)),
4892 source)));
4893 return insns + 1;
4897 break;
4899 case IOR:
4900 case XOR:
4901 /* If we have IOR or XOR, and the constant can be loaded in a
4902 single instruction, and we can find a temporary to put it in,
4903 then this can be done in two instructions instead of 3-4. */
4904 if (subtargets
4905 /* TARGET can't be NULL if SUBTARGETS is 0 */
4906 || (reload_completed && !reg_mentioned_p (target, source)))
4908 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4910 if (generate)
4912 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4914 emit_constant_insn (cond,
4915 gen_rtx_SET (sub, GEN_INT (val)));
4916 emit_constant_insn (cond,
4917 gen_rtx_SET (target,
4918 gen_rtx_fmt_ee (code, mode,
4919 source, sub)));
4921 return 2;
4925 if (code == XOR)
4926 break;
4928 /* Convert.
4929 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4930 and the remainder 0s for e.g. 0xfff00000)
4931 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4933 This can be done in 2 instructions by using shifts with mov or mvn.
4934 e.g. for
4935 x = x | 0xfff00000;
4936 we generate.
4937 mvn r0, r0, asl #12
4938 mvn r0, r0, lsr #12 */
4939 if (set_sign_bit_copies > 8
4940 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4942 if (generate)
4944 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4945 rtx shift = GEN_INT (set_sign_bit_copies);
4947 emit_constant_insn
4948 (cond,
4949 gen_rtx_SET (sub,
4950 gen_rtx_NOT (mode,
4951 gen_rtx_ASHIFT (mode,
4952 source,
4953 shift))));
4954 emit_constant_insn
4955 (cond,
4956 gen_rtx_SET (target,
4957 gen_rtx_NOT (mode,
4958 gen_rtx_LSHIFTRT (mode, sub,
4959 shift))));
4961 return 2;
4964 /* Convert
4965 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4967 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4969 For eg. r0 = r0 | 0xfff
4970 mvn r0, r0, lsr #12
4971 mvn r0, r0, asl #12
4974 if (set_zero_bit_copies > 8
4975 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4977 if (generate)
4979 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4980 rtx shift = GEN_INT (set_zero_bit_copies);
4982 emit_constant_insn
4983 (cond,
4984 gen_rtx_SET (sub,
4985 gen_rtx_NOT (mode,
4986 gen_rtx_LSHIFTRT (mode,
4987 source,
4988 shift))));
4989 emit_constant_insn
4990 (cond,
4991 gen_rtx_SET (target,
4992 gen_rtx_NOT (mode,
4993 gen_rtx_ASHIFT (mode, sub,
4994 shift))));
4996 return 2;
4999 /* This will never be reached for Thumb2 because orn is a valid
5000 instruction. This is for Thumb1 and the ARM 32 bit cases.
5002 x = y | constant (such that ~constant is a valid constant)
5003 Transform this to
5004 x = ~(~y & ~constant).
5006 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5008 if (generate)
5010 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5011 emit_constant_insn (cond,
5012 gen_rtx_SET (sub,
5013 gen_rtx_NOT (mode, source)));
5014 source = sub;
5015 if (subtargets)
5016 sub = gen_reg_rtx (mode);
5017 emit_constant_insn (cond,
5018 gen_rtx_SET (sub,
5019 gen_rtx_AND (mode, source,
5020 GEN_INT (temp1))));
5021 emit_constant_insn (cond,
5022 gen_rtx_SET (target,
5023 gen_rtx_NOT (mode, sub)));
5025 return 3;
5027 break;
5029 case AND:
5030 /* See if two shifts will do 2 or more insn's worth of work. */
5031 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5033 HOST_WIDE_INT shift_mask = ((0xffffffff
5034 << (32 - clear_sign_bit_copies))
5035 & 0xffffffff);
5037 if ((remainder | shift_mask) != 0xffffffff)
5039 HOST_WIDE_INT new_val
5040 = ARM_SIGN_EXTEND (remainder | shift_mask);
5042 if (generate)
5044 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5045 insns = arm_gen_constant (AND, SImode, cond, new_val,
5046 new_src, source, subtargets, 1);
5047 source = new_src;
5049 else
5051 rtx targ = subtargets ? NULL_RTX : target;
5052 insns = arm_gen_constant (AND, mode, cond, new_val,
5053 targ, source, subtargets, 0);
5057 if (generate)
5059 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5060 rtx shift = GEN_INT (clear_sign_bit_copies);
5062 emit_insn (gen_ashlsi3 (new_src, source, shift));
5063 emit_insn (gen_lshrsi3 (target, new_src, shift));
5066 return insns + 2;
5069 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5071 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5073 if ((remainder | shift_mask) != 0xffffffff)
5075 HOST_WIDE_INT new_val
5076 = ARM_SIGN_EXTEND (remainder | shift_mask);
5077 if (generate)
5079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5081 insns = arm_gen_constant (AND, mode, cond, new_val,
5082 new_src, source, subtargets, 1);
5083 source = new_src;
5085 else
5087 rtx targ = subtargets ? NULL_RTX : target;
5089 insns = arm_gen_constant (AND, mode, cond, new_val,
5090 targ, source, subtargets, 0);
5094 if (generate)
5096 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5097 rtx shift = GEN_INT (clear_zero_bit_copies);
5099 emit_insn (gen_lshrsi3 (new_src, source, shift));
5100 emit_insn (gen_ashlsi3 (target, new_src, shift));
5103 return insns + 2;
5106 break;
5108 default:
5109 break;
5112 /* Calculate what the instruction sequences would be if we generated it
5113 normally, negated, or inverted. */
5114 if (code == AND)
5115 /* AND cannot be split into multiple insns, so invert and use BIC. */
5116 insns = 99;
5117 else
5118 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5120 if (can_negate)
5121 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5122 &neg_immediates);
5123 else
5124 neg_insns = 99;
5126 if (can_invert || final_invert)
5127 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5128 &inv_immediates);
5129 else
5130 inv_insns = 99;
5132 immediates = &pos_immediates;
5134 /* Is the negated immediate sequence more efficient? */
5135 if (neg_insns < insns && neg_insns <= inv_insns)
5137 insns = neg_insns;
5138 immediates = &neg_immediates;
5140 else
5141 can_negate = 0;
5143 /* Is the inverted immediate sequence more efficient?
5144 We must allow for an extra NOT instruction for XOR operations, although
5145 there is some chance that the final 'mvn' will get optimized later. */
5146 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5148 insns = inv_insns;
5149 immediates = &inv_immediates;
5151 else
5153 can_invert = 0;
5154 final_invert = 0;
5157 /* Now output the chosen sequence as instructions. */
5158 if (generate)
5160 for (i = 0; i < insns; i++)
5162 rtx new_src, temp1_rtx;
5164 temp1 = immediates->i[i];
5166 if (code == SET || code == MINUS)
5167 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5168 else if ((final_invert || i < (insns - 1)) && subtargets)
5169 new_src = gen_reg_rtx (mode);
5170 else
5171 new_src = target;
5173 if (can_invert)
5174 temp1 = ~temp1;
5175 else if (can_negate)
5176 temp1 = -temp1;
5178 temp1 = trunc_int_for_mode (temp1, mode);
5179 temp1_rtx = GEN_INT (temp1);
5181 if (code == SET)
5183 else if (code == MINUS)
5184 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5185 else
5186 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5188 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5189 source = new_src;
5191 if (code == SET)
5193 can_negate = can_invert;
5194 can_invert = 0;
5195 code = PLUS;
5197 else if (code == MINUS)
5198 code = PLUS;
5202 if (final_invert)
5204 if (generate)
5205 emit_constant_insn (cond, gen_rtx_SET (target,
5206 gen_rtx_NOT (mode, source)));
5207 insns++;
5210 return insns;
5213 /* Canonicalize a comparison so that we are more likely to recognize it.
5214 This can be done for a few constant compares, where we can make the
5215 immediate value easier to load. */
5217 static void
5218 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5219 bool op0_preserve_value)
5221 machine_mode mode;
5222 unsigned HOST_WIDE_INT i, maxval;
5224 mode = GET_MODE (*op0);
5225 if (mode == VOIDmode)
5226 mode = GET_MODE (*op1);
5228 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5230 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5231 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5232 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5233 for GTU/LEU in Thumb mode. */
5234 if (mode == DImode)
5237 if (*code == GT || *code == LE
5238 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5240 /* Missing comparison. First try to use an available
5241 comparison. */
5242 if (CONST_INT_P (*op1))
5244 i = INTVAL (*op1);
5245 switch (*code)
5247 case GT:
5248 case LE:
5249 if (i != maxval
5250 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5252 *op1 = GEN_INT (i + 1);
5253 *code = *code == GT ? GE : LT;
5254 return;
5256 break;
5257 case GTU:
5258 case LEU:
5259 if (i != ~((unsigned HOST_WIDE_INT) 0)
5260 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5262 *op1 = GEN_INT (i + 1);
5263 *code = *code == GTU ? GEU : LTU;
5264 return;
5266 break;
5267 default:
5268 gcc_unreachable ();
5272 /* If that did not work, reverse the condition. */
5273 if (!op0_preserve_value)
5275 std::swap (*op0, *op1);
5276 *code = (int)swap_condition ((enum rtx_code)*code);
5279 return;
5282 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5283 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5284 to facilitate possible combining with a cmp into 'ands'. */
5285 if (mode == SImode
5286 && GET_CODE (*op0) == ZERO_EXTEND
5287 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5288 && GET_MODE (XEXP (*op0, 0)) == QImode
5289 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5290 && subreg_lowpart_p (XEXP (*op0, 0))
5291 && *op1 == const0_rtx)
5292 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5293 GEN_INT (255));
5295 /* Comparisons smaller than DImode. Only adjust comparisons against
5296 an out-of-range constant. */
5297 if (!CONST_INT_P (*op1)
5298 || const_ok_for_arm (INTVAL (*op1))
5299 || const_ok_for_arm (- INTVAL (*op1)))
5300 return;
5302 i = INTVAL (*op1);
5304 switch (*code)
5306 case EQ:
5307 case NE:
5308 return;
5310 case GT:
5311 case LE:
5312 if (i != maxval
5313 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5315 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5316 *code = *code == GT ? GE : LT;
5317 return;
5319 break;
5321 case GE:
5322 case LT:
5323 if (i != ~maxval
5324 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5326 *op1 = GEN_INT (i - 1);
5327 *code = *code == GE ? GT : LE;
5328 return;
5330 break;
5332 case GTU:
5333 case LEU:
5334 if (i != ~((unsigned HOST_WIDE_INT) 0)
5335 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5337 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5338 *code = *code == GTU ? GEU : LTU;
5339 return;
5341 break;
5343 case GEU:
5344 case LTU:
5345 if (i != 0
5346 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5348 *op1 = GEN_INT (i - 1);
5349 *code = *code == GEU ? GTU : LEU;
5350 return;
5352 break;
5354 default:
5355 gcc_unreachable ();
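/* Worked example (illustration only): for a 32-bit signed comparison
   "x > 0xfff" the constant 0xfff is not a valid ARM data-processing
   immediate, but 0x1000 is, so the GT/0xfff comparison is rewritten as
   GE/0x1000 by the code above and can then be emitted directly:

       cmp     r0, #4096
       bge     .Ltaken         @ hypothetical label

   The symmetric adjustments handle LE/LT and the unsigned forms.  */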
5360 /* Define how to find the value returned by a function. */
5362 static rtx
5363 arm_function_value(const_tree type, const_tree func,
5364 bool outgoing ATTRIBUTE_UNUSED)
5366 machine_mode mode;
5367 int unsignedp ATTRIBUTE_UNUSED;
5368 rtx r ATTRIBUTE_UNUSED;
5370 mode = TYPE_MODE (type);
5372 if (TARGET_AAPCS_BASED)
5373 return aapcs_allocate_return_reg (mode, type, func);
5375 /* Promote integer types. */
5376 if (INTEGRAL_TYPE_P (type))
5377 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5379 /* Promotes small structs returned in a register to full-word size
5380 for big-endian AAPCS. */
5381 if (arm_return_in_msb (type))
5383 HOST_WIDE_INT size = int_size_in_bytes (type);
5384 if (size % UNITS_PER_WORD != 0)
5386 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5387 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5391 return arm_libcall_value_1 (mode);
5394 /* libcall hashtable helpers. */
5396 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5398 static inline hashval_t hash (const rtx_def *);
5399 static inline bool equal (const rtx_def *, const rtx_def *);
5400 static inline void remove (rtx_def *);
5403 inline bool
5404 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5406 return rtx_equal_p (p1, p2);
5409 inline hashval_t
5410 libcall_hasher::hash (const rtx_def *p1)
5412 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5415 typedef hash_table<libcall_hasher> libcall_table_type;
5417 static void
5418 add_libcall (libcall_table_type *htab, rtx libcall)
5420 *htab->find_slot (libcall, INSERT) = libcall;
5423 static bool
5424 arm_libcall_uses_aapcs_base (const_rtx libcall)
5426 static bool init_done = false;
5427 static libcall_table_type *libcall_htab = NULL;
5429 if (!init_done)
5431 init_done = true;
5433 libcall_htab = new libcall_table_type (31);
5434 add_libcall (libcall_htab,
5435 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5436 add_libcall (libcall_htab,
5437 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5438 add_libcall (libcall_htab,
5439 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5440 add_libcall (libcall_htab,
5441 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5443 add_libcall (libcall_htab,
5444 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5445 add_libcall (libcall_htab,
5446 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5447 add_libcall (libcall_htab,
5448 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5449 add_libcall (libcall_htab,
5450 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5452 add_libcall (libcall_htab,
5453 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5454 add_libcall (libcall_htab,
5455 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5456 add_libcall (libcall_htab,
5457 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5458 add_libcall (libcall_htab,
5459 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5460 add_libcall (libcall_htab,
5461 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5462 add_libcall (libcall_htab,
5463 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5464 add_libcall (libcall_htab,
5465 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5466 add_libcall (libcall_htab,
5467 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5469 /* Values from double-precision helper functions are returned in core
5470 registers if the selected core only supports single-precision
5471 arithmetic, even if we are using the hard-float ABI. The same is
5472 true for single-precision helpers, but we will never be using the
5473 hard-float ABI on a CPU which doesn't support single-precision
5474 operations in hardware. */
5475 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5476 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5477 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5478 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5479 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5480 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5481 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5482 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5483 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5484 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5485 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5486 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5487 SFmode));
5488 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5489 DFmode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5494 return libcall && libcall_htab->find (libcall) != NULL;
5497 static rtx
5498 arm_libcall_value_1 (machine_mode mode)
5500 if (TARGET_AAPCS_BASED)
5501 return aapcs_libcall_value (mode);
5502 else if (TARGET_IWMMXT_ABI
5503 && arm_vector_mode_supported_p (mode))
5504 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5505 else
5506 return gen_rtx_REG (mode, ARG_REGISTER (1));
5509 /* Define how to find the value returned by a library function
5510 assuming the value has mode MODE. */
5512 static rtx
5513 arm_libcall_value (machine_mode mode, const_rtx libcall)
5515 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5516 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5518 /* The following libcalls return their result in integer registers,
5519 even though they return a floating point value. */
5520 if (arm_libcall_uses_aapcs_base (libcall))
5521 return gen_rtx_REG (mode, ARG_REGISTER(1));
5525 return arm_libcall_value_1 (mode);
5528 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5530 static bool
5531 arm_function_value_regno_p (const unsigned int regno)
5533 if (regno == ARG_REGISTER (1)
5534 || (TARGET_32BIT
5535 && TARGET_AAPCS_BASED
5536 && TARGET_HARD_FLOAT
5537 && regno == FIRST_VFP_REGNUM)
5538 || (TARGET_IWMMXT_ABI
5539 && regno == FIRST_IWMMXT_REGNUM))
5540 return true;
5542 return false;
5545 /* Determine the amount of memory needed to store the possible return
5546 registers of an untyped call. */
5548 arm_apply_result_size (void)
5550 int size = 16;
5552 if (TARGET_32BIT)
5554 if (TARGET_HARD_FLOAT_ABI)
5555 size += 32;
5556 if (TARGET_IWMMXT_ABI)
5557 size += 8;
5560 return size;
5563 /* Decide whether TYPE should be returned in memory (true)
5564 or in a register (false). FNTYPE is the type of the function making
5565 the call. */
5566 static bool
5567 arm_return_in_memory (const_tree type, const_tree fntype)
5569 HOST_WIDE_INT size;
5571 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5573 if (TARGET_AAPCS_BASED)
5575 /* Simple, non-aggregate types (ie not including vectors and
5576 complex) are always returned in a register (or registers).
5577 We don't care about which register here, so we can short-cut
5578 some of the detail. */
5579 if (!AGGREGATE_TYPE_P (type)
5580 && TREE_CODE (type) != VECTOR_TYPE
5581 && TREE_CODE (type) != COMPLEX_TYPE)
5582 return false;
5584 /* Any return value that is no larger than one word can be
5585 returned in r0. */
5586 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5587 return false;
5589 /* Check any available co-processors to see if they accept the
5590 type as a register candidate (VFP, for example, can return
5591 some aggregates in consecutive registers). These aren't
5592 available if the call is variadic. */
5593 if (aapcs_select_return_coproc (type, fntype) >= 0)
5594 return false;
5596 /* Vector values should be returned using ARM registers, not
5597 memory (unless they're over 16 bytes, which will break since
5598 we only have four call-clobbered registers to play with). */
5599 if (TREE_CODE (type) == VECTOR_TYPE)
5600 return (size < 0 || size > (4 * UNITS_PER_WORD));
5602 /* The rest go in memory. */
5603 return true;
5606 if (TREE_CODE (type) == VECTOR_TYPE)
5607 return (size < 0 || size > (4 * UNITS_PER_WORD));
5609 if (!AGGREGATE_TYPE_P (type) &&
5610 (TREE_CODE (type) != VECTOR_TYPE))
5611 /* All simple types are returned in registers. */
5612 return false;
5614 if (arm_abi != ARM_ABI_APCS)
5616 /* ATPCS and later return aggregate types in memory only if they are
5617 larger than a word (or are variable size). */
5618 return (size < 0 || size > UNITS_PER_WORD);
5621 /* For the arm-wince targets we choose to be compatible with Microsoft's
5622 ARM and Thumb compilers, which always return aggregates in memory. */
5623 #ifndef ARM_WINCE
5624 /* All structures/unions bigger than one word are returned in memory.
5625 Also catch the case where int_size_in_bytes returns -1. In this case
5626 the aggregate is either huge or of variable size, and in either case
5627 we will want to return it via memory and not in a register. */
5628 if (size < 0 || size > UNITS_PER_WORD)
5629 return true;
5631 if (TREE_CODE (type) == RECORD_TYPE)
5633 tree field;
5635 /* For a struct the APCS says that we only return in a register
5636 if the type is 'integer like' and every addressable element
5637 has an offset of zero. For practical purposes this means
5638 that the structure can have at most one non bit-field element
5639 and that this element must be the first one in the structure. */
5641 /* Find the first field, ignoring non FIELD_DECL things which will
5642 have been created by C++. */
5643 for (field = TYPE_FIELDS (type);
5644 field && TREE_CODE (field) != FIELD_DECL;
5645 field = DECL_CHAIN (field))
5646 continue;
5648 if (field == NULL)
5649 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5651 /* Check that the first field is valid for returning in a register. */
5653 /* ... Floats are not allowed */
5654 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5655 return true;
5657 /* ... Aggregates that are not themselves valid for returning in
5658 a register are not allowed. */
5659 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5660 return true;
5662 /* Now check the remaining fields, if any. Only bitfields are allowed,
5663 since they are not addressable. */
5664 for (field = DECL_CHAIN (field);
5665 field;
5666 field = DECL_CHAIN (field))
5668 if (TREE_CODE (field) != FIELD_DECL)
5669 continue;
5671 if (!DECL_BIT_FIELD_TYPE (field))
5672 return true;
5675 return false;
5678 if (TREE_CODE (type) == UNION_TYPE)
5680 tree field;
5682 /* Unions can be returned in registers if every element is
5683 integral, or can be returned in an integer register. */
5684 for (field = TYPE_FIELDS (type);
5685 field;
5686 field = DECL_CHAIN (field))
5688 if (TREE_CODE (field) != FIELD_DECL)
5689 continue;
5691 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5692 return true;
5694 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5695 return true;
5698 return false;
5700 #endif /* not ARM_WINCE */
5702 /* Return all other types in memory. */
5703 return true;
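/* Classification examples (illustration only) for the AAPCS rules above:

     struct tiny { char a, b; };      2 bytes: returned in r0.
     struct pair { float x, y; };     8 bytes: returned in s0/s1 under the
                                      VFP variant, otherwise via memory.
     struct big  { int a, b, c; };    12 bytes, not a co-processor
                                      candidate: returned via a hidden
                                      pointer supplied by the caller.  */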
5706 const struct pcs_attribute_arg
5708 const char *arg;
5709 enum arm_pcs value;
5710 } pcs_attribute_args[] =
5712 {"aapcs", ARM_PCS_AAPCS},
5713 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5714 #if 0
5715 /* We could recognize these, but changes would be needed elsewhere
5716 * to implement them. */
5717 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5718 {"atpcs", ARM_PCS_ATPCS},
5719 {"apcs", ARM_PCS_APCS},
5720 #endif
5721 {NULL, ARM_PCS_UNKNOWN}
5724 static enum arm_pcs
5725 arm_pcs_from_attribute (tree attr)
5727 const struct pcs_attribute_arg *ptr;
5728 const char *arg;
5730 /* Get the value of the argument. */
5731 if (TREE_VALUE (attr) == NULL_TREE
5732 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5733 return ARM_PCS_UNKNOWN;
5735 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5737 /* Check it against the list of known arguments. */
5738 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5739 if (streq (arg, ptr->arg))
5740 return ptr->value;
5742 /* An unrecognized PCS variant. */
5743 return ARM_PCS_UNKNOWN;
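/* Usage sketch (illustration only):

     double sf (double) __attribute__ ((pcs ("aapcs")));      core regs r0-r3
     double hf (double) __attribute__ ((pcs ("aapcs-vfp")));  VFP regs (d0)

   Any other string falls through to ARM_PCS_UNKNOWN above and is warned
   about and ignored by the attribute handler.  */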
5746 /* Get the PCS variant to use for this call. TYPE is the function's type
5747 specification, DECL is the specific declaration. DECL may be null if
5748 the call could be indirect or if this is a library call. */
5749 static enum arm_pcs
5750 arm_get_pcs_model (const_tree type, const_tree decl)
5752 bool user_convention = false;
5753 enum arm_pcs user_pcs = arm_pcs_default;
5754 tree attr;
5756 gcc_assert (type);
5758 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5759 if (attr)
5761 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5762 user_convention = true;
5765 if (TARGET_AAPCS_BASED)
5767 /* Detect varargs functions. These always use the base rules
5768 (no argument is ever a candidate for a co-processor
5769 register). */
5770 bool base_rules = stdarg_p (type);
5772 if (user_convention)
5774 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5775 sorry ("non-AAPCS derived PCS variant");
5776 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5777 error ("variadic functions must use the base AAPCS variant");
5780 if (base_rules)
5781 return ARM_PCS_AAPCS;
5782 else if (user_convention)
5783 return user_pcs;
5784 else if (decl && flag_unit_at_a_time)
5786 /* Local functions never leak outside this compilation unit,
5787 so we are free to use whatever conventions are
5788 appropriate. */
5789 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5790 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5791 if (i && i->local)
5792 return ARM_PCS_AAPCS_LOCAL;
5795 else if (user_convention && user_pcs != arm_pcs_default)
5796 sorry ("PCS variant");
5798 /* For everything else we use the target's default. */
5799 return arm_pcs_default;
5803 static void
5804 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5805 const_tree fntype ATTRIBUTE_UNUSED,
5806 rtx libcall ATTRIBUTE_UNUSED,
5807 const_tree fndecl ATTRIBUTE_UNUSED)
5809 /* Record the unallocated VFP registers. */
5810 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5811 pcum->aapcs_vfp_reg_alloc = 0;
5814 /* Walk down the type tree of TYPE counting consecutive base elements.
5815 If *MODEP is VOIDmode, then set it to the first valid floating point
5816 type. If a non-floating point type is found, or if a floating point
5817 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5818 otherwise return the count in the sub-tree. */
5819 static int
5820 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5822 machine_mode mode;
5823 HOST_WIDE_INT size;
5825 switch (TREE_CODE (type))
5827 case REAL_TYPE:
5828 mode = TYPE_MODE (type);
5829 if (mode != DFmode && mode != SFmode && mode != HFmode)
5830 return -1;
5832 if (*modep == VOIDmode)
5833 *modep = mode;
5835 if (*modep == mode)
5836 return 1;
5838 break;
5840 case COMPLEX_TYPE:
5841 mode = TYPE_MODE (TREE_TYPE (type));
5842 if (mode != DFmode && mode != SFmode)
5843 return -1;
5845 if (*modep == VOIDmode)
5846 *modep = mode;
5848 if (*modep == mode)
5849 return 2;
5851 break;
5853 case VECTOR_TYPE:
5854 /* Use V2SImode and V4SImode as representatives of all 64-bit
5855 and 128-bit vector types, whether or not those modes are
5856 supported with the present options. */
5857 size = int_size_in_bytes (type);
5858 switch (size)
5860 case 8:
5861 mode = V2SImode;
5862 break;
5863 case 16:
5864 mode = V4SImode;
5865 break;
5866 default:
5867 return -1;
5870 if (*modep == VOIDmode)
5871 *modep = mode;
5873 /* Vector modes are considered to be opaque: two vectors are
5874 equivalent for the purposes of being homogeneous aggregates
5875 if they are the same size. */
5876 if (*modep == mode)
5877 return 1;
5879 break;
5881 case ARRAY_TYPE:
5883 int count;
5884 tree index = TYPE_DOMAIN (type);
5886 /* Can't handle incomplete types nor sizes that are not
5887 fixed. */
5888 if (!COMPLETE_TYPE_P (type)
5889 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5890 return -1;
5892 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5893 if (count == -1
5894 || !index
5895 || !TYPE_MAX_VALUE (index)
5896 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5897 || !TYPE_MIN_VALUE (index)
5898 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5899 || count < 0)
5900 return -1;
5902 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5903 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5905 /* There must be no padding. */
5906 if (wi::to_wide (TYPE_SIZE (type))
5907 != count * GET_MODE_BITSIZE (*modep))
5908 return -1;
5910 return count;
5913 case RECORD_TYPE:
5915 int count = 0;
5916 int sub_count;
5917 tree field;
5919 /* Can't handle incomplete types nor sizes that are not
5920 fixed. */
5921 if (!COMPLETE_TYPE_P (type)
5922 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5923 return -1;
5925 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5927 if (TREE_CODE (field) != FIELD_DECL)
5928 continue;
5930 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5931 if (sub_count < 0)
5932 return -1;
5933 count += sub_count;
5936 /* There must be no padding. */
5937 if (wi::to_wide (TYPE_SIZE (type))
5938 != count * GET_MODE_BITSIZE (*modep))
5939 return -1;
5941 return count;
5944 case UNION_TYPE:
5945 case QUAL_UNION_TYPE:
5947 /* These aren't very interesting except in a degenerate case. */
5948 int count = 0;
5949 int sub_count;
5950 tree field;
5952 /* Can't handle incomplete types nor sizes that are not
5953 fixed. */
5954 if (!COMPLETE_TYPE_P (type)
5955 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5956 return -1;
5958 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5960 if (TREE_CODE (field) != FIELD_DECL)
5961 continue;
5963 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5964 if (sub_count < 0)
5965 return -1;
5966 count = count > sub_count ? count : sub_count;
5969 /* There must be no padding. */
5970 if (wi::to_wide (TYPE_SIZE (type))
5971 != count * GET_MODE_BITSIZE (*modep))
5972 return -1;
5974 return count;
5977 default:
5978 break;
5981 return -1;
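/* Classification examples (illustration only):

     struct hfa { float a, b, c; };    3 x SFmode: candidate, count 3.
     struct hda { double d[2]; };      2 x DFmode: candidate, count 2.
     struct mix { float a; int b; };   mixed base types: returns -1.
     struct big { float f[5]; };       count 5 here, but rejected by the
                                       <= 4 element check in the caller
                                       below.  */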
5984 /* Return true if PCS_VARIANT should use VFP registers. */
5985 static bool
5986 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5988 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5990 static bool seen_thumb1_vfp = false;
5992 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5994 sorry ("Thumb-1 hard-float VFP ABI");
5995 /* sorry() is not immediately fatal, so only display this once. */
5996 seen_thumb1_vfp = true;
5999 return true;
6002 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6003 return false;
6005 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6006 (TARGET_VFP_DOUBLE || !is_double));
6009 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6010 suitable for passing or returning in VFP registers for the PCS
6011 variant selected. If it is, then *BASE_MODE is updated to contain
6012 a machine mode describing each element of the argument's type and
6013 *COUNT to hold the number of such elements. */
6014 static bool
6015 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6016 machine_mode mode, const_tree type,
6017 machine_mode *base_mode, int *count)
6019 machine_mode new_mode = VOIDmode;
6021 /* If we have the type information, prefer that to working things
6022 out from the mode. */
6023 if (type)
6025 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6027 if (ag_count > 0 && ag_count <= 4)
6028 *count = ag_count;
6029 else
6030 return false;
6032 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6033 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6034 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6036 *count = 1;
6037 new_mode = mode;
6039 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6041 *count = 2;
6042 new_mode = (mode == DCmode ? DFmode : SFmode);
6044 else
6045 return false;
6048 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6049 return false;
6051 *base_mode = new_mode;
6052 return true;
6055 static bool
6056 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6057 machine_mode mode, const_tree type)
6059 int count ATTRIBUTE_UNUSED;
6060 machine_mode ag_mode ATTRIBUTE_UNUSED;
6062 if (!use_vfp_abi (pcs_variant, false))
6063 return false;
6064 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6065 &ag_mode, &count);
6068 static bool
6069 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6070 const_tree type)
6072 if (!use_vfp_abi (pcum->pcs_variant, false))
6073 return false;
6075 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6076 &pcum->aapcs_vfp_rmode,
6077 &pcum->aapcs_vfp_rcount);
6080 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6081 for the behaviour of this function. */
6083 static bool
6084 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6085 const_tree type ATTRIBUTE_UNUSED)
6087 int rmode_size
6088 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6089 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6090 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6091 int regno;
6093 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6094 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6096 pcum->aapcs_vfp_reg_alloc = mask << regno;
6097 if (mode == BLKmode
6098 || (mode == TImode && ! TARGET_NEON)
6099 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6101 int i;
6102 int rcount = pcum->aapcs_vfp_rcount;
6103 int rshift = shift;
6104 machine_mode rmode = pcum->aapcs_vfp_rmode;
6105 rtx par;
6106 if (!TARGET_NEON)
6108 /* Avoid using unsupported vector modes. */
6109 if (rmode == V2SImode)
6110 rmode = DImode;
6111 else if (rmode == V4SImode)
6113 rmode = DImode;
6114 rcount *= 2;
6115 rshift /= 2;
6118 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6119 for (i = 0; i < rcount; i++)
6121 rtx tmp = gen_rtx_REG (rmode,
6122 FIRST_VFP_REGNUM + regno + i * rshift);
6123 tmp = gen_rtx_EXPR_LIST
6124 (VOIDmode, tmp,
6125 GEN_INT (i * GET_MODE_SIZE (rmode)));
6126 XVECEXP (par, 0, i) = tmp;
6129 pcum->aapcs_reg = par;
6131 else
6132 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6133 return true;
6135 return false;
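/* Allocation example (illustration only) for a call using the VFP PCS:

     void f (float a, double b, float c);

   a is allocated to s0; b needs an even-numbered pair and goes in d1
   (s2/s3); c then back-fills the still-free s1.  This is the contiguous
   block search over aapcs_vfp_regs_free performed above.  */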
6138 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6139 comment there for the behaviour of this function. */
6141 static rtx
6142 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6143 machine_mode mode,
6144 const_tree type ATTRIBUTE_UNUSED)
6146 if (!use_vfp_abi (pcs_variant, false))
6147 return NULL;
6149 if (mode == BLKmode
6150 || (GET_MODE_CLASS (mode) == MODE_INT
6151 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6152 && !TARGET_NEON))
6154 int count;
6155 machine_mode ag_mode;
6156 int i;
6157 rtx par;
6158 int shift;
6160 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6161 &ag_mode, &count);
6163 if (!TARGET_NEON)
6165 if (ag_mode == V2SImode)
6166 ag_mode = DImode;
6167 else if (ag_mode == V4SImode)
6169 ag_mode = DImode;
6170 count *= 2;
6173 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6174 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6175 for (i = 0; i < count; i++)
6177 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6178 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6179 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6180 XVECEXP (par, 0, i) = tmp;
6183 return par;
6186 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6189 static void
6190 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6191 machine_mode mode ATTRIBUTE_UNUSED,
6192 const_tree type ATTRIBUTE_UNUSED)
6194 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6195 pcum->aapcs_vfp_reg_alloc = 0;
6196 return;
6199 #define AAPCS_CP(X) \
6201 aapcs_ ## X ## _cum_init, \
6202 aapcs_ ## X ## _is_call_candidate, \
6203 aapcs_ ## X ## _allocate, \
6204 aapcs_ ## X ## _is_return_candidate, \
6205 aapcs_ ## X ## _allocate_return_reg, \
6206 aapcs_ ## X ## _advance \
6209 /* Table of co-processors that can be used to pass arguments in
6210 registers. Ideally no argument should be a candidate for more than
6211 one co-processor table entry, but the table is processed in order
6212 and stops after the first match. If that entry then fails to put
6213 the argument into a co-processor register, the argument will go on
6214 the stack. */
6215 static struct
6217 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6218 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6220 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6221 BLKmode) is a candidate for this co-processor's registers; this
6222 function should ignore any position-dependent state in
6223 CUMULATIVE_ARGS and only use call-type dependent information. */
6224 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6226 /* Return true if the argument does get a co-processor register; it
6227 should set aapcs_reg to an RTX of the register allocated as is
6228 required for a return from FUNCTION_ARG. */
6229 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6231 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6232 be returned in this co-processor's registers. */
6233 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6235 /* Allocate and return an RTX element to hold the return type of a call. This
6236 routine must not fail and will only be called if is_return_candidate
6237 returned true with the same parameters. */
6238 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6240 /* Finish processing this argument and prepare to start processing
6241 the next one. */
6242 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6243 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6245 AAPCS_CP(vfp)
6248 #undef AAPCS_CP
6250 static int
6251 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6252 const_tree type)
6254 int i;
6256 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6257 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6258 return i;
6260 return -1;
6263 static int
6264 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6266 /* We aren't passed a decl, so we can't check that a call is local.
6267 However, it isn't clear that that would be a win anyway, since it
6268 might limit some tail-calling opportunities. */
6269 enum arm_pcs pcs_variant;
6271 if (fntype)
6273 const_tree fndecl = NULL_TREE;
6275 if (TREE_CODE (fntype) == FUNCTION_DECL)
6277 fndecl = fntype;
6278 fntype = TREE_TYPE (fntype);
6281 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6283 else
6284 pcs_variant = arm_pcs_default;
6286 if (pcs_variant != ARM_PCS_AAPCS)
6288 int i;
6290 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6291 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6292 TYPE_MODE (type),
6293 type))
6294 return i;
6296 return -1;
6299 static rtx
6300 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6301 const_tree fntype)
6303 /* We aren't passed a decl, so we can't check that a call is local.
6304 However, it isn't clear that that would be a win anyway, since it
6305 might limit some tail-calling opportunities. */
6306 enum arm_pcs pcs_variant;
6307 int unsignedp ATTRIBUTE_UNUSED;
6309 if (fntype)
6311 const_tree fndecl = NULL_TREE;
6313 if (TREE_CODE (fntype) == FUNCTION_DECL)
6315 fndecl = fntype;
6316 fntype = TREE_TYPE (fntype);
6319 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6321 else
6322 pcs_variant = arm_pcs_default;
6324 /* Promote integer types. */
6325 if (type && INTEGRAL_TYPE_P (type))
6326 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6328 if (pcs_variant != ARM_PCS_AAPCS)
6330 int i;
6332 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6333 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6334 type))
6335 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6336 mode, type);
6339 /* Promotes small structs returned in a register to full-word size
6340 for big-endian AAPCS. */
6341 if (type && arm_return_in_msb (type))
6343 HOST_WIDE_INT size = int_size_in_bytes (type);
6344 if (size % UNITS_PER_WORD != 0)
6346 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6347 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6351 return gen_rtx_REG (mode, R0_REGNUM);
6354 static rtx
6355 aapcs_libcall_value (machine_mode mode)
6357 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6358 && GET_MODE_SIZE (mode) <= 4)
6359 mode = SImode;
6361 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6364 /* Lay out a function argument using the AAPCS rules. The rule
6365 numbers referred to here are those in the AAPCS. */
6366 static void
6367 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6368 const_tree type, bool named)
6370 int nregs, nregs2;
6371 int ncrn;
6373 /* We only need to do this once per argument. */
6374 if (pcum->aapcs_arg_processed)
6375 return;
6377 pcum->aapcs_arg_processed = true;
6379 /* Special case: if named is false then we are handling an incoming
6380 anonymous argument which is on the stack. */
6381 if (!named)
6382 return;
6384 /* Is this a potential co-processor register candidate? */
6385 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6387 int slot = aapcs_select_call_coproc (pcum, mode, type);
6388 pcum->aapcs_cprc_slot = slot;
6390 /* We don't have to apply any of the rules from part B of the
6391 preparation phase, these are handled elsewhere in the
6392 compiler. */
6394 if (slot >= 0)
6396 /* A Co-processor register candidate goes either in its own
6397 class of registers or on the stack. */
6398 if (!pcum->aapcs_cprc_failed[slot])
6400 /* C1.cp - Try to allocate the argument to co-processor
6401 registers. */
6402 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6403 return;
6405 /* C2.cp - Put the argument on the stack and note that we
6406 can't assign any more candidates in this slot. We also
6407 need to note that we have allocated stack space, so that
6408 we won't later try to split a non-cprc candidate between
6409 core registers and the stack. */
6410 pcum->aapcs_cprc_failed[slot] = true;
6411 pcum->can_split = false;
6414 /* We didn't get a register, so this argument goes on the
6415 stack. */
6416 gcc_assert (pcum->can_split == false);
6417 return;
6421 /* C3 - For double-word aligned arguments, round the NCRN up to the
6422 next even number. */
6423 ncrn = pcum->aapcs_ncrn;
6424 if (ncrn & 1)
6426 int res = arm_needs_doubleword_align (mode, type);
6427 /* Only warn during RTL expansion of call stmts, otherwise we would
6428 warn e.g. during gimplification even on functions that will be
6429 always inlined, and we'd warn multiple times. Don't warn when
6430 called in expand_function_start either, as we warn instead in
6431 arm_function_arg_boundary in that case. */
6432 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6433 inform (input_location, "parameter passing for argument of type "
6434 "%qT changed in GCC 7.1", type);
6435 else if (res > 0)
6436 ncrn++;
6439 nregs = ARM_NUM_REGS2(mode, type);
6441 /* Sigh, this test should really assert that nregs > 0, but a GCC
6442 extension allows empty structs and then gives them zero size; it
6443 then allows such a structure to be passed by value. For some of
6444 the code below we have to pretend that such an argument has
6445 non-zero size so that we 'locate' it correctly either in
6446 registers or on the stack. */
6447 gcc_assert (nregs >= 0);
6449 nregs2 = nregs ? nregs : 1;
6451 /* C4 - Argument fits entirely in core registers. */
6452 if (ncrn + nregs2 <= NUM_ARG_REGS)
6454 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6455 pcum->aapcs_next_ncrn = ncrn + nregs;
6456 return;
6459 /* C5 - Some core registers left and there are no arguments already
6460 on the stack: split this argument between the remaining core
6461 registers and the stack. */
6462 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6464 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6465 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6466 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6467 return;
6470 /* C6 - NCRN is set to 4. */
6471 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6473 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6474 return;
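/* Worked examples (illustration only) of the core-register rules above:

     void f (int a, long long b);
        a -> r0; b needs doubleword alignment, so C.3 rounds the NCRN up
        and b is passed in r2/r3, leaving r1 unused.

     void g (int a, int b, int c, long long d);
        a,b,c -> r0,r1,r2; for d the NCRN is rounded up to 4, so C.4 and
        C.5 both fail and d is passed entirely on the stack.  */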
6477 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6478 for a call to a function whose data type is FNTYPE.
6479 For a library call, FNTYPE is NULL. */
6480 void
6481 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6482 rtx libname,
6483 tree fndecl ATTRIBUTE_UNUSED)
6485 /* Long call handling. */
6486 if (fntype)
6487 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6488 else
6489 pcum->pcs_variant = arm_pcs_default;
6491 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6493 if (arm_libcall_uses_aapcs_base (libname))
6494 pcum->pcs_variant = ARM_PCS_AAPCS;
6496 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6497 pcum->aapcs_reg = NULL_RTX;
6498 pcum->aapcs_partial = 0;
6499 pcum->aapcs_arg_processed = false;
6500 pcum->aapcs_cprc_slot = -1;
6501 pcum->can_split = true;
6503 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6505 int i;
6507 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6509 pcum->aapcs_cprc_failed[i] = false;
6510 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6513 return;
6516 /* Legacy ABIs */
6518 /* On the ARM, the offset starts at 0. */
6519 pcum->nregs = 0;
6520 pcum->iwmmxt_nregs = 0;
6521 pcum->can_split = true;
6523 /* Varargs vectors are treated the same as long long.
6524 named_count avoids having to change the way arm handles 'named' */
6525 pcum->named_count = 0;
6526 pcum->nargs = 0;
6528 if (TARGET_REALLY_IWMMXT && fntype)
6530 tree fn_arg;
6532 for (fn_arg = TYPE_ARG_TYPES (fntype);
6533 fn_arg;
6534 fn_arg = TREE_CHAIN (fn_arg))
6535 pcum->named_count += 1;
6537 if (! pcum->named_count)
6538 pcum->named_count = INT_MAX;
6542 /* Return 1 if double word alignment is required for argument passing.
6543 Return -1 if double word alignment used to be required for argument
6544 passing before PR77728 ABI fix, but is not required anymore.
6545 Return 0 if double word alignment is not required and wasn't required
6546 before either. */
6547 static int
6548 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6550 if (!type)
6551 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6553 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6554 if (!AGGREGATE_TYPE_P (type))
6555 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6557 /* Array types: Use member alignment of element type. */
6558 if (TREE_CODE (type) == ARRAY_TYPE)
6559 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6561 int ret = 0;
6562 /* Record/aggregate types: Use greatest member alignment of any member. */
6563 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6564 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6566 if (TREE_CODE (field) == FIELD_DECL)
6567 return 1;
6568 else
6569 /* Before PR77728 fix, we were incorrectly considering also
6570 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6571 Make sure we can warn about that with -Wpsabi. */
6572 ret = -1;
6575 return ret;
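/* Hypothetical illustration of the -1 ("used to be required") case: a C++
   aggregate whose only doubleword-aligned entry in TYPE_FIELDS is not a
   FIELD_DECL, for example

     struct S { static double d; int i; };

   Before the PR77728 fix the static member's VAR_DECL made S appear to
   need doubleword alignment; now it does not, and -Wpsabi can report the
   GCC 7.1 parameter passing change.  */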
6579 /* Determine where to put an argument to a function.
6580 Value is zero to push the argument on the stack,
6581 or a hard register in which to store the argument.
6583 MODE is the argument's machine mode.
6584 TYPE is the data type of the argument (as a tree).
6585 This is null for libcalls where that information may
6586 not be available.
6587 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6588 the preceding args and about the function being called.
6589 NAMED is nonzero if this argument is a named parameter
6590 (otherwise it is an extra parameter matching an ellipsis).
6592 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6593 other arguments are passed on the stack. If (NAMED == 0) (which happens
6594 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6595 defined), say it is passed in the stack (function_prologue will
6596 indeed make it pass in the stack if necessary). */
6598 static rtx
6599 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6600 const_tree type, bool named)
6602 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6603 int nregs;
6605 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6606 a call insn (op3 of a call_value insn). */
6607 if (mode == VOIDmode)
6608 return const0_rtx;
6610 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6612 aapcs_layout_arg (pcum, mode, type, named);
6613 return pcum->aapcs_reg;
6616 /* Varargs vectors are treated the same as long long.
6617 named_count avoids having to change the way arm handles 'named' */
6618 if (TARGET_IWMMXT_ABI
6619 && arm_vector_mode_supported_p (mode)
6620 && pcum->named_count > pcum->nargs + 1)
6622 if (pcum->iwmmxt_nregs <= 9)
6623 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6624 else
6626 pcum->can_split = false;
6627 return NULL_RTX;
6631 /* Put doubleword aligned quantities in even register pairs. */
6632 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6634 int res = arm_needs_doubleword_align (mode, type);
6635 if (res < 0 && warn_psabi)
6636 inform (input_location, "parameter passing for argument of type "
6637 "%qT changed in GCC 7.1", type);
6638 else if (res > 0)
6639 pcum->nregs++;
6642 /* Only allow splitting an arg between regs and memory if all preceding
6643 args were allocated to regs. For args passed by reference we only count
6644 the reference pointer. */
6645 if (pcum->can_split)
6646 nregs = 1;
6647 else
6648 nregs = ARM_NUM_REGS2 (mode, type);
6650 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6651 return NULL_RTX;
6653 return gen_rtx_REG (mode, pcum->nregs);
6656 static unsigned int
6657 arm_function_arg_boundary (machine_mode mode, const_tree type)
6659 if (!ARM_DOUBLEWORD_ALIGN)
6660 return PARM_BOUNDARY;
6662 int res = arm_needs_doubleword_align (mode, type);
6663 if (res < 0 && warn_psabi)
6664 inform (input_location, "parameter passing for argument of type %qT "
6665 "changed in GCC 7.1", type);
6667 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6670 static int
6671 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6672 tree type, bool named)
6674 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6675 int nregs = pcum->nregs;
6677 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6679 aapcs_layout_arg (pcum, mode, type, named);
6680 return pcum->aapcs_partial;
6683 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6684 return 0;
6686 if (NUM_ARG_REGS > nregs
6687 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6688 && pcum->can_split)
6689 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6691 return 0;
6694 /* Update the data in PCUM to advance over an argument
6695 of mode MODE and data type TYPE.
6696 (TYPE is null for libcalls where that information may not be available.) */
6698 static void
6699 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6700 const_tree type, bool named)
6702 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6704 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6706 aapcs_layout_arg (pcum, mode, type, named);
6708 if (pcum->aapcs_cprc_slot >= 0)
6710 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6711 type);
6712 pcum->aapcs_cprc_slot = -1;
6715 /* Generic stuff. */
6716 pcum->aapcs_arg_processed = false;
6717 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6718 pcum->aapcs_reg = NULL_RTX;
6719 pcum->aapcs_partial = 0;
6721 else
6723 pcum->nargs += 1;
6724 if (arm_vector_mode_supported_p (mode)
6725 && pcum->named_count > pcum->nargs
6726 && TARGET_IWMMXT_ABI)
6727 pcum->iwmmxt_nregs += 1;
6728 else
6729 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6733 /* Variable sized types are passed by reference. This is a GCC
6734 extension to the ARM ABI. */
6736 static bool
6737 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6738 machine_mode mode ATTRIBUTE_UNUSED,
6739 const_tree type, bool named ATTRIBUTE_UNUSED)
6741 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6744 /* Encode the current state of the #pragma [no_]long_calls. */
6745 typedef enum
6747 OFF, /* No #pragma [no_]long_calls is in effect. */
6748 LONG, /* #pragma long_calls is in effect. */
6749 SHORT /* #pragma no_long_calls is in effect. */
6750 } arm_pragma_enum;
6752 static arm_pragma_enum arm_pragma_long_calls = OFF;
6754 void
6755 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6757 arm_pragma_long_calls = LONG;
6760 void
6761 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6763 arm_pragma_long_calls = SHORT;
6766 void
6767 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6769 arm_pragma_long_calls = OFF;
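/* Usage sketch (illustration only):

     #pragma long_calls
     void far_away (void);      subsequent declarations get "long_call"
     #pragma no_long_calls
     void next_door (void);     subsequent declarations get "short_call"
     #pragma long_calls_off
                                back to the command-line default  */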
6772 /* Handle an attribute requiring a FUNCTION_DECL;
6773 arguments as in struct attribute_spec.handler. */
6774 static tree
6775 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6776 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6778 if (TREE_CODE (*node) != FUNCTION_DECL)
6780 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6781 name);
6782 *no_add_attrs = true;
6785 return NULL_TREE;
6788 /* Handle an "interrupt" or "isr" attribute;
6789 arguments as in struct attribute_spec.handler. */
6790 static tree
6791 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6792 bool *no_add_attrs)
6794 if (DECL_P (*node))
6796 if (TREE_CODE (*node) != FUNCTION_DECL)
6798 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6799 name);
6800 *no_add_attrs = true;
6802 /* FIXME: the argument if any is checked for type attributes;
6803 should it be checked for decl ones? */
6805 else
6807 if (TREE_CODE (*node) == FUNCTION_TYPE
6808 || TREE_CODE (*node) == METHOD_TYPE)
6810 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6812 warning (OPT_Wattributes, "%qE attribute ignored",
6813 name);
6814 *no_add_attrs = true;
6817 else if (TREE_CODE (*node) == POINTER_TYPE
6818 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6819 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6820 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6822 *node = build_variant_type_copy (*node);
6823 TREE_TYPE (*node) = build_type_attribute_variant
6824 (TREE_TYPE (*node),
6825 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6826 *no_add_attrs = true;
6828 else
6830 /* Possibly pass this attribute on from the type to a decl. */
6831 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6832 | (int) ATTR_FLAG_FUNCTION_NEXT
6833 | (int) ATTR_FLAG_ARRAY_NEXT))
6835 *no_add_attrs = true;
6836 return tree_cons (name, args, NULL_TREE);
6838 else
6840 warning (OPT_Wattributes, "%qE attribute ignored",
6841 name);
6846 return NULL_TREE;
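/* Usage sketch (illustration only):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   Argument strings accepted by arm_isr_value include "IRQ", "FIQ", "SWI",
   "ABORT" and "UNDEF"; anything else yields ARM_FT_UNKNOWN and the
   attribute is ignored with a warning, as above.  */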
6849 /* Handle a "pcs" attribute; arguments as in struct
6850 attribute_spec.handler. */
6851 static tree
6852 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6853 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6855 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6857 warning (OPT_Wattributes, "%qE attribute ignored", name);
6858 *no_add_attrs = true;
6860 return NULL_TREE;
6863 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6864 /* Handle the "notshared" attribute. This attribute is another way of
6865 requesting hidden visibility. ARM's compiler supports
6866 "__declspec(notshared)"; we support the same thing via an
6867 attribute. */
6869 static tree
6870 arm_handle_notshared_attribute (tree *node,
6871 tree name ATTRIBUTE_UNUSED,
6872 tree args ATTRIBUTE_UNUSED,
6873 int flags ATTRIBUTE_UNUSED,
6874 bool *no_add_attrs)
6876 tree decl = TYPE_NAME (*node);
6878 if (decl)
6880 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6881 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6882 *no_add_attrs = false;
6884 return NULL_TREE;
6886 #endif
6888 /* This function returns true if a function with declaration FNDECL and type
6889 FNTYPE uses the stack to pass arguments or to return its result, and false
6890 otherwise. This is used for functions with the attributes
6891 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6892 diagnostic messages if the stack is used. NAME is the name of the attribute
6893 used. */
6895 static bool
6896 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6898 function_args_iterator args_iter;
6899 CUMULATIVE_ARGS args_so_far_v;
6900 cumulative_args_t args_so_far;
6901 bool first_param = true;
6902 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6904 /* Error out if any argument is passed on the stack. */
6905 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6906 args_so_far = pack_cumulative_args (&args_so_far_v);
6907 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6909 rtx arg_rtx;
6910 machine_mode arg_mode = TYPE_MODE (arg_type);
6912 prev_arg_type = arg_type;
6913 if (VOID_TYPE_P (arg_type))
6914 continue;
6916 if (!first_param)
6917 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6918 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6919 if (!arg_rtx
6920 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6922 error ("%qE attribute not available to functions with arguments "
6923 "passed on the stack", name);
6924 return true;
6926 first_param = false;
6929 /* Error out for variadic functions since we cannot control how many
6930 arguments will be passed and thus the stack could be used. stdarg_p () is not
6931 used for the checking to avoid browsing arguments twice. */
6932 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6934 error ("%qE attribute not available to functions with variable number "
6935 "of arguments", name);
6936 return true;
6939 /* Error out if return value is passed on the stack. */
6940 ret_type = TREE_TYPE (fntype);
6941 if (arm_return_in_memory (ret_type, fntype))
6943 error ("%qE attribute not available to functions that return value on "
6944 "the stack", name);
6945 return true;
6947 return false;
6950 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6951 function will check whether the attribute is allowed here and will add the
6952 attribute to the function declaration tree or otherwise issue a warning. */
6954 static tree
6955 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6956 tree /* args */,
6957 int /* flags */,
6958 bool *no_add_attrs)
6960 tree fndecl;
6962 if (!use_cmse)
6964 *no_add_attrs = true;
6965 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6966 name);
6967 return NULL_TREE;
6970 /* Ignore attribute for function types. */
6971 if (TREE_CODE (*node) != FUNCTION_DECL)
6973 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6974 name);
6975 *no_add_attrs = true;
6976 return NULL_TREE;
6979 fndecl = *node;
6981 /* Warn for static linkage functions. */
6982 if (!TREE_PUBLIC (fndecl))
6984 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6985 "with static linkage", name);
6986 *no_add_attrs = true;
6987 return NULL_TREE;
6990 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6991 TREE_TYPE (fndecl));
6992 return NULL_TREE;
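/* Usage sketch (illustration only), compiled with -mcmse on an ARMv8-M
   Security Extensions target:

     int __attribute__ ((cmse_nonsecure_entry))
     get_key (int slot)
     {
       return keys[slot];       keys is a hypothetical secure-side table
     }

   The function must have external linkage and must pass its arguments and
   return value entirely in registers, as checked above.  */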
6996 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6997 function will check whether the attribute is allowed here and will add the
6998 attribute to the function type tree or otherwise issue a diagnostic. The
6999 reason we check this at declaration time is to only allow the use of the
7000 attribute with declarations of function pointers and not function
7001 declarations. This function checks NODE is of the expected type and issues
7002 diagnostics otherwise using NAME. If it is not of the expected type
7003 *NO_ADD_ATTRS will be set to true. */
7005 static tree
7006 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7007 tree /* args */,
7008 int /* flags */,
7009 bool *no_add_attrs)
7011 tree decl = NULL_TREE, fntype = NULL_TREE;
7012 tree type;
7014 if (!use_cmse)
7016 *no_add_attrs = true;
7017 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7018 name);
7019 return NULL_TREE;
7022 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7024 decl = *node;
7025 fntype = TREE_TYPE (decl);
7028 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7029 fntype = TREE_TYPE (fntype);
7031 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7033 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7034 "function pointer", name);
7035 *no_add_attrs = true;
7036 return NULL_TREE;
7039 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7041 if (*no_add_attrs)
7042 return NULL_TREE;
7044 /* Prevent trees being shared among function types with and without
7045 cmse_nonsecure_call attribute. */
7046 type = TREE_TYPE (decl);
7048 type = build_distinct_type_copy (type);
7049 TREE_TYPE (decl) = type;
7050 fntype = type;
7052 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7054 type = fntype;
7055 fntype = TREE_TYPE (fntype);
7056 fntype = build_distinct_type_copy (fntype);
7057 TREE_TYPE (type) = fntype;
7060 /* Construct a type attribute and add it to the function type. */
7061 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7062 TYPE_ATTRIBUTES (fntype));
7063 TYPE_ATTRIBUTES (fntype) = attrs;
7064 return NULL_TREE;
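/* Usage sketch (illustration only): the attribute applies to the base
   function type reached through a pointer, not to a function declaration:

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (int);
     ns_cb_t *callback;          calls through callback use the non-secure
                                 call sequence (BLXNS)

   Putting the attribute directly on a function declaration is rejected
   with the warning above.  */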
7067 /* Return 0 if the attributes for two types are incompatible, 1 if they
7068 are compatible, and 2 if they are nearly compatible (which causes a
7069 warning to be generated). */
7070 static int
7071 arm_comp_type_attributes (const_tree type1, const_tree type2)
7073 int l1, l2, s1, s2;
7075 /* Check for mismatch of non-default calling convention. */
7076 if (TREE_CODE (type1) != FUNCTION_TYPE)
7077 return 1;
7079 /* Check for mismatched call attributes. */
7080 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7081 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7082 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7083 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7085 /* Only bother to check if an attribute is defined. */
7086 if (l1 | l2 | s1 | s2)
7088 /* If one type has an attribute, the other must have the same attribute. */
7089 if ((l1 != l2) || (s1 != s2))
7090 return 0;
7092 /* Disallow mixed attributes. */
7093 if ((l1 & s2) || (l2 & s1))
7094 return 0;
7097 /* Check for mismatched ISR attribute. */
7098 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7099 if (! l1)
7100 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7101 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7102 if (! l2)
7103 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7104 if (l1 != l2)
7105 return 0;
7107 l1 = lookup_attribute ("cmse_nonsecure_call",
7108 TYPE_ATTRIBUTES (type1)) != NULL;
7109 l2 = lookup_attribute ("cmse_nonsecure_call",
7110 TYPE_ATTRIBUTES (type2)) != NULL;
7112 if (l1 != l2)
7113 return 0;
7115 return 1;
7118 /* Assigns default attributes to newly defined type. This is used to
7119 set short_call/long_call attributes for function types of
7120 functions defined inside corresponding #pragma scopes. */
7121 static void
7122 arm_set_default_type_attributes (tree type)
7124 /* Add __attribute__ ((long_call)) to all functions, when
7125 inside #pragma long_calls or __attribute__ ((short_call)),
7126 when inside #pragma no_long_calls. */
7127 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7129 tree type_attr_list, attr_name;
7130 type_attr_list = TYPE_ATTRIBUTES (type);
7132 if (arm_pragma_long_calls == LONG)
7133 attr_name = get_identifier ("long_call");
7134 else if (arm_pragma_long_calls == SHORT)
7135 attr_name = get_identifier ("short_call");
7136 else
7137 return;
7139 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7140 TYPE_ATTRIBUTES (type) = type_attr_list;
7144 /* Return true if DECL is known to be linked into section SECTION. */
7146 static bool
7147 arm_function_in_section_p (tree decl, section *section)
7149 /* We can only be certain about the prevailing symbol definition. */
7150 if (!decl_binds_to_current_def_p (decl))
7151 return false;
7153 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7154 if (!DECL_SECTION_NAME (decl))
7156 /* Make sure that we will not create a unique section for DECL. */
7157 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7158 return false;
7161 return function_section (decl) == section;
7164 /* Return nonzero if a 32-bit "long_call" should be generated for
7165 a call from the current function to DECL. We generate a long_call
7166 if the function:
7168 a. has an __attribute__((long call))
7169 or b. is within the scope of a #pragma long_calls
7170 or c. the -mlong-calls command line switch has been specified
7172 However we do not generate a long call if the function:
7174 d. has an __attribute__ ((short_call))
7175 or e. is inside the scope of a #pragma no_long_calls
7176 or f. is defined in the same section as the current function. */
7178 bool
7179 arm_is_long_call_p (tree decl)
7181 tree attrs;
7183 if (!decl)
7184 return TARGET_LONG_CALLS;
7186 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7187 if (lookup_attribute ("short_call", attrs))
7188 return false;
7190 /* For "f", be conservative, and only cater for cases in which the
7191 whole of the current function is placed in the same section. */
7192 if (!flag_reorder_blocks_and_partition
7193 && TREE_CODE (decl) == FUNCTION_DECL
7194 && arm_function_in_section_p (decl, current_function_section ()))
7195 return false;
7197 if (lookup_attribute ("long_call", attrs))
7198 return true;
7200 return TARGET_LONG_CALLS;
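/* Usage sketch (illustration only):

     void far_func (void) __attribute__ ((long_call));    always called
                                                           via a register
     void near_func (void) __attribute__ ((short_call));  always a direct
                                                           BL, even under
                                                           -mlong-calls  */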
7203 /* Return nonzero if it is ok to make a tail-call to DECL. */
7204 static bool
7205 arm_function_ok_for_sibcall (tree decl, tree exp)
7207 unsigned long func_type;
7209 if (cfun->machine->sibcall_blocked)
7210 return false;
7212 /* Never tailcall something if we are generating code for Thumb-1. */
7213 if (TARGET_THUMB1)
7214 return false;
7216 /* The PIC register is live on entry to VxWorks PLT entries, so we
7217 must make the call before restoring the PIC register. */
7218 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7219 return false;
7221 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7222 may be used both as target of the call and base register for restoring
7223 the VFP registers */
7224 if (TARGET_APCS_FRAME && TARGET_ARM
7225 && TARGET_HARD_FLOAT
7226 && decl && arm_is_long_call_p (decl))
7227 return false;
7229 /* If we are interworking and the function is not declared static
7230 then we can't tail-call it unless we know that it exists in this
7231 compilation unit (since it might be a Thumb routine). */
7232 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7233 && !TREE_ASM_WRITTEN (decl))
7234 return false;
7236 func_type = arm_current_func_type ();
7237 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7238 if (IS_INTERRUPT (func_type))
7239 return false;
7241 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7242 generated for entry functions themselves. */
7243 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7244 return false;
7246 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7247 this would complicate matters for later code generation. */
7248 if (TREE_CODE (exp) == CALL_EXPR)
7250 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7251 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7252 return false;
7255 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7257 /* Check that the return value locations are the same. For
7258 example that we aren't returning a value from the sibling in
7259 a VFP register but then need to transfer it to a core
7260 register. */
7261 rtx a, b;
7262 tree decl_or_type = decl;
7264 /* If it is an indirect function pointer, get the function type. */
7265 if (!decl)
7266 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7268 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7269 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7270 cfun->decl, false);
7271 if (!rtx_equal_p (a, b))
7272 return false;
7275 /* Never tailcall if function may be called with a misaligned SP. */
7276 if (IS_STACKALIGN (func_type))
7277 return false;
7279 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7280 references should become a NOP. Don't convert such calls into
7281 sibling calls. */
7282 if (TARGET_AAPCS_BASED
7283 && arm_abi == ARM_ABI_AAPCS
7284 && decl
7285 && DECL_WEAK (decl))
7286 return false;
7288 /* We cannot do a tailcall for an indirect call by descriptor if all the
7289 argument registers are used because the only register left to load the
7290 address is IP and it will already contain the static chain. */
7291 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7293 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7294 CUMULATIVE_ARGS cum;
7295 cumulative_args_t cum_v;
7297 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7298 cum_v = pack_cumulative_args (&cum);
7300 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7302 tree type = TREE_VALUE (t);
7303 if (!VOID_TYPE_P (type))
7304 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7307 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7308 return false;
7311 /* Everything else is ok. */
7312 return true;
7316 /* Addressing mode support functions. */
7318 /* Return nonzero if X is a legitimate immediate operand when compiling
7319 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7321 legitimate_pic_operand_p (rtx x)
7323 if (GET_CODE (x) == SYMBOL_REF
7324 || (GET_CODE (x) == CONST
7325 && GET_CODE (XEXP (x, 0)) == PLUS
7326 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7327 return 0;
7329 return 1;
7332 /* Record that the current function needs a PIC register. Initialize
7333 cfun->machine->pic_reg if we have not already done so. */
7335 static void
7336 require_pic_register (void)
7338 /* A lot of the logic here is made obscure by the fact that this
7339 routine gets called as part of the rtx cost estimation process.
7340 We don't want those calls to affect any assumptions about the real
7341 function; and further, we can't call entry_of_function() until we
7342 start the real expansion process. */
7343 if (!crtl->uses_pic_offset_table)
7345 gcc_assert (can_create_pseudo_p ());
7346 if (arm_pic_register != INVALID_REGNUM
7347 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7349 if (!cfun->machine->pic_reg)
7350 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7352 /* Play games to avoid marking the function as needing pic
7353 if we are being called as part of the cost-estimation
7354 process. */
7355 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7356 crtl->uses_pic_offset_table = 1;
7358 else
7360 rtx_insn *seq, *insn;
7362 if (!cfun->machine->pic_reg)
7363 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7365 /* Play games to avoid marking the function as needing pic
7366 if we are being called as part of the cost-estimation
7367 process. */
7368 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7370 crtl->uses_pic_offset_table = 1;
7371 start_sequence ();
7373 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7374 && arm_pic_register > LAST_LO_REGNUM)
7375 emit_move_insn (cfun->machine->pic_reg,
7376 gen_rtx_REG (Pmode, arm_pic_register));
7377 else
7378 arm_load_pic_register (0UL);
7380 seq = get_insns ();
7381 end_sequence ();
7383 for (insn = seq; insn; insn = NEXT_INSN (insn))
7384 if (INSN_P (insn))
7385 INSN_LOCATION (insn) = prologue_location;
7387 /* We can be called during expansion of PHI nodes, where
7388 we can't yet emit instructions directly in the final
7389 insn stream. Queue the insns on the entry edge; they will
7390 be committed after everything else is expanded. */
7391 insert_insn_on_edge (seq,
7392 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7399 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7401 if (GET_CODE (orig) == SYMBOL_REF
7402 || GET_CODE (orig) == LABEL_REF)
7404 if (reg == 0)
7406 gcc_assert (can_create_pseudo_p ());
7407 reg = gen_reg_rtx (Pmode);
7410 /* VxWorks does not impose a fixed gap between segments; the run-time
7411 gap can be different from the object-file gap. We therefore can't
7412 use GOTOFF unless we are absolutely sure that the symbol is in the
7413 same segment as the GOT. Unfortunately, the flexibility of linker
7414 scripts means that we can't be sure of that in general, so assume
7415 that GOTOFF is never valid on VxWorks. */
7416 /* References to weak symbols cannot be resolved locally: they
7417 may be overridden by a non-weak definition at link time. */
7418 rtx_insn *insn;
7419 if ((GET_CODE (orig) == LABEL_REF
7420 || (GET_CODE (orig) == SYMBOL_REF
7421 && SYMBOL_REF_LOCAL_P (orig)
7422 && (SYMBOL_REF_DECL (orig)
7423 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7424 && NEED_GOT_RELOC
7425 && arm_pic_data_is_text_relative)
7426 insn = arm_pic_static_addr (orig, reg);
7427 else
7429 rtx pat;
7430 rtx mem;
7432 /* If this function doesn't have a pic register, create one now. */
7433 require_pic_register ();
7435 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7437 /* Make the MEM as close to a constant as possible. */
7438 mem = SET_SRC (pat);
7439 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7440 MEM_READONLY_P (mem) = 1;
7441 MEM_NOTRAP_P (mem) = 1;
7443 insn = emit_insn (pat);
7446 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7447 by the loop optimizer. */
7448 set_unique_reg_note (insn, REG_EQUAL, orig);
7450 return reg;
7452 else if (GET_CODE (orig) == CONST)
7454 rtx base, offset;
7456 if (GET_CODE (XEXP (orig, 0)) == PLUS
7457 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7458 return orig;
7460 /* Handle the case where we have: const (UNSPEC_TLS). */
7461 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7462 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7463 return orig;
7465 /* Handle the case where we have:
7466 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7467 CONST_INT. */
7468 if (GET_CODE (XEXP (orig, 0)) == PLUS
7469 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7470 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7472 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7473 return orig;
7476 if (reg == 0)
7478 gcc_assert (can_create_pseudo_p ());
7479 reg = gen_reg_rtx (Pmode);
7482 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7484 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7485 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7486 base == reg ? 0 : reg);
7488 if (CONST_INT_P (offset))
7490 /* The base register doesn't really matter; we only want to
7491 test the index for the appropriate mode. */
7492 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7494 gcc_assert (can_create_pseudo_p ());
7495 offset = force_reg (Pmode, offset);
7498 if (CONST_INT_P (offset))
7499 return plus_constant (Pmode, base, INTVAL (offset));
7502 if (GET_MODE_SIZE (mode) > 4
7503 && (GET_MODE_CLASS (mode) == MODE_INT
7504 || TARGET_SOFT_FLOAT))
7506 emit_insn (gen_addsi3 (reg, base, offset));
7507 return reg;
7510 return gen_rtx_PLUS (Pmode, base, offset);
7513 return orig;
7517 /* Find a spare register to use during the prolog of a function. */
7519 static int
7520 thumb_find_work_register (unsigned long pushed_regs_mask)
7522 int reg;
7524 /* Check the argument registers first as these are call-used. The
7525 register allocation order means that sometimes r3 might be used
7526 but earlier argument registers might not, so check them all. */
7527 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7528 if (!df_regs_ever_live_p (reg))
7529 return reg;
7531 /* Before going on to check the call-saved registers we can try a couple
7532 more ways of deducing that r3 is available. The first is when we are
7533 pushing anonymous arguments onto the stack and we have less than 4
7534 registers worth of fixed arguments(*). In this case r3 will be part of
7535 the variable argument list and so we can be sure that it will be
7536 pushed right at the start of the function. Hence it will be available
7537 for the rest of the prologue.
7538 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7539 if (cfun->machine->uses_anonymous_args
7540 && crtl->args.pretend_args_size > 0)
7541 return LAST_ARG_REGNUM;
7543 /* The other case is when we have fixed arguments but less than 4 registers
7544 worth. In this case r3 might be used in the body of the function, but
7545 it is not being used to convey an argument into the function. In theory
7546 we could just check crtl->args.size to see how many bytes are
7547 being passed in argument registers, but it seems that it is unreliable.
7548 Sometimes it will have the value 0 when in fact arguments are being
7549 passed. (See testcase execute/20021111-1.c for an example). So we also
7550 check the args_info.nregs field as well. The problem with this field is
7551 that it makes no allowances for arguments that are passed to the
7552 function but which are not used. Hence we could miss an opportunity
7553 when a function has an unused argument in r3. But it is better to be
7554 safe than to be sorry. */
7555 if (! cfun->machine->uses_anonymous_args
7556 && crtl->args.size >= 0
7557 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7558 && (TARGET_AAPCS_BASED
7559 ? crtl->args.info.aapcs_ncrn < 4
7560 : crtl->args.info.nregs < 4))
7561 return LAST_ARG_REGNUM;
7563 /* Otherwise look for a call-saved register that is going to be pushed. */
7564 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7565 if (pushed_regs_mask & (1 << reg))
7566 return reg;
7568 if (TARGET_THUMB2)
7570 /* Thumb-2 can use high regs. */
7571 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7572 if (pushed_regs_mask & (1 << reg))
7573 return reg;
7575 /* Something went wrong - thumb_compute_save_reg_mask()
7576 should have arranged for a suitable register to be pushed. */
7577 gcc_unreachable ();
7580 static GTY(()) int pic_labelno;
7582 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7583 low register. */
7585 void
7586 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7588 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7590 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7591 return;
7593 gcc_assert (flag_pic);
7595 pic_reg = cfun->machine->pic_reg;
7596 if (TARGET_VXWORKS_RTP)
7598 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7599 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7600 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7602 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7604 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7605 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7607 else
7609 /* We use an UNSPEC rather than a LABEL_REF because this label
7610 never appears in the code stream. */
7612 labelno = GEN_INT (pic_labelno++);
7613 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7614 l1 = gen_rtx_CONST (VOIDmode, l1);
7616 /* On the ARM the PC register contains 'dot + 8' at the time of the
7617 addition, on the Thumb it is 'dot + 4'. */
7618 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7619 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7620 UNSPEC_GOTSYM_OFF);
7621 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7623 if (TARGET_32BIT)
7625 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7627 else /* TARGET_THUMB1 */
7629 if (arm_pic_register != INVALID_REGNUM
7630 && REGNO (pic_reg) > LAST_LO_REGNUM)
7632 /* We will have pushed the pic register, so we should always be
7633 able to find a work register. */
7634 pic_tmp = gen_rtx_REG (SImode,
7635 thumb_find_work_register (saved_regs));
7636 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7637 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7638 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7640 else if (arm_pic_register != INVALID_REGNUM
7641 && arm_pic_register > LAST_LO_REGNUM
7642 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7644 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7645 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7646 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7648 else
7649 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7653 /* Need to emit this whether or not we obey regdecls,
7654 since setjmp/longjmp can cause life info to screw up. */
7655 emit_use (pic_reg);
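/* For reference (a rough sketch only, not the literal output of this
   function): the non-VxWorks sequence built above typically assembles to
   something like

       ldr   rPIC, .LCn
     .LPICm:
       add   rPIC, pc, rPIC
       ...
     .LCn:
       .word _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8)

   with 8 replaced by 4 in Thumb state, which is why the UNSPEC built above
   folds the 'dot + 8' / 'dot + 4' pipeline offset into the constant.  */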
7658 /* Generate code to load the address of a static var when flag_pic is set. */
7659 static rtx_insn *
7660 arm_pic_static_addr (rtx orig, rtx reg)
7662 rtx l1, labelno, offset_rtx;
7664 gcc_assert (flag_pic);
7666 /* We use an UNSPEC rather than a LABEL_REF because this label
7667 never appears in the code stream. */
7668 labelno = GEN_INT (pic_labelno++);
7669 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7670 l1 = gen_rtx_CONST (VOIDmode, l1);
7672 /* On the ARM the PC register contains 'dot + 8' at the time of the
7673 addition, on the Thumb it is 'dot + 4'. */
7674 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7675 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7676 UNSPEC_SYMBOL_OFFSET);
7677 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7679 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7682 /* Return nonzero if X is valid as an ARM state addressing register. */
7683 static int
7684 arm_address_register_rtx_p (rtx x, int strict_p)
7686 int regno;
7688 if (!REG_P (x))
7689 return 0;
7691 regno = REGNO (x);
7693 if (strict_p)
7694 return ARM_REGNO_OK_FOR_BASE_P (regno);
7696 return (regno <= LAST_ARM_REGNUM
7697 || regno >= FIRST_PSEUDO_REGISTER
7698 || regno == FRAME_POINTER_REGNUM
7699 || regno == ARG_POINTER_REGNUM);
7702 /* Return TRUE if this rtx is the difference of a symbol and a label,
7703 and will reduce to a PC-relative relocation in the object file.
7704 Expressions like this can be left alone when generating PIC, rather
7705 than forced through the GOT. */
7706 static int
7707 pcrel_constant_p (rtx x)
7709 if (GET_CODE (x) == MINUS)
7710 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7712 return FALSE;
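/* For example (illustrative), a (minus (symbol_ref ("x")) (label_ref ...))
   expression, i.e. the value of "x - .Lsome_local_label", reduces to a
   PC-relative relocation and so satisfies this predicate.  */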
7715 /* Return true if X will surely end up in an index register after next
7716 splitting pass. */
7717 static bool
7718 will_be_in_index_register (const_rtx x)
7720 /* arm.md: calculate_pic_address will split this into a register. */
7721 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7724 /* Return nonzero if X is a valid ARM state address operand. */
7726 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7727 int strict_p)
7729 bool use_ldrd;
7730 enum rtx_code code = GET_CODE (x);
7732 if (arm_address_register_rtx_p (x, strict_p))
7733 return 1;
7735 use_ldrd = (TARGET_LDRD
7736 && (mode == DImode || mode == DFmode));
7738 if (code == POST_INC || code == PRE_DEC
7739 || ((code == PRE_INC || code == POST_DEC)
7740 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7741 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7743 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7744 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7745 && GET_CODE (XEXP (x, 1)) == PLUS
7746 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7748 rtx addend = XEXP (XEXP (x, 1), 1);
7750 /* Don't allow ldrd post-increment by register because it's hard
7751 to fix up invalid register choices. */
7752 if (use_ldrd
7753 && GET_CODE (x) == POST_MODIFY
7754 && REG_P (addend))
7755 return 0;
7757 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7758 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7761 /* After reload constants split into minipools will have addresses
7762 from a LABEL_REF. */
7763 else if (reload_completed
7764 && (code == LABEL_REF
7765 || (code == CONST
7766 && GET_CODE (XEXP (x, 0)) == PLUS
7767 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7768 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7769 return 1;
7771 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7772 return 0;
7774 else if (code == PLUS)
7776 rtx xop0 = XEXP (x, 0);
7777 rtx xop1 = XEXP (x, 1);
7779 return ((arm_address_register_rtx_p (xop0, strict_p)
7780 && ((CONST_INT_P (xop1)
7781 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7782 || (!strict_p && will_be_in_index_register (xop1))))
7783 || (arm_address_register_rtx_p (xop1, strict_p)
7784 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7787 #if 0
7788 /* Reload currently can't handle MINUS, so disable this for now */
7789 else if (GET_CODE (x) == MINUS)
7791 rtx xop0 = XEXP (x, 0);
7792 rtx xop1 = XEXP (x, 1);
7794 return (arm_address_register_rtx_p (xop0, strict_p)
7795 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7797 #endif
7799 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7800 && code == SYMBOL_REF
7801 && CONSTANT_POOL_ADDRESS_P (x)
7802 && ! (flag_pic
7803 && symbol_mentioned_p (get_pool_constant (x))
7804 && ! pcrel_constant_p (get_pool_constant (x))))
7805 return 1;
7807 return 0;
7810 /* Return true if we can avoid creating a constant pool entry for x. */
7811 static bool
7812 can_avoid_literal_pool_for_label_p (rtx x)
7814 /* Normally we can assign constant values to target registers without
7815 the help of the constant pool. But there are cases where we have to use
7816 the constant pool, e.g.:
7817 1) assigning a label to a register;
7818 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7820 A constant pool access of the form:
7821 (set (reg r0) (mem (symbol_ref (".LC0"))))
7822 will cause the use of the literal pool (later, in arm_reorg).
7823 So here we mark such a form as invalid, and the compiler
7824 will then adjust it into:
7825 (set (reg r0) (symbol_ref (".LC0")))
7826 (set (reg r0) (mem (reg r0))).
7827 No extra register is required, and (mem (reg r0)) won't cause the use
7828 of literal pools. */
7829 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7830 && CONSTANT_POOL_ADDRESS_P (x))
7831 return 1;
7832 return 0;
7836 /* Return nonzero if X is a valid Thumb-2 address operand. */
7837 static int
7838 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7840 bool use_ldrd;
7841 enum rtx_code code = GET_CODE (x);
7843 if (arm_address_register_rtx_p (x, strict_p))
7844 return 1;
7846 use_ldrd = (TARGET_LDRD
7847 && (mode == DImode || mode == DFmode));
7849 if (code == POST_INC || code == PRE_DEC
7850 || ((code == PRE_INC || code == POST_DEC)
7851 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7852 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7854 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7855 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7856 && GET_CODE (XEXP (x, 1)) == PLUS
7857 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7859 /* Thumb-2 only has autoincrement by constant. */
7860 rtx addend = XEXP (XEXP (x, 1), 1);
7861 HOST_WIDE_INT offset;
7863 if (!CONST_INT_P (addend))
7864 return 0;
7866 offset = INTVAL(addend);
7867 if (GET_MODE_SIZE (mode) <= 4)
7868 return (offset > -256 && offset < 256);
7870 return (use_ldrd && offset > -1024 && offset < 1024
7871 && (offset & 3) == 0);
7874 /* After reload constants split into minipools will have addresses
7875 from a LABEL_REF. */
7876 else if (reload_completed
7877 && (code == LABEL_REF
7878 || (code == CONST
7879 && GET_CODE (XEXP (x, 0)) == PLUS
7880 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7881 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7882 return 1;
7884 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7885 return 0;
7887 else if (code == PLUS)
7889 rtx xop0 = XEXP (x, 0);
7890 rtx xop1 = XEXP (x, 1);
7892 return ((arm_address_register_rtx_p (xop0, strict_p)
7893 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7894 || (!strict_p && will_be_in_index_register (xop1))))
7895 || (arm_address_register_rtx_p (xop1, strict_p)
7896 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7899 else if (can_avoid_literal_pool_for_label_p (x))
7900 return 0;
7902 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7903 && code == SYMBOL_REF
7904 && CONSTANT_POOL_ADDRESS_P (x)
7905 && ! (flag_pic
7906 && symbol_mentioned_p (get_pool_constant (x))
7907 && ! pcrel_constant_p (get_pool_constant (x))))
7908 return 1;
7910 return 0;
7913 /* Return nonzero if INDEX is valid for an address index operand in
7914 ARM state. */
7915 static int
7916 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7917 int strict_p)
7919 HOST_WIDE_INT range;
7920 enum rtx_code code = GET_CODE (index);
7922 /* Standard coprocessor addressing modes. */
7923 if (TARGET_HARD_FLOAT
7924 && (mode == SFmode || mode == DFmode))
7925 return (code == CONST_INT && INTVAL (index) < 1024
7926 && INTVAL (index) > -1024
7927 && (INTVAL (index) & 3) == 0);
7929 /* For quad modes, we restrict the constant offset to be slightly less
7930 than what the instruction format permits. We do this because for
7931 quad mode moves, we will actually decompose them into two separate
7932 double-mode reads or writes. INDEX must therefore be a valid
7933 (double-mode) offset and so should INDEX+8. */
7934 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7935 return (code == CONST_INT
7936 && INTVAL (index) < 1016
7937 && INTVAL (index) > -1024
7938 && (INTVAL (index) & 3) == 0);
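/* A worked example of the restriction above (illustrative): a quad-mode
   access at offset 1012 is split into double-mode accesses at 1012 and 1020,
   both inside the double-mode window, whereas an offset of 1016 would push
   the second access to 1024; hence the "< 1016" bound here instead of the
   "< 1024" bound used for double modes.  */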
7940 /* We have no such constraint on double mode offsets, so we permit the
7941 full range of the instruction format. */
7942 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7943 return (code == CONST_INT
7944 && INTVAL (index) < 1024
7945 && INTVAL (index) > -1024
7946 && (INTVAL (index) & 3) == 0);
7948 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7949 return (code == CONST_INT
7950 && INTVAL (index) < 1024
7951 && INTVAL (index) > -1024
7952 && (INTVAL (index) & 3) == 0);
7954 if (arm_address_register_rtx_p (index, strict_p)
7955 && (GET_MODE_SIZE (mode) <= 4))
7956 return 1;
7958 if (mode == DImode || mode == DFmode)
7960 if (code == CONST_INT)
7962 HOST_WIDE_INT val = INTVAL (index);
7964 /* Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
7965 If vldr is selected it uses arm_coproc_mem_operand. */
7966 if (TARGET_LDRD)
7967 return val > -256 && val < 256;
7968 else
7969 return val > -4096 && val < 4092;
7972 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7975 if (GET_MODE_SIZE (mode) <= 4
7976 && ! (arm_arch4
7977 && (mode == HImode
7978 || mode == HFmode
7979 || (mode == QImode && outer == SIGN_EXTEND))))
7981 if (code == MULT)
7983 rtx xiop0 = XEXP (index, 0);
7984 rtx xiop1 = XEXP (index, 1);
7986 return ((arm_address_register_rtx_p (xiop0, strict_p)
7987 && power_of_two_operand (xiop1, SImode))
7988 || (arm_address_register_rtx_p (xiop1, strict_p)
7989 && power_of_two_operand (xiop0, SImode)));
7991 else if (code == LSHIFTRT || code == ASHIFTRT
7992 || code == ASHIFT || code == ROTATERT)
7994 rtx op = XEXP (index, 1);
7996 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7997 && CONST_INT_P (op)
7998 && INTVAL (op) > 0
7999 && INTVAL (op) <= 31);
8003 /* For ARM v4 we may be doing a sign-extend operation during the
8004 load. */
8005 if (arm_arch4)
8007 if (mode == HImode
8008 || mode == HFmode
8009 || (outer == SIGN_EXTEND && mode == QImode))
8010 range = 256;
8011 else
8012 range = 4096;
8014 else
8015 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8017 return (code == CONST_INT
8018 && INTVAL (index) < range
8019 && INTVAL (index) > -range);
8022 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8023 index operand, i.e. 1, 2, 4 or 8. */
8024 static bool
8025 thumb2_index_mul_operand (rtx op)
8027 HOST_WIDE_INT val;
8029 if (!CONST_INT_P (op))
8030 return false;
8032 val = INTVAL(op);
8033 return (val == 1 || val == 2 || val == 4 || val == 8);
8036 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8037 static int
8038 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8040 enum rtx_code code = GET_CODE (index);
8042 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8043 /* Standard coprocessor addressing modes. */
8044 if (TARGET_HARD_FLOAT
8045 && (mode == SFmode || mode == DFmode))
8046 return (code == CONST_INT && INTVAL (index) < 1024
8047 /* Thumb-2 allows only a > -256 index range for its core register
8048 load/stores. Since we allow SF/DF in core registers, we have
8049 to use the intersection between -256~4096 (core) and -1024~1024
8050 (coprocessor). */
8051 && INTVAL (index) > -256
8052 && (INTVAL (index) & 3) == 0);
8054 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8056 /* For DImode assume values will usually live in core regs
8057 and only allow LDRD addressing modes. */
8058 if (!TARGET_LDRD || mode != DImode)
8059 return (code == CONST_INT
8060 && INTVAL (index) < 1024
8061 && INTVAL (index) > -1024
8062 && (INTVAL (index) & 3) == 0);
8065 /* For quad modes, we restrict the constant offset to be slightly less
8066 than what the instruction format permits. We do this because for
8067 quad mode moves, we will actually decompose them into two separate
8068 double-mode reads or writes. INDEX must therefore be a valid
8069 (double-mode) offset and so should INDEX+8. */
8070 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8071 return (code == CONST_INT
8072 && INTVAL (index) < 1016
8073 && INTVAL (index) > -1024
8074 && (INTVAL (index) & 3) == 0);
8076 /* We have no such constraint on double mode offsets, so we permit the
8077 full range of the instruction format. */
8078 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8079 return (code == CONST_INT
8080 && INTVAL (index) < 1024
8081 && INTVAL (index) > -1024
8082 && (INTVAL (index) & 3) == 0);
8084 if (arm_address_register_rtx_p (index, strict_p)
8085 && (GET_MODE_SIZE (mode) <= 4))
8086 return 1;
8088 if (mode == DImode || mode == DFmode)
8090 if (code == CONST_INT)
8092 HOST_WIDE_INT val = INTVAL (index);
8093 /* Thumb-2 ldrd only has reg+const addressing modes.
8094 Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8095 If vldr is selected it uses arm_coproc_mem_operand. */
8096 if (TARGET_LDRD)
8097 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8098 else
8099 return IN_RANGE (val, -255, 4095 - 4);
8101 else
8102 return 0;
8105 if (code == MULT)
8107 rtx xiop0 = XEXP (index, 0);
8108 rtx xiop1 = XEXP (index, 1);
8110 return ((arm_address_register_rtx_p (xiop0, strict_p)
8111 && thumb2_index_mul_operand (xiop1))
8112 || (arm_address_register_rtx_p (xiop1, strict_p)
8113 && thumb2_index_mul_operand (xiop0)));
8115 else if (code == ASHIFT)
8117 rtx op = XEXP (index, 1);
8119 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8120 && CONST_INT_P (op)
8121 && INTVAL (op) > 0
8122 && INTVAL (op) <= 3);
8125 return (code == CONST_INT
8126 && INTVAL (index) < 4096
8127 && INTVAL (index) > -256);
8130 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8131 static int
8132 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8134 int regno;
8136 if (!REG_P (x))
8137 return 0;
8139 regno = REGNO (x);
8141 if (strict_p)
8142 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8144 return (regno <= LAST_LO_REGNUM
8145 || regno > LAST_VIRTUAL_REGISTER
8146 || regno == FRAME_POINTER_REGNUM
8147 || (GET_MODE_SIZE (mode) >= 4
8148 && (regno == STACK_POINTER_REGNUM
8149 || regno >= FIRST_PSEUDO_REGISTER
8150 || x == hard_frame_pointer_rtx
8151 || x == arg_pointer_rtx)));
8154 /* Return nonzero if x is a legitimate index register. This is the case
8155 for any base register that can access a QImode object. */
8156 inline static int
8157 thumb1_index_register_rtx_p (rtx x, int strict_p)
8159 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8162 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8164 The AP may be eliminated to either the SP or the FP, so we use the
8165 least common denominator, e.g. SImode, and offsets from 0 to 64.
8167 ??? Verify whether the above is the right approach.
8169 ??? Also, the FP may be eliminated to the SP, so perhaps that
8170 needs special handling also.
8172 ??? Look at how the mips16 port solves this problem. It probably uses
8173 better ways to solve some of these problems.
8175 Although it is not incorrect, we don't accept QImode and HImode
8176 addresses based on the frame pointer or arg pointer until the
8177 reload pass starts. This is so that eliminating such addresses
8178 into stack based ones won't produce impossible code. */
8180 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8182 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8183 return 0;
8185 /* ??? Not clear if this is right. Experiment. */
8186 if (GET_MODE_SIZE (mode) < 4
8187 && !(reload_in_progress || reload_completed)
8188 && (reg_mentioned_p (frame_pointer_rtx, x)
8189 || reg_mentioned_p (arg_pointer_rtx, x)
8190 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8191 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8192 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8193 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8194 return 0;
8196 /* Accept any base register. SP only in SImode or larger. */
8197 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8198 return 1;
8200 /* This is PC relative data before arm_reorg runs. */
8201 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8202 && GET_CODE (x) == SYMBOL_REF
8203 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8204 return 1;
8206 /* This is PC relative data after arm_reorg runs. */
8207 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8208 && reload_completed
8209 && (GET_CODE (x) == LABEL_REF
8210 || (GET_CODE (x) == CONST
8211 && GET_CODE (XEXP (x, 0)) == PLUS
8212 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8213 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8214 return 1;
8216 /* Post-inc indexing only supported for SImode and larger. */
8217 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8218 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8219 return 1;
8221 else if (GET_CODE (x) == PLUS)
8223 /* REG+REG address can be any two index registers. */
8224 /* We disallow FRAME+REG addressing since we know that FRAME
8225 will be replaced with STACK, and SP relative addressing only
8226 permits SP+OFFSET. */
8227 if (GET_MODE_SIZE (mode) <= 4
8228 && XEXP (x, 0) != frame_pointer_rtx
8229 && XEXP (x, 1) != frame_pointer_rtx
8230 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8231 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8232 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8233 return 1;
8235 /* REG+const has 5-7 bit offset for non-SP registers. */
8236 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8237 || XEXP (x, 0) == arg_pointer_rtx)
8238 && CONST_INT_P (XEXP (x, 1))
8239 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8240 return 1;
8242 /* REG+const has 10-bit offset for SP, but only SImode and
8243 larger is supported. */
8244 /* ??? Should probably check for DI/DFmode overflow here
8245 just like GO_IF_LEGITIMATE_OFFSET does. */
8246 else if (REG_P (XEXP (x, 0))
8247 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8248 && GET_MODE_SIZE (mode) >= 4
8249 && CONST_INT_P (XEXP (x, 1))
8250 && INTVAL (XEXP (x, 1)) >= 0
8251 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8252 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8253 return 1;
8255 else if (REG_P (XEXP (x, 0))
8256 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8257 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8258 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8259 && REGNO (XEXP (x, 0))
8260 <= LAST_VIRTUAL_POINTER_REGISTER))
8261 && GET_MODE_SIZE (mode) >= 4
8262 && CONST_INT_P (XEXP (x, 1))
8263 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8264 return 1;
8267 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8268 && GET_MODE_SIZE (mode) == 4
8269 && GET_CODE (x) == SYMBOL_REF
8270 && CONSTANT_POOL_ADDRESS_P (x)
8271 && ! (flag_pic
8272 && symbol_mentioned_p (get_pool_constant (x))
8273 && ! pcrel_constant_p (get_pool_constant (x))))
8274 return 1;
8276 return 0;
8279 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8280 instruction of mode MODE. */
8282 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8284 switch (GET_MODE_SIZE (mode))
8286 case 1:
8287 return val >= 0 && val < 32;
8289 case 2:
8290 return val >= 0 && val < 64 && (val & 1) == 0;
8292 default:
8293 return (val >= 0
8294 && (val + GET_MODE_SIZE (mode)) <= 128
8295 && (val & 3) == 0);
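/* The ranges above mirror the Thumb-1 immediate-offset encodings; as an
   illustration (not exhaustive):

     ldrb  r0, [r1, #31]    @ byte:     0..31
     ldrh  r0, [r1, #62]    @ halfword: 0..62, even
     ldr   r0, [r1, #124]   @ word:     0..124, multiple of 4

   with larger modes additionally required to keep the whole access below
   128 bytes from the base.  */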
8299 bool
8300 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8302 if (TARGET_ARM)
8303 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8304 else if (TARGET_THUMB2)
8305 return thumb2_legitimate_address_p (mode, x, strict_p);
8306 else /* if (TARGET_THUMB1) */
8307 return thumb1_legitimate_address_p (mode, x, strict_p);
8310 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8312 Given an rtx X being reloaded into a reg required to be
8313 in class CLASS, return the class of reg to actually use.
8314 In general this is just CLASS, but for the Thumb core registers and
8315 immediate constants we prefer a LO_REGS class or a subset. */
8317 static reg_class_t
8318 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8320 if (TARGET_32BIT)
8321 return rclass;
8322 else
8324 if (rclass == GENERAL_REGS)
8325 return LO_REGS;
8326 else
8327 return rclass;
8331 /* Build the SYMBOL_REF for __tls_get_addr. */
8333 static GTY(()) rtx tls_get_addr_libfunc;
8335 static rtx
8336 get_tls_get_addr (void)
8338 if (!tls_get_addr_libfunc)
8339 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8340 return tls_get_addr_libfunc;
8344 arm_load_tp (rtx target)
8346 if (!target)
8347 target = gen_reg_rtx (SImode);
8349 if (TARGET_HARD_TP)
8351 /* Can return in any reg. */
8352 emit_insn (gen_load_tp_hard (target));
8354 else
8356 /* Always returned in r0. Immediately copy the result into a pseudo,
8357 otherwise other uses of r0 (e.g. setting up function arguments) may
8358 clobber the value. */
8360 rtx tmp;
8362 emit_insn (gen_load_tp_soft ());
8364 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8365 emit_move_insn (target, tmp);
8367 return target;
8370 static rtx
8371 load_tls_operand (rtx x, rtx reg)
8373 rtx tmp;
8375 if (reg == NULL_RTX)
8376 reg = gen_reg_rtx (SImode);
8378 tmp = gen_rtx_CONST (SImode, x);
8380 emit_move_insn (reg, tmp);
8382 return reg;
8385 static rtx_insn *
8386 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8388 rtx label, labelno, sum;
8390 gcc_assert (reloc != TLS_DESCSEQ);
8391 start_sequence ();
8393 labelno = GEN_INT (pic_labelno++);
8394 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8395 label = gen_rtx_CONST (VOIDmode, label);
8397 sum = gen_rtx_UNSPEC (Pmode,
8398 gen_rtvec (4, x, GEN_INT (reloc), label,
8399 GEN_INT (TARGET_ARM ? 8 : 4)),
8400 UNSPEC_TLS);
8401 reg = load_tls_operand (sum, reg);
8403 if (TARGET_ARM)
8404 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8405 else
8406 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8408 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8409 LCT_PURE, /* LCT_CONST? */
8410 Pmode, reg, Pmode);
8412 rtx_insn *insns = get_insns ();
8413 end_sequence ();
8415 return insns;
8418 static rtx
8419 arm_tls_descseq_addr (rtx x, rtx reg)
8421 rtx labelno = GEN_INT (pic_labelno++);
8422 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8423 rtx sum = gen_rtx_UNSPEC (Pmode,
8424 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8425 gen_rtx_CONST (VOIDmode, label),
8426 GEN_INT (!TARGET_ARM)),
8427 UNSPEC_TLS);
8428 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8430 emit_insn (gen_tlscall (x, labelno));
8431 if (!reg)
8432 reg = gen_reg_rtx (SImode);
8433 else
8434 gcc_assert (REGNO (reg) != R0_REGNUM);
8436 emit_move_insn (reg, reg0);
8438 return reg;
8442 legitimize_tls_address (rtx x, rtx reg)
8444 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8445 rtx_insn *insns;
8446 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8448 switch (model)
8450 case TLS_MODEL_GLOBAL_DYNAMIC:
8451 if (TARGET_GNU2_TLS)
8453 reg = arm_tls_descseq_addr (x, reg);
8455 tp = arm_load_tp (NULL_RTX);
8457 dest = gen_rtx_PLUS (Pmode, tp, reg);
8459 else
8461 /* Original scheme */
8462 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8463 dest = gen_reg_rtx (Pmode);
8464 emit_libcall_block (insns, dest, ret, x);
8466 return dest;
8468 case TLS_MODEL_LOCAL_DYNAMIC:
8469 if (TARGET_GNU2_TLS)
8471 reg = arm_tls_descseq_addr (x, reg);
8473 tp = arm_load_tp (NULL_RTX);
8475 dest = gen_rtx_PLUS (Pmode, tp, reg);
8477 else
8479 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8481 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8482 share the LDM result with other LD model accesses. */
8483 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8484 UNSPEC_TLS);
8485 dest = gen_reg_rtx (Pmode);
8486 emit_libcall_block (insns, dest, ret, eqv);
8488 /* Load the addend. */
8489 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8490 GEN_INT (TLS_LDO32)),
8491 UNSPEC_TLS);
8492 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8493 dest = gen_rtx_PLUS (Pmode, dest, addend);
8495 return dest;
8497 case TLS_MODEL_INITIAL_EXEC:
8498 labelno = GEN_INT (pic_labelno++);
8499 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8500 label = gen_rtx_CONST (VOIDmode, label);
8501 sum = gen_rtx_UNSPEC (Pmode,
8502 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8503 GEN_INT (TARGET_ARM ? 8 : 4)),
8504 UNSPEC_TLS);
8505 reg = load_tls_operand (sum, reg);
8507 if (TARGET_ARM)
8508 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8509 else if (TARGET_THUMB2)
8510 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8511 else
8513 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8514 emit_move_insn (reg, gen_const_mem (SImode, reg));
8517 tp = arm_load_tp (NULL_RTX);
8519 return gen_rtx_PLUS (Pmode, tp, reg);
8521 case TLS_MODEL_LOCAL_EXEC:
8522 tp = arm_load_tp (NULL_RTX);
8524 reg = gen_rtx_UNSPEC (Pmode,
8525 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8526 UNSPEC_TLS);
8527 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8529 return gen_rtx_PLUS (Pmode, tp, reg);
8531 default:
8532 abort ();
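/* Source-level illustration (not part of this file; the variable names are
   made up) of what feeds the models above:

     __thread int counter;                       // model follows -fpic and
                                                 // -ftls-model
     static __thread int cache
       __attribute__ ((tls_model ("local-exec")));

   SYMBOL_REF_TLS_MODEL (x) reflects that choice, and each case above builds
   the matching access sequence: a __tls_get_addr call for the dynamic models
   under the traditional scheme, or an offset from the thread pointer
   returned by arm_load_tp for the exec models.  */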
8536 /* Try machine-dependent ways of modifying an illegitimate address
8537 to be legitimate. If we find one, return the new, valid address. */
8539 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8541 if (arm_tls_referenced_p (x))
8543 rtx addend = NULL;
8545 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8547 addend = XEXP (XEXP (x, 0), 1);
8548 x = XEXP (XEXP (x, 0), 0);
8551 if (GET_CODE (x) != SYMBOL_REF)
8552 return x;
8554 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8556 x = legitimize_tls_address (x, NULL_RTX);
8558 if (addend)
8560 x = gen_rtx_PLUS (SImode, x, addend);
8561 orig_x = x;
8563 else
8564 return x;
8567 if (!TARGET_ARM)
8569 /* TODO: legitimize_address for Thumb2. */
8570 if (TARGET_THUMB2)
8571 return x;
8572 return thumb_legitimize_address (x, orig_x, mode);
8575 if (GET_CODE (x) == PLUS)
8577 rtx xop0 = XEXP (x, 0);
8578 rtx xop1 = XEXP (x, 1);
8580 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8581 xop0 = force_reg (SImode, xop0);
8583 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8584 && !symbol_mentioned_p (xop1))
8585 xop1 = force_reg (SImode, xop1);
8587 if (ARM_BASE_REGISTER_RTX_P (xop0)
8588 && CONST_INT_P (xop1))
8590 HOST_WIDE_INT n, low_n;
8591 rtx base_reg, val;
8592 n = INTVAL (xop1);
8594 /* VFP addressing modes actually allow greater offsets, but for
8595 now we just stick with the lowest common denominator. */
8596 if (mode == DImode || mode == DFmode)
8598 low_n = n & 0x0f;
8599 n &= ~0x0f;
8600 if (low_n > 4)
8602 n += 16;
8603 low_n -= 16;
8606 else
8608 low_n = ((mode) == TImode ? 0
8609 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8610 n -= low_n;
8613 base_reg = gen_reg_rtx (SImode);
8614 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8615 emit_move_insn (base_reg, val);
8616 x = plus_constant (Pmode, base_reg, low_n);
8618 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8619 x = gen_rtx_PLUS (SImode, xop0, xop1);
8622 /* XXX We don't allow MINUS any more -- see comment in
8623 arm_legitimate_address_outer_p (). */
8624 else if (GET_CODE (x) == MINUS)
8626 rtx xop0 = XEXP (x, 0);
8627 rtx xop1 = XEXP (x, 1);
8629 if (CONSTANT_P (xop0))
8630 xop0 = force_reg (SImode, xop0);
8632 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8633 xop1 = force_reg (SImode, xop1);
8635 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8636 x = gen_rtx_MINUS (SImode, xop0, xop1);
8639 /* Make sure to take full advantage of the pre-indexed addressing mode
8640 with absolute addresses, which often allows the base register to
8641 be factorized across multiple adjacent memory references, and might
8642 even allow the minipool to be avoided entirely. */
8643 else if (CONST_INT_P (x) && optimize > 0)
8645 unsigned int bits;
8646 HOST_WIDE_INT mask, base, index;
8647 rtx base_reg;
8649 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8650 use an 8-bit index. So let's use a 12-bit index for SImode only and
8651 hope that arm_gen_constant will enable ldrb to use more bits. */
8652 bits = (mode == SImode) ? 12 : 8;
8653 mask = (1 << bits) - 1;
8654 base = INTVAL (x) & ~mask;
8655 index = INTVAL (x) & mask;
8656 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8658 /* It'll most probably be more efficient to generate the base
8659 with more bits set and use a negative index instead. */
8660 base |= mask;
8661 index -= mask;
8663 base_reg = force_reg (SImode, GEN_INT (base));
8664 x = plus_constant (Pmode, base_reg, index);
8667 if (flag_pic)
8669 /* We need to find and carefully transform any SYMBOL and LABEL
8670 references; so go back to the original address expression. */
8671 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8673 if (new_x != orig_x)
8674 x = new_x;
8677 return x;
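/* Worked example of the constant-address split above (illustrative only):
   for x = 0x12345678 in SImode, bits = 12 and mask = 0xfff, giving
   base = 0x12345000 and index = 0x678.  bit_count (0x12345000) is 7, which
   does not exceed (32 - 12) / 2 = 10, so the base is kept as is; it is built
   in a register (by whatever move sequence force_reg chooses) and the access
   becomes a single ldr with a #0x678 offset.  */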
8681 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8682 to be legitimate. If we find one, return the new, valid address. */
8684 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8686 if (GET_CODE (x) == PLUS
8687 && CONST_INT_P (XEXP (x, 1))
8688 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8689 || INTVAL (XEXP (x, 1)) < 0))
8691 rtx xop0 = XEXP (x, 0);
8692 rtx xop1 = XEXP (x, 1);
8693 HOST_WIDE_INT offset = INTVAL (xop1);
8695 /* Try and fold the offset into a biasing of the base register and
8696 then offsetting that. Don't do this when optimizing for space
8697 since it can cause too many CSEs. */
8698 if (optimize_size && offset >= 0
8699 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8701 HOST_WIDE_INT delta;
8703 if (offset >= 256)
8704 delta = offset - (256 - GET_MODE_SIZE (mode));
8705 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8706 delta = 31 * GET_MODE_SIZE (mode);
8707 else
8708 delta = offset & (~31 * GET_MODE_SIZE (mode));
8710 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8711 NULL_RTX);
8712 x = plus_constant (Pmode, xop0, delta);
8714 else if (offset < 0 && offset > -256)
8715 /* Small negative offsets are best done with a subtract before the
8716 dereference, since forcing these into a register normally takes two
8717 instructions. */
8718 x = force_operand (x, NULL_RTX);
8719 else
8721 /* For the remaining cases, force the constant into a register. */
8722 xop1 = force_reg (SImode, xop1);
8723 x = gen_rtx_PLUS (SImode, xop0, xop1);
8726 else if (GET_CODE (x) == PLUS
8727 && s_register_operand (XEXP (x, 1), SImode)
8728 && !s_register_operand (XEXP (x, 0), SImode))
8730 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8732 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8735 if (flag_pic)
8737 /* We need to find and carefully transform any SYMBOL and LABEL
8738 references; so go back to the original address expression. */
8739 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8741 if (new_x != orig_x)
8742 x = new_x;
8745 return x;
8748 /* Return TRUE if X contains any TLS symbol references. */
8750 bool
8751 arm_tls_referenced_p (rtx x)
8753 if (! TARGET_HAVE_TLS)
8754 return false;
8756 subrtx_iterator::array_type array;
8757 FOR_EACH_SUBRTX (iter, array, x, ALL)
8759 const_rtx x = *iter;
8760 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8762 /* ARM currently does not provide relocations to encode TLS variables
8763 into AArch32 instructions, only data, so there is no way to
8764 currently implement these if a literal pool is disabled. */
8765 if (arm_disable_literal_pool)
8766 sorry ("accessing thread-local storage is not currently supported "
8767 "with -mpure-code or -mslow-flash-data");
8769 return true;
8772 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8773 TLS offsets, not real symbol references. */
8774 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8775 iter.skip_subrtxes ();
8777 return false;
8780 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8782 On the ARM, allow any integer (invalid ones are removed later by insn
8783 patterns), nice doubles and symbol_refs which refer to the function's
8784 constant pool XXX.
8786 When generating pic allow anything. */
8788 static bool
8789 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8791 return flag_pic || !label_mentioned_p (x);
8794 static bool
8795 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8797 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8798 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8799 for ARMv8-M Baseline or later, the result is valid. */
8800 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8801 x = XEXP (x, 0);
8803 return (CONST_INT_P (x)
8804 || CONST_DOUBLE_P (x)
8805 || CONSTANT_ADDRESS_P (x)
8806 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8807 || flag_pic);
8810 static bool
8811 arm_legitimate_constant_p (machine_mode mode, rtx x)
8813 return (!arm_cannot_force_const_mem (mode, x)
8814 && (TARGET_32BIT
8815 ? arm_legitimate_constant_p_1 (mode, x)
8816 : thumb_legitimate_constant_p (mode, x)));
8819 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8821 static bool
8822 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8824 rtx base, offset;
8826 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8828 split_const (x, &base, &offset);
8829 if (GET_CODE (base) == SYMBOL_REF
8830 && !offset_within_block_p (base, INTVAL (offset)))
8831 return true;
8833 return arm_tls_referenced_p (x);
8836 #define REG_OR_SUBREG_REG(X) \
8837 (REG_P (X) \
8838 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8840 #define REG_OR_SUBREG_RTX(X) \
8841 (REG_P (X) ? (X) : SUBREG_REG (X))
8843 static inline int
8844 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8846 machine_mode mode = GET_MODE (x);
8847 int total, words;
8849 switch (code)
8851 case ASHIFT:
8852 case ASHIFTRT:
8853 case LSHIFTRT:
8854 case ROTATERT:
8855 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8857 case PLUS:
8858 case MINUS:
8859 case COMPARE:
8860 case NEG:
8861 case NOT:
8862 return COSTS_N_INSNS (1);
8864 case MULT:
8865 if (arm_arch6m && arm_m_profile_small_mul)
8866 return COSTS_N_INSNS (32);
8868 if (CONST_INT_P (XEXP (x, 1)))
8870 int cycles = 0;
8871 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8873 while (i)
8875 i >>= 2;
8876 cycles++;
8878 return COSTS_N_INSNS (2) + cycles;
8880 return COSTS_N_INSNS (1) + 16;
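/* Illustrative: for a constant multiplier of 100 (0x64) the while loop in
   the CONST_INT case above runs 4 times (0x64 -> 0x19 -> 0x6 -> 0x1 -> 0),
   so the cost is COSTS_N_INSNS (2) + 4, i.e. roughly one extra unit per two
   significant bits of the constant.  */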
8882 case SET:
8883 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8884 the mode. */
8885 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8886 return (COSTS_N_INSNS (words)
8887 + 4 * ((MEM_P (SET_SRC (x)))
8888 + MEM_P (SET_DEST (x))));
8890 case CONST_INT:
8891 if (outer == SET)
8893 if (UINTVAL (x) < 256
8894 /* 16-bit constant. */
8895 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8896 return 0;
8897 if (thumb_shiftable_const (INTVAL (x)))
8898 return COSTS_N_INSNS (2);
8899 return COSTS_N_INSNS (3);
8901 else if ((outer == PLUS || outer == COMPARE)
8902 && INTVAL (x) < 256 && INTVAL (x) > -256)
8903 return 0;
8904 else if ((outer == IOR || outer == XOR || outer == AND)
8905 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8906 return COSTS_N_INSNS (1);
8907 else if (outer == AND)
8909 int i;
8910 /* This duplicates the tests in the andsi3 expander. */
8911 for (i = 9; i <= 31; i++)
8912 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8913 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8914 return COSTS_N_INSNS (2);
8916 else if (outer == ASHIFT || outer == ASHIFTRT
8917 || outer == LSHIFTRT)
8918 return 0;
8919 return COSTS_N_INSNS (2);
8921 case CONST:
8922 case CONST_DOUBLE:
8923 case LABEL_REF:
8924 case SYMBOL_REF:
8925 return COSTS_N_INSNS (3);
8927 case UDIV:
8928 case UMOD:
8929 case DIV:
8930 case MOD:
8931 return 100;
8933 case TRUNCATE:
8934 return 99;
8936 case AND:
8937 case XOR:
8938 case IOR:
8939 /* XXX guess. */
8940 return 8;
8942 case MEM:
8943 /* XXX another guess. */
8944 /* Memory costs quite a lot for the first word, but subsequent words
8945 load at the equivalent of a single insn each. */
8946 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8947 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8948 ? 4 : 0));
8950 case IF_THEN_ELSE:
8951 /* XXX a guess. */
8952 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8953 return 14;
8954 return 2;
8956 case SIGN_EXTEND:
8957 case ZERO_EXTEND:
8958 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8959 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8961 if (mode == SImode)
8962 return total;
8964 if (arm_arch6)
8965 return total + COSTS_N_INSNS (1);
8967 /* Assume a two-shift sequence. Increase the cost slightly so
8968 we prefer actual shifts over an extend operation. */
8969 return total + 1 + COSTS_N_INSNS (2);
8971 default:
8972 return 99;
8976 /* Estimates the size cost of thumb1 instructions.
8977 For now most of the code is copied from thumb1_rtx_costs. We need
8978 finer-grained tuning when we have more related test cases. */
8979 static inline int
8980 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8982 machine_mode mode = GET_MODE (x);
8983 int words, cost;
8985 switch (code)
8987 case ASHIFT:
8988 case ASHIFTRT:
8989 case LSHIFTRT:
8990 case ROTATERT:
8991 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8993 case PLUS:
8994 case MINUS:
8995 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8996 defined by RTL expansion, especially for the expansion of
8997 multiplication. */
8998 if ((GET_CODE (XEXP (x, 0)) == MULT
8999 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9000 || (GET_CODE (XEXP (x, 1)) == MULT
9001 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9002 return COSTS_N_INSNS (2);
9003 /* Fall through. */
9004 case COMPARE:
9005 case NEG:
9006 case NOT:
9007 return COSTS_N_INSNS (1);
9009 case MULT:
9010 if (CONST_INT_P (XEXP (x, 1)))
9012 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9013 into a register first. */
9014 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9015 /* For the targets which have a very small and high-latency multiply
9016 unit, we prefer to synthesize the mult with up to 5 instructions,
9017 giving a good balance between size and performance. */
9018 if (arm_arch6m && arm_m_profile_small_mul)
9019 return COSTS_N_INSNS (5);
9020 else
9021 return COSTS_N_INSNS (1) + const_size;
9023 return COSTS_N_INSNS (1);
9025 case SET:
9026 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9027 the mode. */
9028 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9029 cost = COSTS_N_INSNS (words);
9030 if (satisfies_constraint_J (SET_SRC (x))
9031 || satisfies_constraint_K (SET_SRC (x))
9032 /* Too big an immediate for a 2-byte mov, using MOVT. */
9033 || (CONST_INT_P (SET_SRC (x))
9034 && UINTVAL (SET_SRC (x)) >= 256
9035 && TARGET_HAVE_MOVT
9036 && satisfies_constraint_j (SET_SRC (x)))
9037 /* thumb1_movdi_insn. */
9038 || ((words > 1) && MEM_P (SET_SRC (x))))
9039 cost += COSTS_N_INSNS (1);
9040 return cost;
9042 case CONST_INT:
9043 if (outer == SET)
9045 if (UINTVAL (x) < 256)
9046 return COSTS_N_INSNS (1);
9047 /* movw is 4byte long. */
9048 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9049 return COSTS_N_INSNS (2);
9050 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9051 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9052 return COSTS_N_INSNS (2);
9053 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9054 if (thumb_shiftable_const (INTVAL (x)))
9055 return COSTS_N_INSNS (2);
9056 return COSTS_N_INSNS (3);
9058 else if ((outer == PLUS || outer == COMPARE)
9059 && INTVAL (x) < 256 && INTVAL (x) > -256)
9060 return 0;
9061 else if ((outer == IOR || outer == XOR || outer == AND)
9062 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9063 return COSTS_N_INSNS (1);
9064 else if (outer == AND)
9066 int i;
9067 /* This duplicates the tests in the andsi3 expander. */
9068 for (i = 9; i <= 31; i++)
9069 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9070 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9071 return COSTS_N_INSNS (2);
9073 else if (outer == ASHIFT || outer == ASHIFTRT
9074 || outer == LSHIFTRT)
9075 return 0;
9076 return COSTS_N_INSNS (2);
9078 case CONST:
9079 case CONST_DOUBLE:
9080 case LABEL_REF:
9081 case SYMBOL_REF:
9082 return COSTS_N_INSNS (3);
9084 case UDIV:
9085 case UMOD:
9086 case DIV:
9087 case MOD:
9088 return 100;
9090 case TRUNCATE:
9091 return 99;
9093 case AND:
9094 case XOR:
9095 case IOR:
9096 return COSTS_N_INSNS (1);
9098 case MEM:
9099 return (COSTS_N_INSNS (1)
9100 + COSTS_N_INSNS (1)
9101 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9102 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9103 ? COSTS_N_INSNS (1) : 0));
9105 case IF_THEN_ELSE:
9106 /* XXX a guess. */
9107 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9108 return 14;
9109 return 2;
9111 case ZERO_EXTEND:
9112 /* XXX still guessing. */
9113 switch (GET_MODE (XEXP (x, 0)))
9115 case E_QImode:
9116 return (1 + (mode == DImode ? 4 : 0)
9117 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9119 case E_HImode:
9120 return (4 + (mode == DImode ? 4 : 0)
9121 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9123 case E_SImode:
9124 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9126 default:
9127 return 99;
9130 default:
9131 return 99;
9135 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9136 operand, then return the operand that is being shifted. If the shift
9137 is not by a constant, then set *SHIFT_REG to the shift-amount register.
9138 Return NULL if OP is not a shifter operand. */
9139 static rtx
9140 shifter_op_p (rtx op, rtx *shift_reg)
9142 enum rtx_code code = GET_CODE (op);
9144 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9145 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9146 return XEXP (op, 0);
9147 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9148 return XEXP (op, 0);
9149 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9150 || code == ASHIFTRT)
9152 if (!CONST_INT_P (XEXP (op, 1)))
9153 *shift_reg = XEXP (op, 1);
9154 return XEXP (op, 0);
9157 return NULL;
9160 static bool
9161 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9163 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9164 rtx_code code = GET_CODE (x);
9165 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9167 switch (XINT (x, 1))
9169 case UNSPEC_UNALIGNED_LOAD:
9170 /* We can only do unaligned loads into the integer unit, and we can't
9171 use LDM or LDRD. */
9172 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9173 if (speed_p)
9174 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9175 + extra_cost->ldst.load_unaligned);
9177 #ifdef NOT_YET
9178 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9179 ADDR_SPACE_GENERIC, speed_p);
9180 #endif
9181 return true;
9183 case UNSPEC_UNALIGNED_STORE:
9184 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9185 if (speed_p)
9186 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9187 + extra_cost->ldst.store_unaligned);
9189 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9190 #ifdef NOT_YET
9191 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9192 ADDR_SPACE_GENERIC, speed_p);
9193 #endif
9194 return true;
9196 case UNSPEC_VRINTZ:
9197 case UNSPEC_VRINTP:
9198 case UNSPEC_VRINTM:
9199 case UNSPEC_VRINTR:
9200 case UNSPEC_VRINTX:
9201 case UNSPEC_VRINTA:
9202 if (speed_p)
9203 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9205 return true;
9206 default:
9207 *cost = COSTS_N_INSNS (2);
9208 break;
9210 return true;
9213 /* Cost of a libcall. We assume one insn per argument, an amount for the
9214 call (one insn for -Os) and then one for processing the result. */
9215 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
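/* Worked example (added for illustration): for a two-argument libcall,
   LIBCALL_COST (2) evaluates to COSTS_N_INSNS (2 + 18) when optimizing
   for speed and to COSTS_N_INSNS (2 + 2) when optimizing for size.  */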
9217 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9218 do \
9220 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9221 if (shift_op != NULL \
9222 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9224 if (shift_reg) \
9226 if (speed_p) \
9227 *cost += extra_cost->alu.arith_shift_reg; \
9228 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9229 ASHIFT, 1, speed_p); \
9231 else if (speed_p) \
9232 *cost += extra_cost->alu.arith_shift; \
9234 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9235 ASHIFT, 0, speed_p) \
9236 + rtx_cost (XEXP (x, 1 - IDX), \
9237 GET_MODE (shift_op), \
9238 OP, 1, speed_p)); \
9239 return true; \
9242 while (0)
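/* Example of what the macro above matches (added for illustration; the
   RTX is made up): for a HImode (plus (ashift (reg) (const_int 2)) (reg)),
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) recognizes the constant left shift
   in operand 0, charges alu.arith_shift when optimizing for speed, adds
   the costs of the shifted operand and of the other operand, and then
   returns.  */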
9244 /* RTX costs. Make an estimate of the cost of executing the operation
9245 X, which is contained within an operation with code OUTER_CODE.
9246 SPEED_P indicates whether the cost desired is the performance cost,
9247 or the size cost. The estimate is stored in COST and the return
9248 value is TRUE if the cost calculation is final, or FALSE if the
9249 caller should recurse through the operands of X to add additional
9250 costs.
9252 We currently make no attempt to model the size savings of Thumb-2
9253 16-bit instructions. At the normal points in compilation where
9254 this code is called we have no measure of whether the condition
9255 flags are live or not, and thus no realistic way to determine what
9256 the size will eventually be. */
9257 static bool
9258 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9259 const struct cpu_cost_table *extra_cost,
9260 int *cost, bool speed_p)
9262 machine_mode mode = GET_MODE (x);
9264 *cost = COSTS_N_INSNS (1);
9266 if (TARGET_THUMB1)
9268 if (speed_p)
9269 *cost = thumb1_rtx_costs (x, code, outer_code);
9270 else
9271 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9272 return true;
9275 switch (code)
9277 case SET:
9278 *cost = 0;
9279 /* SET RTXs don't have a mode so we get it from the destination. */
9280 mode = GET_MODE (SET_DEST (x));
9282 if (REG_P (SET_SRC (x))
9283 && REG_P (SET_DEST (x)))
9285 /* Assume that most copies can be done with a single insn,
9286 unless we don't have HW FP, in which case everything
9287 larger than word mode will require two insns. */
9288 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9289 && GET_MODE_SIZE (mode) > 4)
9290 || mode == DImode)
9291 ? 2 : 1);
9292 /* Conditional register moves can be encoded
9293 in 16 bits in Thumb mode. */
9294 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9295 *cost >>= 1;
9297 return true;
9300 if (CONST_INT_P (SET_SRC (x)))
9302 /* Handle CONST_INT here, since the value doesn't have a mode
9303 and we would otherwise be unable to work out the true cost. */
9304 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9305 0, speed_p);
9306 outer_code = SET;
9307 /* Slightly lower the cost of setting a core reg to a constant.
9308 This helps break up chains and allows for better scheduling. */
9309 if (REG_P (SET_DEST (x))
9310 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9311 *cost -= 1;
9312 x = SET_SRC (x);
9313 /* Immediate moves with an immediate in the range [0, 255] can be
9314 encoded in 16 bits in Thumb mode. */
9315 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9316 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9317 *cost >>= 1;
9318 goto const_int_cost;
9321 return false;
9323 case MEM:
9324 /* A memory access costs 1 insn if the mode is small, or the address is
9325 a single register, otherwise it costs one insn per word. */
9326 if (REG_P (XEXP (x, 0)))
9327 *cost = COSTS_N_INSNS (1);
9328 else if (flag_pic
9329 && GET_CODE (XEXP (x, 0)) == PLUS
9330 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9331 /* This will be split into two instructions.
9332 See arm.md:calculate_pic_address. */
9333 *cost = COSTS_N_INSNS (2);
9334 else
9335 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9337 /* For speed optimizations, add the costs of the address and
9338 accessing memory. */
9339 if (speed_p)
9340 #ifdef NOT_YET
9341 *cost += (extra_cost->ldst.load
9342 + arm_address_cost (XEXP (x, 0), mode,
9343 ADDR_SPACE_GENERIC, speed_p));
9344 #else
9345 *cost += extra_cost->ldst.load;
9346 #endif
9347 return true;
9349 case PARALLEL:
9351 /* Calculations of LDM costs are complex. We assume an initial cost
9352 (ldm_1st) which will load the number of registers mentioned in
9353 ldm_regs_per_insn_1st registers; then each additional
9354 ldm_regs_per_insn_subsequent registers cost one more insn. The
9355 formula for N regs is thus:
9357 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9358 + ldm_regs_per_insn_subsequent - 1)
9359 / ldm_regs_per_insn_subsequent).
9361 Additional costs may also be added for addressing. A similar
9362 formula is used for STM. */
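/* Worked example for the formula above (added for illustration; the
   tuning numbers are hypothetical): with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, an 8-register LDM is costed as
   ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (3).  */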
9364 bool is_ldm = load_multiple_operation (x, SImode);
9365 bool is_stm = store_multiple_operation (x, SImode);
9367 if (is_ldm || is_stm)
9369 if (speed_p)
9371 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9372 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9373 ? extra_cost->ldst.ldm_regs_per_insn_1st
9374 : extra_cost->ldst.stm_regs_per_insn_1st;
9375 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9376 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9377 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9379 *cost += regs_per_insn_1st
9380 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9381 + regs_per_insn_sub - 1)
9382 / regs_per_insn_sub);
9383 return true;
9387 return false;
9389 case DIV:
9390 case UDIV:
9391 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9392 && (mode == SFmode || !TARGET_VFP_SINGLE))
9393 *cost += COSTS_N_INSNS (speed_p
9394 ? extra_cost->fp[mode != SFmode].div : 0);
9395 else if (mode == SImode && TARGET_IDIV)
9396 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9397 else
9398 *cost = LIBCALL_COST (2);
9400 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9401 possible, udiv is preferred. */
9402 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9403 return false; /* All arguments must be in registers. */
9405 case MOD:
9406 /* MOD by a power of 2 can be expanded as:
9407 rsbs r1, r0, #0
9408 and r0, r0, #(n - 1)
9409 and r1, r1, #(n - 1)
9410 rsbpl r0, r1, #0. */
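/* For instance (added for illustration), x % 8 in SImode matches the
   pattern above with n == 8: both AND instructions use the mask #7, and
   the code below charges three insns on top of the base cost for the
   three additional instructions in the sequence.  */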
9411 if (CONST_INT_P (XEXP (x, 1))
9412 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9413 && mode == SImode)
9415 *cost += COSTS_N_INSNS (3);
9417 if (speed_p)
9418 *cost += 2 * extra_cost->alu.logical
9419 + extra_cost->alu.arith;
9420 return true;
9423 /* Fall-through. */
9424 case UMOD:
9425 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9426 possible, udiv is preferred. */
9427 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9428 return false; /* All arguments must be in registers. */
9430 case ROTATE:
9431 if (mode == SImode && REG_P (XEXP (x, 1)))
9433 *cost += (COSTS_N_INSNS (1)
9434 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9435 if (speed_p)
9436 *cost += extra_cost->alu.shift_reg;
9437 return true;
9439 /* Fall through */
9440 case ROTATERT:
9441 case ASHIFT:
9442 case LSHIFTRT:
9443 case ASHIFTRT:
9444 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9446 *cost += (COSTS_N_INSNS (2)
9447 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9448 if (speed_p)
9449 *cost += 2 * extra_cost->alu.shift;
9450 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9451 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9452 *cost += 1;
9453 return true;
9455 else if (mode == SImode)
9457 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9458 /* Slightly disparage register shifts at -Os, but not by much. */
9459 if (!CONST_INT_P (XEXP (x, 1)))
9460 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9461 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9462 return true;
9464 else if (GET_MODE_CLASS (mode) == MODE_INT
9465 && GET_MODE_SIZE (mode) < 4)
9467 if (code == ASHIFT)
9469 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9470 /* Slightly disparage register shifts at -Os, but not by
9471 much. */
9472 if (!CONST_INT_P (XEXP (x, 1)))
9473 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9474 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9476 else if (code == LSHIFTRT || code == ASHIFTRT)
9478 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9480 /* Can use SBFX/UBFX. */
9481 if (speed_p)
9482 *cost += extra_cost->alu.bfx;
9483 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9485 else
9487 *cost += COSTS_N_INSNS (1);
9488 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9489 if (speed_p)
9491 if (CONST_INT_P (XEXP (x, 1)))
9492 *cost += 2 * extra_cost->alu.shift;
9493 else
9494 *cost += (extra_cost->alu.shift
9495 + extra_cost->alu.shift_reg);
9497 else
9498 /* Slightly disparage register shifts. */
9499 *cost += !CONST_INT_P (XEXP (x, 1));
9502 else /* Rotates. */
9504 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9505 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9506 if (speed_p)
9508 if (CONST_INT_P (XEXP (x, 1)))
9509 *cost += (2 * extra_cost->alu.shift
9510 + extra_cost->alu.log_shift);
9511 else
9512 *cost += (extra_cost->alu.shift
9513 + extra_cost->alu.shift_reg
9514 + extra_cost->alu.log_shift_reg);
9517 return true;
9520 *cost = LIBCALL_COST (2);
9521 return false;
9523 case BSWAP:
9524 if (arm_arch6)
9526 if (mode == SImode)
9528 if (speed_p)
9529 *cost += extra_cost->alu.rev;
9531 return false;
9534 else
9536 /* No rev instruction available. Look at arm_legacy_rev
9537 and thumb_legacy_rev for the form of RTL used then. */
9538 if (TARGET_THUMB)
9540 *cost += COSTS_N_INSNS (9);
9542 if (speed_p)
9544 *cost += 6 * extra_cost->alu.shift;
9545 *cost += 3 * extra_cost->alu.logical;
9548 else
9550 *cost += COSTS_N_INSNS (4);
9552 if (speed_p)
9554 *cost += 2 * extra_cost->alu.shift;
9555 *cost += extra_cost->alu.arith_shift;
9556 *cost += 2 * extra_cost->alu.logical;
9559 return true;
9561 return false;
9563 case MINUS:
9564 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9565 && (mode == SFmode || !TARGET_VFP_SINGLE))
9567 if (GET_CODE (XEXP (x, 0)) == MULT
9568 || GET_CODE (XEXP (x, 1)) == MULT)
9570 rtx mul_op0, mul_op1, sub_op;
9572 if (speed_p)
9573 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9575 if (GET_CODE (XEXP (x, 0)) == MULT)
9577 mul_op0 = XEXP (XEXP (x, 0), 0);
9578 mul_op1 = XEXP (XEXP (x, 0), 1);
9579 sub_op = XEXP (x, 1);
9581 else
9583 mul_op0 = XEXP (XEXP (x, 1), 0);
9584 mul_op1 = XEXP (XEXP (x, 1), 1);
9585 sub_op = XEXP (x, 0);
9588 /* The first operand of the multiply may be optionally
9589 negated. */
9590 if (GET_CODE (mul_op0) == NEG)
9591 mul_op0 = XEXP (mul_op0, 0);
9593 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9594 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9595 + rtx_cost (sub_op, mode, code, 0, speed_p));
9597 return true;
9600 if (speed_p)
9601 *cost += extra_cost->fp[mode != SFmode].addsub;
9602 return false;
9605 if (mode == SImode)
9607 rtx shift_by_reg = NULL;
9608 rtx shift_op;
9609 rtx non_shift_op;
9611 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9612 if (shift_op == NULL)
9614 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9615 non_shift_op = XEXP (x, 0);
9617 else
9618 non_shift_op = XEXP (x, 1);
9620 if (shift_op != NULL)
9622 if (shift_by_reg != NULL)
9624 if (speed_p)
9625 *cost += extra_cost->alu.arith_shift_reg;
9626 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9628 else if (speed_p)
9629 *cost += extra_cost->alu.arith_shift;
9631 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9632 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9633 return true;
9636 if (arm_arch_thumb2
9637 && GET_CODE (XEXP (x, 1)) == MULT)
9639 /* MLS. */
9640 if (speed_p)
9641 *cost += extra_cost->mult[0].add;
9642 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9643 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9644 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9645 return true;
9648 if (CONST_INT_P (XEXP (x, 0)))
9650 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9651 INTVAL (XEXP (x, 0)), NULL_RTX,
9652 NULL_RTX, 1, 0);
9653 *cost = COSTS_N_INSNS (insns);
9654 if (speed_p)
9655 *cost += insns * extra_cost->alu.arith;
9656 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9657 return true;
9659 else if (speed_p)
9660 *cost += extra_cost->alu.arith;
9662 return false;
9665 if (GET_MODE_CLASS (mode) == MODE_INT
9666 && GET_MODE_SIZE (mode) < 4)
9668 rtx shift_op, shift_reg;
9669 shift_reg = NULL;
9671 /* We check both sides of the MINUS for shifter operands since,
9672 unlike PLUS, it's not commutative. */
9674 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9675 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9677 /* Slightly disparage, as we might need to widen the result. */
9678 *cost += 1;
9679 if (speed_p)
9680 *cost += extra_cost->alu.arith;
9682 if (CONST_INT_P (XEXP (x, 0)))
9684 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9685 return true;
9688 return false;
9691 if (mode == DImode)
9693 *cost += COSTS_N_INSNS (1);
9695 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9697 rtx op1 = XEXP (x, 1);
9699 if (speed_p)
9700 *cost += 2 * extra_cost->alu.arith;
9702 if (GET_CODE (op1) == ZERO_EXTEND)
9703 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9704 0, speed_p);
9705 else
9706 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9707 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9708 0, speed_p);
9709 return true;
9711 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9713 if (speed_p)
9714 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9715 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9716 0, speed_p)
9717 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9718 return true;
9720 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9721 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9723 if (speed_p)
9724 *cost += (extra_cost->alu.arith
9725 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9726 ? extra_cost->alu.arith
9727 : extra_cost->alu.arith_shift));
9728 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9729 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9730 GET_CODE (XEXP (x, 1)), 0, speed_p));
9731 return true;
9734 if (speed_p)
9735 *cost += 2 * extra_cost->alu.arith;
9736 return false;
9739 /* Vector mode? */
9741 *cost = LIBCALL_COST (2);
9742 return false;
9744 case PLUS:
9745 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9746 && (mode == SFmode || !TARGET_VFP_SINGLE))
9748 if (GET_CODE (XEXP (x, 0)) == MULT)
9750 rtx mul_op0, mul_op1, add_op;
9752 if (speed_p)
9753 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9755 mul_op0 = XEXP (XEXP (x, 0), 0);
9756 mul_op1 = XEXP (XEXP (x, 0), 1);
9757 add_op = XEXP (x, 1);
9759 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9760 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9761 + rtx_cost (add_op, mode, code, 0, speed_p));
9763 return true;
9766 if (speed_p)
9767 *cost += extra_cost->fp[mode != SFmode].addsub;
9768 return false;
9770 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9772 *cost = LIBCALL_COST (2);
9773 return false;
9776 /* Narrow modes can be synthesized in SImode, but the range
9777 of useful sub-operations is limited. Check for shift operations
9778 on one of the operands. Only left shifts can be used in the
9779 narrow modes. */
9780 if (GET_MODE_CLASS (mode) == MODE_INT
9781 && GET_MODE_SIZE (mode) < 4)
9783 rtx shift_op, shift_reg;
9784 shift_reg = NULL;
9786 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9788 if (CONST_INT_P (XEXP (x, 1)))
9790 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9791 INTVAL (XEXP (x, 1)), NULL_RTX,
9792 NULL_RTX, 1, 0);
9793 *cost = COSTS_N_INSNS (insns);
9794 if (speed_p)
9795 *cost += insns * extra_cost->alu.arith;
9796 /* Slightly penalize a narrow operation as the result may
9797 need widening. */
9798 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9799 return true;
9802 /* Slightly penalize a narrow operation as the result may
9803 need widening. */
9804 *cost += 1;
9805 if (speed_p)
9806 *cost += extra_cost->alu.arith;
9808 return false;
9811 if (mode == SImode)
9813 rtx shift_op, shift_reg;
9815 if (TARGET_INT_SIMD
9816 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9817 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9819 /* UXTA[BH] or SXTA[BH]. */
9820 if (speed_p)
9821 *cost += extra_cost->alu.extend_arith;
9822 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9823 0, speed_p)
9824 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9825 return true;
9828 shift_reg = NULL;
9829 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9830 if (shift_op != NULL)
9832 if (shift_reg)
9834 if (speed_p)
9835 *cost += extra_cost->alu.arith_shift_reg;
9836 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9838 else if (speed_p)
9839 *cost += extra_cost->alu.arith_shift;
9841 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9842 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9843 return true;
9845 if (GET_CODE (XEXP (x, 0)) == MULT)
9847 rtx mul_op = XEXP (x, 0);
9849 if (TARGET_DSP_MULTIPLY
9850 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9851 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9852 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9853 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9854 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9855 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9856 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9857 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9858 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9859 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9860 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9861 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9862 == 16))))))
9864 /* SMLA[BT][BT]. */
9865 if (speed_p)
9866 *cost += extra_cost->mult[0].extend_add;
9867 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9868 SIGN_EXTEND, 0, speed_p)
9869 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9870 SIGN_EXTEND, 0, speed_p)
9871 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9872 return true;
9875 if (speed_p)
9876 *cost += extra_cost->mult[0].add;
9877 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9878 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9879 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9880 return true;
9882 if (CONST_INT_P (XEXP (x, 1)))
9884 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9885 INTVAL (XEXP (x, 1)), NULL_RTX,
9886 NULL_RTX, 1, 0);
9887 *cost = COSTS_N_INSNS (insns);
9888 if (speed_p)
9889 *cost += insns * extra_cost->alu.arith;
9890 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9891 return true;
9893 else if (speed_p)
9894 *cost += extra_cost->alu.arith;
9896 return false;
9899 if (mode == DImode)
9901 if (arm_arch3m
9902 && GET_CODE (XEXP (x, 0)) == MULT
9903 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9904 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9905 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9906 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9908 if (speed_p)
9909 *cost += extra_cost->mult[1].extend_add;
9910 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9911 ZERO_EXTEND, 0, speed_p)
9912 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9913 ZERO_EXTEND, 0, speed_p)
9914 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9915 return true;
9918 *cost += COSTS_N_INSNS (1);
9920 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9921 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9923 if (speed_p)
9924 *cost += (extra_cost->alu.arith
9925 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9926 ? extra_cost->alu.arith
9927 : extra_cost->alu.arith_shift));
9929 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9930 0, speed_p)
9931 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9932 return true;
9935 if (speed_p)
9936 *cost += 2 * extra_cost->alu.arith;
9937 return false;
9940 /* Vector mode? */
9941 *cost = LIBCALL_COST (2);
9942 return false;
9943 case IOR:
9944 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9946 if (speed_p)
9947 *cost += extra_cost->alu.rev;
9949 return true;
9951 /* Fall through. */
9952 case AND: case XOR:
9953 if (mode == SImode)
9955 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9956 rtx op0 = XEXP (x, 0);
9957 rtx shift_op, shift_reg;
9959 if (subcode == NOT
9960 && (code == AND
9961 || (code == IOR && TARGET_THUMB2)))
9962 op0 = XEXP (op0, 0);
9964 shift_reg = NULL;
9965 shift_op = shifter_op_p (op0, &shift_reg);
9966 if (shift_op != NULL)
9968 if (shift_reg)
9970 if (speed_p)
9971 *cost += extra_cost->alu.log_shift_reg;
9972 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9974 else if (speed_p)
9975 *cost += extra_cost->alu.log_shift;
9977 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9978 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9979 return true;
9982 if (CONST_INT_P (XEXP (x, 1)))
9984 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9985 INTVAL (XEXP (x, 1)), NULL_RTX,
9986 NULL_RTX, 1, 0);
9988 *cost = COSTS_N_INSNS (insns);
9989 if (speed_p)
9990 *cost += insns * extra_cost->alu.logical;
9991 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9992 return true;
9995 if (speed_p)
9996 *cost += extra_cost->alu.logical;
9997 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9998 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9999 return true;
10002 if (mode == DImode)
10004 rtx op0 = XEXP (x, 0);
10005 enum rtx_code subcode = GET_CODE (op0);
10007 *cost += COSTS_N_INSNS (1);
10009 if (subcode == NOT
10010 && (code == AND
10011 || (code == IOR && TARGET_THUMB2)))
10012 op0 = XEXP (op0, 0);
10014 if (GET_CODE (op0) == ZERO_EXTEND)
10016 if (speed_p)
10017 *cost += 2 * extra_cost->alu.logical;
10019 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10020 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10022 return true;
10024 else if (GET_CODE (op0) == SIGN_EXTEND)
10026 if (speed_p)
10027 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10029 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10030 0, speed_p)
10031 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10032 return true;
10035 if (speed_p)
10036 *cost += 2 * extra_cost->alu.logical;
10038 return true;
10040 /* Vector mode? */
10042 *cost = LIBCALL_COST (2);
10043 return false;
10045 case MULT:
10046 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10047 && (mode == SFmode || !TARGET_VFP_SINGLE))
10049 rtx op0 = XEXP (x, 0);
10051 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10052 op0 = XEXP (op0, 0);
10054 if (speed_p)
10055 *cost += extra_cost->fp[mode != SFmode].mult;
10057 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10058 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10059 return true;
10061 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10063 *cost = LIBCALL_COST (2);
10064 return false;
10067 if (mode == SImode)
10069 if (TARGET_DSP_MULTIPLY
10070 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10071 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10072 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10073 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10074 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10075 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10076 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10077 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10078 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10079 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10080 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10081 && (INTVAL (XEXP (XEXP (x, 1), 1))
10082 == 16))))))
10084 /* SMUL[TB][TB]. */
10085 if (speed_p)
10086 *cost += extra_cost->mult[0].extend;
10087 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10088 SIGN_EXTEND, 0, speed_p);
10089 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10090 SIGN_EXTEND, 1, speed_p);
10091 return true;
10093 if (speed_p)
10094 *cost += extra_cost->mult[0].simple;
10095 return false;
10098 if (mode == DImode)
10100 if (arm_arch3m
10101 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10102 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10103 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10104 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10106 if (speed_p)
10107 *cost += extra_cost->mult[1].extend;
10108 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10109 ZERO_EXTEND, 0, speed_p)
10110 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10111 ZERO_EXTEND, 0, speed_p));
10112 return true;
10115 *cost = LIBCALL_COST (2);
10116 return false;
10119 /* Vector mode? */
10120 *cost = LIBCALL_COST (2);
10121 return false;
10123 case NEG:
10124 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10125 && (mode == SFmode || !TARGET_VFP_SINGLE))
10127 if (GET_CODE (XEXP (x, 0)) == MULT)
10129 /* VNMUL. */
10130 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10131 return true;
10134 if (speed_p)
10135 *cost += extra_cost->fp[mode != SFmode].neg;
10137 return false;
10139 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10141 *cost = LIBCALL_COST (1);
10142 return false;
10145 if (mode == SImode)
10147 if (GET_CODE (XEXP (x, 0)) == ABS)
10149 *cost += COSTS_N_INSNS (1);
10150 /* Assume the non-flag-changing variant. */
10151 if (speed_p)
10152 *cost += (extra_cost->alu.log_shift
10153 + extra_cost->alu.arith_shift);
10154 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10155 return true;
10158 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10159 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10161 *cost += COSTS_N_INSNS (1);
10162 /* No extra cost for MOV imm and MVN imm. */
10163 /* If the comparison op is using the flags, there's no further
10164 cost, otherwise we need to add the cost of the comparison. */
10165 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10166 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10167 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10169 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10170 *cost += (COSTS_N_INSNS (1)
10171 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10172 0, speed_p)
10173 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10174 1, speed_p));
10175 if (speed_p)
10176 *cost += extra_cost->alu.arith;
10178 return true;
10181 if (speed_p)
10182 *cost += extra_cost->alu.arith;
10183 return false;
10186 if (GET_MODE_CLASS (mode) == MODE_INT
10187 && GET_MODE_SIZE (mode) < 4)
10189 /* Slightly disparage, as we might need an extend operation. */
10190 *cost += 1;
10191 if (speed_p)
10192 *cost += extra_cost->alu.arith;
10193 return false;
10196 if (mode == DImode)
10198 *cost += COSTS_N_INSNS (1);
10199 if (speed_p)
10200 *cost += 2 * extra_cost->alu.arith;
10201 return false;
10204 /* Vector mode? */
10205 *cost = LIBCALL_COST (1);
10206 return false;
10208 case NOT:
10209 if (mode == SImode)
10211 rtx shift_op;
10212 rtx shift_reg = NULL;
10214 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10216 if (shift_op)
10218 if (shift_reg != NULL)
10220 if (speed_p)
10221 *cost += extra_cost->alu.log_shift_reg;
10222 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10224 else if (speed_p)
10225 *cost += extra_cost->alu.log_shift;
10226 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10227 return true;
10230 if (speed_p)
10231 *cost += extra_cost->alu.logical;
10232 return false;
10234 if (mode == DImode)
10236 *cost += COSTS_N_INSNS (1);
10237 return false;
10240 /* Vector mode? */
10242 *cost += LIBCALL_COST (1);
10243 return false;
10245 case IF_THEN_ELSE:
10247 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10249 *cost += COSTS_N_INSNS (3);
10250 return true;
10252 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10253 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10255 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10256 /* Assume that if one arm of the if_then_else is a register,
10257 it will be tied with the result and eliminate the
10258 conditional insn. */
10259 if (REG_P (XEXP (x, 1)))
10260 *cost += op2cost;
10261 else if (REG_P (XEXP (x, 2)))
10262 *cost += op1cost;
10263 else
10265 if (speed_p)
10267 if (extra_cost->alu.non_exec_costs_exec)
10268 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10269 else
10270 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10272 else
10273 *cost += op1cost + op2cost;
10276 return true;
10278 case COMPARE:
10279 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10280 *cost = 0;
10281 else
10283 machine_mode op0mode;
10284 /* We'll mostly assume that the cost of a compare is the cost of the
10285 LHS. However, there are some notable exceptions. */
10287 /* Floating point compares are never done as side-effects. */
10288 op0mode = GET_MODE (XEXP (x, 0));
10289 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10290 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10292 if (speed_p)
10293 *cost += extra_cost->fp[op0mode != SFmode].compare;
10295 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10297 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10298 return true;
10301 return false;
10303 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10305 *cost = LIBCALL_COST (2);
10306 return false;
10309 /* DImode compares normally take two insns. */
10310 if (op0mode == DImode)
10312 *cost += COSTS_N_INSNS (1);
10313 if (speed_p)
10314 *cost += 2 * extra_cost->alu.arith;
10315 return false;
10318 if (op0mode == SImode)
10320 rtx shift_op;
10321 rtx shift_reg;
10323 if (XEXP (x, 1) == const0_rtx
10324 && !(REG_P (XEXP (x, 0))
10325 || (GET_CODE (XEXP (x, 0)) == SUBREG
10326 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10328 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10330 /* Multiply operations that set the flags are often
10331 significantly more expensive. */
10332 if (speed_p
10333 && GET_CODE (XEXP (x, 0)) == MULT
10334 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10335 *cost += extra_cost->mult[0].flag_setting;
10337 if (speed_p
10338 && GET_CODE (XEXP (x, 0)) == PLUS
10339 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10340 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10341 0), 1), mode))
10342 *cost += extra_cost->mult[0].flag_setting;
10343 return true;
10346 shift_reg = NULL;
10347 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10348 if (shift_op != NULL)
10350 if (shift_reg != NULL)
10352 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10353 1, speed_p);
10354 if (speed_p)
10355 *cost += extra_cost->alu.arith_shift_reg;
10357 else if (speed_p)
10358 *cost += extra_cost->alu.arith_shift;
10359 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10360 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10361 return true;
10364 if (speed_p)
10365 *cost += extra_cost->alu.arith;
10366 if (CONST_INT_P (XEXP (x, 1))
10367 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10369 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10370 return true;
10372 return false;
10375 /* Vector mode? */
10377 *cost = LIBCALL_COST (2);
10378 return false;
10380 return true;
10382 case EQ:
10383 case NE:
10384 case LT:
10385 case LE:
10386 case GT:
10387 case GE:
10388 case LTU:
10389 case LEU:
10390 case GEU:
10391 case GTU:
10392 case ORDERED:
10393 case UNORDERED:
10394 case UNEQ:
10395 case UNLE:
10396 case UNLT:
10397 case UNGE:
10398 case UNGT:
10399 case LTGT:
10400 if (outer_code == SET)
10402 /* Is it a store-flag operation? */
10403 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10404 && XEXP (x, 1) == const0_rtx)
10406 /* Thumb also needs an IT insn. */
10407 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10408 return true;
10410 if (XEXP (x, 1) == const0_rtx)
10412 switch (code)
10414 case LT:
10415 /* LSR Rd, Rn, #31. */
10416 if (speed_p)
10417 *cost += extra_cost->alu.shift;
10418 break;
10420 case EQ:
10421 /* RSBS T1, Rn, #0
10422 ADC Rd, Rn, T1. */
10424 case NE:
10425 /* SUBS T1, Rn, #1
10426 SBC Rd, Rn, T1. */
10427 *cost += COSTS_N_INSNS (1);
10428 break;
10430 case LE:
10431 /* RSBS T1, Rn, Rn, LSR #31
10432 ADC Rd, Rn, T1. */
10433 *cost += COSTS_N_INSNS (1);
10434 if (speed_p)
10435 *cost += extra_cost->alu.arith_shift;
10436 break;
10438 case GT:
10439 /* RSB Rd, Rn, Rn, ASR #1
10440 LSR Rd, Rd, #31. */
10441 *cost += COSTS_N_INSNS (1);
10442 if (speed_p)
10443 *cost += (extra_cost->alu.arith_shift
10444 + extra_cost->alu.shift);
10445 break;
10447 case GE:
10448 /* ASR Rd, Rn, #31
10449 ADD Rd, Rn, #1. */
10450 *cost += COSTS_N_INSNS (1);
10451 if (speed_p)
10452 *cost += extra_cost->alu.shift;
10453 break;
10455 default:
10456 /* Remaining cases are either meaningless or would take
10457 three insns anyway. */
10458 *cost = COSTS_N_INSNS (3);
10459 break;
10461 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10462 return true;
10464 else
10466 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10467 if (CONST_INT_P (XEXP (x, 1))
10468 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10470 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10471 return true;
10474 return false;
10477 /* Not directly inside a set. If it involves the condition code
10478 register it must be the condition for a branch, cond_exec or
10479 I_T_E operation. Since the comparison is performed elsewhere
10480 this is just the control part which has no additional
10481 cost. */
10482 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10483 && XEXP (x, 1) == const0_rtx)
10485 *cost = 0;
10486 return true;
10488 return false;
10490 case ABS:
10491 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10492 && (mode == SFmode || !TARGET_VFP_SINGLE))
10494 if (speed_p)
10495 *cost += extra_cost->fp[mode != SFmode].neg;
10497 return false;
10499 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10501 *cost = LIBCALL_COST (1);
10502 return false;
10505 if (mode == SImode)
10507 if (speed_p)
10508 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10509 return false;
10511 /* Vector mode? */
10512 *cost = LIBCALL_COST (1);
10513 return false;
10515 case SIGN_EXTEND:
10516 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10517 && MEM_P (XEXP (x, 0)))
10519 if (mode == DImode)
10520 *cost += COSTS_N_INSNS (1);
10522 if (!speed_p)
10523 return true;
10525 if (GET_MODE (XEXP (x, 0)) == SImode)
10526 *cost += extra_cost->ldst.load;
10527 else
10528 *cost += extra_cost->ldst.load_sign_extend;
10530 if (mode == DImode)
10531 *cost += extra_cost->alu.shift;
10533 return true;
10536 /* Widening from less than 32 bits requires an extend operation. */
10537 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10539 /* We have SXTB/SXTH. */
10540 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10541 if (speed_p)
10542 *cost += extra_cost->alu.extend;
10544 else if (GET_MODE (XEXP (x, 0)) != SImode)
10546 /* Needs two shifts. */
10547 *cost += COSTS_N_INSNS (1);
10548 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10549 if (speed_p)
10550 *cost += 2 * extra_cost->alu.shift;
10553 /* Widening beyond 32 bits requires one more insn. */
10554 if (mode == DImode)
10556 *cost += COSTS_N_INSNS (1);
10557 if (speed_p)
10558 *cost += extra_cost->alu.shift;
10561 return true;
10563 case ZERO_EXTEND:
10564 if ((arm_arch4
10565 || GET_MODE (XEXP (x, 0)) == SImode
10566 || GET_MODE (XEXP (x, 0)) == QImode)
10567 && MEM_P (XEXP (x, 0)))
10569 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10571 if (mode == DImode)
10572 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10574 return true;
10577 /* Widening from less than 32 bits requires an extend operation. */
10578 if (GET_MODE (XEXP (x, 0)) == QImode)
10580 /* UXTB can be a shorter instruction in Thumb2, but it might
10581 be slower than the AND Rd, Rn, #255 alternative. When
10582 optimizing for speed it should never be slower to use
10583 AND, and we don't really model 16-bit vs 32-bit insns
10584 here. */
10585 if (speed_p)
10586 *cost += extra_cost->alu.logical;
10588 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10590 /* We have UXTB/UXTH. */
10591 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10592 if (speed_p)
10593 *cost += extra_cost->alu.extend;
10595 else if (GET_MODE (XEXP (x, 0)) != SImode)
10597 /* Needs two shifts. It's marginally preferable to use
10598 shifts rather than two BIC instructions as the second
10599 shift may merge with a subsequent insn as a shifter
10600 op. */
10601 *cost = COSTS_N_INSNS (2);
10602 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10603 if (speed_p)
10604 *cost += 2 * extra_cost->alu.shift;
10607 /* Widening beyond 32 bits requires one more insn. */
10608 if (mode == DImode)
10610 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10613 return true;
10615 case CONST_INT:
10616 *cost = 0;
10617 /* CONST_INT has no mode, so we cannot tell for sure how many
10618 insns are really going to be needed. The best we can do is
10619 look at the value passed. If it fits in SImode, then assume
10620 that's the mode it will be used for. Otherwise assume it
10621 will be used in DImode. */
10622 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10623 mode = SImode;
10624 else
10625 mode = DImode;
10627 /* Avoid blowing up in arm_gen_constant (). */
10628 if (!(outer_code == PLUS
10629 || outer_code == AND
10630 || outer_code == IOR
10631 || outer_code == XOR
10632 || outer_code == MINUS))
10633 outer_code = SET;
10635 const_int_cost:
10636 if (mode == SImode)
10638 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10639 INTVAL (x), NULL, NULL,
10640 0, 0));
10641 /* Extra costs? */
10643 else
10645 *cost += COSTS_N_INSNS (arm_gen_constant
10646 (outer_code, SImode, NULL,
10647 trunc_int_for_mode (INTVAL (x), SImode),
10648 NULL, NULL, 0, 0)
10649 + arm_gen_constant (outer_code, SImode, NULL,
10650 INTVAL (x) >> 32, NULL,
10651 NULL, 0, 0));
10652 /* Extra costs? */
10655 return true;
10657 case CONST:
10658 case LABEL_REF:
10659 case SYMBOL_REF:
10660 if (speed_p)
10662 if (arm_arch_thumb2 && !flag_pic)
10663 *cost += COSTS_N_INSNS (1);
10664 else
10665 *cost += extra_cost->ldst.load;
10667 else
10668 *cost += COSTS_N_INSNS (1);
10670 if (flag_pic)
10672 *cost += COSTS_N_INSNS (1);
10673 if (speed_p)
10674 *cost += extra_cost->alu.arith;
10677 return true;
10679 case CONST_FIXED:
10680 *cost = COSTS_N_INSNS (4);
10681 /* Fixme. */
10682 return true;
10684 case CONST_DOUBLE:
10685 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10686 && (mode == SFmode || !TARGET_VFP_SINGLE))
10688 if (vfp3_const_double_rtx (x))
10690 if (speed_p)
10691 *cost += extra_cost->fp[mode == DFmode].fpconst;
10692 return true;
10695 if (speed_p)
10697 if (mode == DFmode)
10698 *cost += extra_cost->ldst.loadd;
10699 else
10700 *cost += extra_cost->ldst.loadf;
10702 else
10703 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10705 return true;
10707 *cost = COSTS_N_INSNS (4);
10708 return true;
10710 case CONST_VECTOR:
10711 /* Fixme. */
10712 if (TARGET_NEON
10713 && TARGET_HARD_FLOAT
10714 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10715 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10716 *cost = COSTS_N_INSNS (1);
10717 else
10718 *cost = COSTS_N_INSNS (4);
10719 return true;
10721 case HIGH:
10722 case LO_SUM:
10723 /* When optimizing for size, we prefer constant pool entries to
10724 MOVW/MOVT pairs, so bump the cost of these slightly. */
10725 if (!speed_p)
10726 *cost += 1;
10727 return true;
10729 case CLZ:
10730 if (speed_p)
10731 *cost += extra_cost->alu.clz;
10732 return false;
10734 case SMIN:
10735 if (XEXP (x, 1) == const0_rtx)
10737 if (speed_p)
10738 *cost += extra_cost->alu.log_shift;
10739 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10740 return true;
10742 /* Fall through. */
10743 case SMAX:
10744 case UMIN:
10745 case UMAX:
10746 *cost += COSTS_N_INSNS (1);
10747 return false;
10749 case TRUNCATE:
10750 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10751 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10752 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10753 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10754 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10755 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10756 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10757 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10758 == ZERO_EXTEND))))
10760 if (speed_p)
10761 *cost += extra_cost->mult[1].extend;
10762 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10763 ZERO_EXTEND, 0, speed_p)
10764 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10765 ZERO_EXTEND, 0, speed_p));
10766 return true;
10768 *cost = LIBCALL_COST (1);
10769 return false;
10771 case UNSPEC_VOLATILE:
10772 case UNSPEC:
10773 return arm_unspec_cost (x, outer_code, speed_p, cost);
10775 case PC:
10776 /* Reading the PC is like reading any other register. Writing it
10777 is more expensive, but we take that into account elsewhere. */
10778 *cost = 0;
10779 return true;
10781 case ZERO_EXTRACT:
10782 /* TODO: Simple zero_extract of bottom bits using AND. */
10783 /* Fall through. */
10784 case SIGN_EXTRACT:
10785 if (arm_arch6
10786 && mode == SImode
10787 && CONST_INT_P (XEXP (x, 1))
10788 && CONST_INT_P (XEXP (x, 2)))
10790 if (speed_p)
10791 *cost += extra_cost->alu.bfx;
10792 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10793 return true;
10795 /* Without UBFX/SBFX, need to resort to shift operations. */
10796 *cost += COSTS_N_INSNS (1);
10797 if (speed_p)
10798 *cost += 2 * extra_cost->alu.shift;
10799 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10800 return true;
10802 case FLOAT_EXTEND:
10803 if (TARGET_HARD_FLOAT)
10805 if (speed_p)
10806 *cost += extra_cost->fp[mode == DFmode].widen;
10807 if (!TARGET_VFP5
10808 && GET_MODE (XEXP (x, 0)) == HFmode)
10810 /* Pre v8, widening HF->DF is a two-step process, first
10811 widening to SFmode. */
10812 *cost += COSTS_N_INSNS (1);
10813 if (speed_p)
10814 *cost += extra_cost->fp[0].widen;
10816 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10817 return true;
10820 *cost = LIBCALL_COST (1);
10821 return false;
10823 case FLOAT_TRUNCATE:
10824 if (TARGET_HARD_FLOAT)
10826 if (speed_p)
10827 *cost += extra_cost->fp[mode == DFmode].narrow;
10828 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10829 return true;
10830 /* Vector modes? */
10832 *cost = LIBCALL_COST (1);
10833 return false;
10835 case FMA:
10836 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10838 rtx op0 = XEXP (x, 0);
10839 rtx op1 = XEXP (x, 1);
10840 rtx op2 = XEXP (x, 2);
10843 /* vfms or vfnma. */
10844 if (GET_CODE (op0) == NEG)
10845 op0 = XEXP (op0, 0);
10847 /* vfnms or vfnma. */
10848 if (GET_CODE (op2) == NEG)
10849 op2 = XEXP (op2, 0);
10851 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10852 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10853 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10855 if (speed_p)
10856 *cost += extra_cost->fp[mode == DFmode].fma;
10858 return true;
10861 *cost = LIBCALL_COST (3);
10862 return false;
10864 case FIX:
10865 case UNSIGNED_FIX:
10866 if (TARGET_HARD_FLOAT)
10868 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10869 a vcvt fixed-point conversion. */
10870 if (code == FIX && mode == SImode
10871 && GET_CODE (XEXP (x, 0)) == FIX
10872 && GET_MODE (XEXP (x, 0)) == SFmode
10873 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10874 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10875 > 0)
10877 if (speed_p)
10878 *cost += extra_cost->fp[0].toint;
10880 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10881 code, 0, speed_p);
10882 return true;
10885 if (GET_MODE_CLASS (mode) == MODE_INT)
10887 mode = GET_MODE (XEXP (x, 0));
10888 if (speed_p)
10889 *cost += extra_cost->fp[mode == DFmode].toint;
10890 /* Strip off the 'cost' of rounding towards zero. */
10891 if (GET_CODE (XEXP (x, 0)) == FIX)
10892 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10893 0, speed_p);
10894 else
10895 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10896 /* ??? Increase the cost to deal with transferring from
10897 FP -> CORE registers? */
10898 return true;
10900 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10901 && TARGET_VFP5)
10903 if (speed_p)
10904 *cost += extra_cost->fp[mode == DFmode].roundint;
10905 return false;
10907 /* Vector costs? */
10909 *cost = LIBCALL_COST (1);
10910 return false;
10912 case FLOAT:
10913 case UNSIGNED_FLOAT:
10914 if (TARGET_HARD_FLOAT)
10916 /* ??? Increase the cost to deal with transferring from CORE
10917 -> FP registers? */
10918 if (speed_p)
10919 *cost += extra_cost->fp[mode == DFmode].fromint;
10920 return false;
10922 *cost = LIBCALL_COST (1);
10923 return false;
10925 case CALL:
10926 return true;
10928 case ASM_OPERANDS:
10930 /* Just a guess: the number of instructions in the asm
10931 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10932 though (see PR60663). */
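/* For example (added for illustration; the asm is hypothetical), an asm
   whose template contains two instructions and which has three inputs is
   costed as COSTS_N_INSNS (2 + 3), while an empty template still counts
   as a single instruction.  */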
10933 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10934 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10936 *cost = COSTS_N_INSNS (asm_length + num_operands);
10937 return true;
10939 default:
10940 if (mode != VOIDmode)
10941 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10942 else
10943 *cost = COSTS_N_INSNS (4); /* Who knows? */
10944 return false;
10948 #undef HANDLE_NARROW_SHIFT_ARITH
10950 /* RTX costs entry point. */
10952 static bool
10953 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10954 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10956 bool result;
10957 int code = GET_CODE (x);
10958 gcc_assert (current_tune->insn_extra_cost);
10960 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10961 (enum rtx_code) outer_code,
10962 current_tune->insn_extra_cost,
10963 total, speed);
10965 if (dump_file && (dump_flags & TDF_DETAILS))
10967 print_rtl_single (dump_file, x);
10968 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10969 *total, result ? "final" : "partial");
10971 return result;
10974 /* All address computations that can be done are free, but rtx cost returns
10975 the same for practically all of them. So we weight the different types
10976 of address here in the order (most preferred first):
10977 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
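/* Illustrative weights from the code below (added for clarity): a
   post-increment address scores 0, (plus (reg) (const_int 8)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, any other PLUS
   scores 4, a plain register scores 6 and a symbol_ref scores 10; lower
   is better.  */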
10978 static inline int
10979 arm_arm_address_cost (rtx x)
10981 enum rtx_code c = GET_CODE (x);
10983 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10984 return 0;
10985 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10986 return 10;
10988 if (c == PLUS)
10990 if (CONST_INT_P (XEXP (x, 1)))
10991 return 2;
10993 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10994 return 3;
10996 return 4;
10999 return 6;
11002 static inline int
11003 arm_thumb_address_cost (rtx x)
11005 enum rtx_code c = GET_CODE (x);
11007 if (c == REG)
11008 return 1;
11009 if (c == PLUS
11010 && REG_P (XEXP (x, 0))
11011 && CONST_INT_P (XEXP (x, 1)))
11012 return 1;
11014 return 2;
11017 static int
11018 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11019 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11021 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11024 /* Adjust cost hook for XScale. */
11025 static bool
11026 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11027 int * cost)
11029 /* Some true dependencies can have a higher cost depending
11030 on precisely how certain input operands are used. */
11031 if (dep_type == 0
11032 && recog_memoized (insn) >= 0
11033 && recog_memoized (dep) >= 0)
11035 int shift_opnum = get_attr_shift (insn);
11036 enum attr_type attr_type = get_attr_type (dep);
11038 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11039 operand for INSN. If we have a shifted input operand and the
11040 instruction we depend on is another ALU instruction, then we may
11041 have to account for an additional stall. */
11042 if (shift_opnum != 0
11043 && (attr_type == TYPE_ALU_SHIFT_IMM
11044 || attr_type == TYPE_ALUS_SHIFT_IMM
11045 || attr_type == TYPE_LOGIC_SHIFT_IMM
11046 || attr_type == TYPE_LOGICS_SHIFT_IMM
11047 || attr_type == TYPE_ALU_SHIFT_REG
11048 || attr_type == TYPE_ALUS_SHIFT_REG
11049 || attr_type == TYPE_LOGIC_SHIFT_REG
11050 || attr_type == TYPE_LOGICS_SHIFT_REG
11051 || attr_type == TYPE_MOV_SHIFT
11052 || attr_type == TYPE_MVN_SHIFT
11053 || attr_type == TYPE_MOV_SHIFT_REG
11054 || attr_type == TYPE_MVN_SHIFT_REG))
11056 rtx shifted_operand;
11057 int opno;
11059 /* Get the shifted operand. */
11060 extract_insn (insn);
11061 shifted_operand = recog_data.operand[shift_opnum];
11063 /* Iterate over all the operands in DEP. If we write an operand
11064 that overlaps with SHIFTED_OPERAND, then we have to increase the
11065 cost of this dependency. */
11066 extract_insn (dep);
11067 preprocess_constraints (dep);
11068 for (opno = 0; opno < recog_data.n_operands; opno++)
11070 /* We can ignore strict inputs. */
11071 if (recog_data.operand_type[opno] == OP_IN)
11072 continue;
11074 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11075 shifted_operand))
11077 *cost = 2;
11078 return false;
11083 return true;
11086 /* Adjust cost hook for Cortex A9. */
11087 static bool
11088 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11089 int * cost)
11091 switch (dep_type)
11093 case REG_DEP_ANTI:
11094 *cost = 0;
11095 return false;
11097 case REG_DEP_TRUE:
11098 case REG_DEP_OUTPUT:
11099 if (recog_memoized (insn) >= 0
11100 && recog_memoized (dep) >= 0)
11102 if (GET_CODE (PATTERN (insn)) == SET)
11104 if (GET_MODE_CLASS
11105 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11106 || GET_MODE_CLASS
11107 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11109 enum attr_type attr_type_insn = get_attr_type (insn);
11110 enum attr_type attr_type_dep = get_attr_type (dep);
11112 /* By default all dependencies of the form
11113 s0 = s0 <op> s1
11114 s0 = s0 <op> s2
11115 have an extra latency of 1 cycle because
11116 of the input and output dependency in this
11117 case. However, this gets modeled as a true
11118 dependency, hence all these checks. */
11119 if (REG_P (SET_DEST (PATTERN (insn)))
11120 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11122 /* FMACS is a special case where the dependent
11123 instruction can be issued 3 cycles before
11124 the normal latency in case of an output
11125 dependency. */
11126 if ((attr_type_insn == TYPE_FMACS
11127 || attr_type_insn == TYPE_FMACD)
11128 && (attr_type_dep == TYPE_FMACS
11129 || attr_type_dep == TYPE_FMACD))
11131 if (dep_type == REG_DEP_OUTPUT)
11132 *cost = insn_default_latency (dep) - 3;
11133 else
11134 *cost = insn_default_latency (dep);
11135 return false;
11137 else
11139 if (dep_type == REG_DEP_OUTPUT)
11140 *cost = insn_default_latency (dep) + 1;
11141 else
11142 *cost = insn_default_latency (dep);
11144 return false;
11149 break;
11151 default:
11152 gcc_unreachable ();
11155 return true;
11158 /* Adjust cost hook for FA726TE. */
11159 static bool
11160 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11161 int * cost)
11163 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11164 has a penalty of 3. */
11165 if (dep_type == REG_DEP_TRUE
11166 && recog_memoized (insn) >= 0
11167 && recog_memoized (dep) >= 0
11168 && get_attr_conds (dep) == CONDS_SET)
11170 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11171 if (get_attr_conds (insn) == CONDS_USE
11172 && get_attr_type (insn) != TYPE_BRANCH)
11174 *cost = 3;
11175 return false;
11178 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11179 || get_attr_conds (insn) == CONDS_USE)
11181 *cost = 0;
11182 return false;
11186 return true;
11189 /* Implement TARGET_REGISTER_MOVE_COST.
11191 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11192 it is typically more expensive than a single memory access. We set
11193 the cost to less than two memory accesses so that floating
11194 point to integer conversion does not go through memory. */
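/* Illustrative check of the claim above (added for clarity): on
   TARGET_32BIT, arm_memory_move_cost below returns 10, so two memory
   accesses would cost 20, while a VFP <-> core register move is costed
   at 15 and therefore stays cheaper than a round trip through memory.  */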
11197 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11198 reg_class_t from, reg_class_t to)
11200 if (TARGET_32BIT)
11202 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11203 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11204 return 15;
11205 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11206 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11207 return 4;
11208 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11209 return 20;
11210 else
11211 return 2;
11213 else
11215 if (from == HI_REGS || to == HI_REGS)
11216 return 4;
11217 else
11218 return 2;
11222 /* Implement TARGET_MEMORY_MOVE_COST. */
11225 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11226 bool in ATTRIBUTE_UNUSED)
11228 if (TARGET_32BIT)
11229 return 10;
11230 else
11232 if (GET_MODE_SIZE (mode) < 4)
11233 return 8;
11234 else
11235 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11239 /* Vectorizer cost model implementation. */
11241 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11242 static int
11243 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11244 tree vectype,
11245 int misalign ATTRIBUTE_UNUSED)
11247 unsigned elements;
11249 switch (type_of_cost)
11251 case scalar_stmt:
11252 return current_tune->vec_costs->scalar_stmt_cost;
11254 case scalar_load:
11255 return current_tune->vec_costs->scalar_load_cost;
11257 case scalar_store:
11258 return current_tune->vec_costs->scalar_store_cost;
11260 case vector_stmt:
11261 return current_tune->vec_costs->vec_stmt_cost;
11263 case vector_load:
11264 return current_tune->vec_costs->vec_align_load_cost;
11266 case vector_store:
11267 return current_tune->vec_costs->vec_store_cost;
11269 case vec_to_scalar:
11270 return current_tune->vec_costs->vec_to_scalar_cost;
11272 case scalar_to_vec:
11273 return current_tune->vec_costs->scalar_to_vec_cost;
11275 case unaligned_load:
11276 case vector_gather_load:
11277 return current_tune->vec_costs->vec_unalign_load_cost;
11279 case unaligned_store:
11280 case vector_scatter_store:
11281 return current_tune->vec_costs->vec_unalign_store_cost;
11283 case cond_branch_taken:
11284 return current_tune->vec_costs->cond_taken_branch_cost;
11286 case cond_branch_not_taken:
11287 return current_tune->vec_costs->cond_not_taken_branch_cost;
11289 case vec_perm:
11290 case vec_promote_demote:
11291 return current_tune->vec_costs->vec_stmt_cost;
11293 case vec_construct:
11294 elements = TYPE_VECTOR_SUBPARTS (vectype);
11295 return elements / 2 + 1;
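/* E.g. (added for illustration) constructing a 4-element vector is
   costed as 4 / 2 + 1 == 3.  */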
11297 default:
11298 gcc_unreachable ();
11302 /* Implement targetm.vectorize.add_stmt_cost. */
11304 static unsigned
11305 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11306 struct _stmt_vec_info *stmt_info, int misalign,
11307 enum vect_cost_model_location where)
11309 unsigned *cost = (unsigned *) data;
11310 unsigned retval = 0;
11312 if (flag_vect_cost_model)
11314 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11315 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11317 /* Statements in an inner loop relative to the loop being
11318 vectorized are weighted more heavily. The value here is
11319 arbitrary and could potentially be improved with analysis. */
11320 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11321 count *= 50; /* FIXME. */
11323 retval = (unsigned) (count * stmt_cost);
11324 cost[where] += retval;
11327 return retval;
11330 /* Return true if and only if this insn can dual-issue only as older. */
11331 static bool
11332 cortexa7_older_only (rtx_insn *insn)
11334 if (recog_memoized (insn) < 0)
11335 return false;
11337 switch (get_attr_type (insn))
11339 case TYPE_ALU_DSP_REG:
11340 case TYPE_ALU_SREG:
11341 case TYPE_ALUS_SREG:
11342 case TYPE_LOGIC_REG:
11343 case TYPE_LOGICS_REG:
11344 case TYPE_ADC_REG:
11345 case TYPE_ADCS_REG:
11346 case TYPE_ADR:
11347 case TYPE_BFM:
11348 case TYPE_REV:
11349 case TYPE_MVN_REG:
11350 case TYPE_SHIFT_IMM:
11351 case TYPE_SHIFT_REG:
11352 case TYPE_LOAD_BYTE:
11353 case TYPE_LOAD_4:
11354 case TYPE_STORE_4:
11355 case TYPE_FFARITHS:
11356 case TYPE_FADDS:
11357 case TYPE_FFARITHD:
11358 case TYPE_FADDD:
11359 case TYPE_FMOV:
11360 case TYPE_F_CVT:
11361 case TYPE_FCMPS:
11362 case TYPE_FCMPD:
11363 case TYPE_FCONSTS:
11364 case TYPE_FCONSTD:
11365 case TYPE_FMULS:
11366 case TYPE_FMACS:
11367 case TYPE_FMULD:
11368 case TYPE_FMACD:
11369 case TYPE_FDIVS:
11370 case TYPE_FDIVD:
11371 case TYPE_F_MRC:
11372 case TYPE_F_MRRC:
11373 case TYPE_F_FLAG:
11374 case TYPE_F_LOADS:
11375 case TYPE_F_STORES:
11376 return true;
11377 default:
11378 return false;
11382 /* Return true if and only if this insn can dual-issue as younger. */
11383 static bool
11384 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11386 if (recog_memoized (insn) < 0)
11388 if (verbose > 5)
11389 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11390 return false;
11393 switch (get_attr_type (insn))
11395 case TYPE_ALU_IMM:
11396 case TYPE_ALUS_IMM:
11397 case TYPE_LOGIC_IMM:
11398 case TYPE_LOGICS_IMM:
11399 case TYPE_EXTEND:
11400 case TYPE_MVN_IMM:
11401 case TYPE_MOV_IMM:
11402 case TYPE_MOV_REG:
11403 case TYPE_MOV_SHIFT:
11404 case TYPE_MOV_SHIFT_REG:
11405 case TYPE_BRANCH:
11406 case TYPE_CALL:
11407 return true;
11408 default:
11409 return false;
11414 /* Look for an instruction that can dual issue only as an older
11415 instruction, and move it in front of any instructions that can
11416 dual-issue as younger, while preserving the relative order of all
11417 other instructions in the ready list. This is a heuristic to help
11418 dual-issue in later cycles, by postponing issue of more flexible
11419 instructions. This heuristic may affect dual issue opportunities
11420 in the current cycle. */
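/* A sketch of the effect, for a hypothetical three-entry ready list (the
   instruction to issue first is at the highest index): if READY holds
   { O, X, Y }, with Y younger-capable at index 2, X neither, and O
   older-only at index 0, the rotation below yields { X, Y, O }, so the
   older-only insn O issues first and the younger insn Y immediately after
   it, giving the pair a chance to dual-issue.  */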
11421 static void
11422 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11423 int *n_readyp, int clock)
11425 int i;
11426 int first_older_only = -1, first_younger = -1;
11428 if (verbose > 5)
11429 fprintf (file,
11430 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11431 clock,
11432 *n_readyp);
11434 /* Traverse the ready list from the head (the instruction to issue
11435 first), looking for the first instruction that can issue as
11436 younger and the first instruction that can dual-issue only as
11437 older. */
11438 for (i = *n_readyp - 1; i >= 0; i--)
11440 rtx_insn *insn = ready[i];
11441 if (cortexa7_older_only (insn))
11443 first_older_only = i;
11444 if (verbose > 5)
11445 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11446 break;
11448 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11449 first_younger = i;
11452 /* Nothing to reorder because either no younger insn found or insn
11453 that can dual-issue only as older appears before any insn that
11454 can dual-issue as younger. */
11455 if (first_younger == -1)
11457 if (verbose > 5)
11458 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11459 return;
11462 /* Nothing to reorder because no older-only insn in the ready list. */
11463 if (first_older_only == -1)
11465 if (verbose > 5)
11466 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11467 return;
11470 /* Move first_older_only insn before first_younger. */
11471 if (verbose > 5)
11472 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11473 INSN_UID(ready [first_older_only]),
11474 INSN_UID(ready [first_younger]));
11475 rtx_insn *first_older_only_insn = ready [first_older_only];
11476 for (i = first_older_only; i < first_younger; i++)
11478 ready[i] = ready[i+1];
11481 ready[i] = first_older_only_insn;
11482 return;
11485 /* Implement TARGET_SCHED_REORDER. */
11486 static int
11487 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11488 int clock)
11490 switch (arm_tune)
11492 case TARGET_CPU_cortexa7:
11493 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11494 break;
11495 default:
11496 /* Do nothing for other cores. */
11497 break;
11500 return arm_issue_rate ();
11503 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11504 It corrects the value of COST based on the relationship between
11505 INSN and DEP, whose dependence kind is given by DEP_TYPE. It returns the new
11506 value. There is a per-core adjust_cost hook to adjust scheduler costs
11507 and the per-core hook can choose to completely override the generic
11508 adjust_cost function. Only put bits of code into arm_adjust_cost that
11509 are common across all cores. */
11510 static int
11511 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11512 unsigned int)
11514 rtx i_pat, d_pat;
11516 /* When generating Thumb-1 code, we want to place flag-setting operations
11517 close to a conditional branch which depends on them, so that we can
11518 omit the comparison. */
11519 if (TARGET_THUMB1
11520 && dep_type == 0
11521 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11522 && recog_memoized (dep) >= 0
11523 && get_attr_conds (dep) == CONDS_SET)
11524 return 0;
11526 if (current_tune->sched_adjust_cost != NULL)
11528 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11529 return cost;
11532 /* XXX Is this strictly true? */
11533 if (dep_type == REG_DEP_ANTI
11534 || dep_type == REG_DEP_OUTPUT)
11535 return 0;
11537 /* Call insns don't incur a stall, even if they follow a load. */
11538 if (dep_type == 0
11539 && CALL_P (insn))
11540 return 1;
11542 if ((i_pat = single_set (insn)) != NULL
11543 && MEM_P (SET_SRC (i_pat))
11544 && (d_pat = single_set (dep)) != NULL
11545 && MEM_P (SET_DEST (d_pat)))
11547 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11548 /* This is a load after a store; there is no conflict if the load reads
11549 from a cached area. Assume that loads from the stack and from the
11550 constant pool are cached, and that others will miss. This is a
11551 hack. */
11553 if ((GET_CODE (src_mem) == SYMBOL_REF
11554 && CONSTANT_POOL_ADDRESS_P (src_mem))
11555 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11556 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11557 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11558 return 1;
11561 return cost;
11565 arm_max_conditional_execute (void)
11567 return max_insns_skipped;
11570 static int
11571 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11573 if (TARGET_32BIT)
11574 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11575 else
11576 return (optimize > 0) ? 2 : 0;
11579 static int
11580 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11582 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11585 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11586 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11587 sequences of non-executed instructions in IT blocks probably take the same
11588 amount of time as executed instructions (and the IT instruction itself takes
11589 space in icache). This function was experimentally determined to give good
11590 results on a popular embedded benchmark. */
11592 static int
11593 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11595 return (TARGET_32BIT && speed_p) ? 1
11596 : arm_default_branch_cost (speed_p, predictable_p);
11599 static int
11600 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11602 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11605 static bool fp_consts_inited = false;
11607 static REAL_VALUE_TYPE value_fp0;
11609 static void
11610 init_fp_table (void)
11612 REAL_VALUE_TYPE r;
11614 r = REAL_VALUE_ATOF ("0", DFmode);
11615 value_fp0 = r;
11616 fp_consts_inited = true;
11619 /* Return TRUE if rtx X is a valid immediate FP constant. */
11621 arm_const_double_rtx (rtx x)
11623 const REAL_VALUE_TYPE *r;
11625 if (!fp_consts_inited)
11626 init_fp_table ();
11628 r = CONST_DOUBLE_REAL_VALUE (x);
11629 if (REAL_VALUE_MINUS_ZERO (*r))
11630 return 0;
11632 if (real_equal (r, &value_fp0))
11633 return 1;
11635 return 0;
11638 /* VFPv3 has a fairly wide range of representable immediates, formed from
11639 "quarter-precision" floating-point values. These can be evaluated using this
11640 formula (with ^ for exponentiation):
11642 -1^s * n * 2^-r
11644 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11645 16 <= n <= 31 and 0 <= r <= 7.
11647 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11649 - A (most-significant) is the sign bit.
11650 - BCD are the exponent (encoded as r XOR 3).
11651 - EFGH are the mantissa (encoded as n - 16).
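/* For example, 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which maps
   to A = 0, BCD = 4 XOR 3 = 7 (binary 111) and EFGH = 16 - 16 = 0, giving the
   8-bit encoding 0111 0000 (0x70).  */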
11654 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11655 fconst[sd] instruction, or -1 if X isn't suitable. */
11656 static int
11657 vfp3_const_double_index (rtx x)
11659 REAL_VALUE_TYPE r, m;
11660 int sign, exponent;
11661 unsigned HOST_WIDE_INT mantissa, mant_hi;
11662 unsigned HOST_WIDE_INT mask;
11663 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11664 bool fail;
11666 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11667 return -1;
11669 r = *CONST_DOUBLE_REAL_VALUE (x);
11671 /* We can't represent these things, so detect them first. */
11672 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11673 return -1;
11675 /* Extract sign, exponent and mantissa. */
11676 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11677 r = real_value_abs (&r);
11678 exponent = REAL_EXP (&r);
11679 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11680 highest (sign) bit, with a fixed binary point at bit point_pos.
11681 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11682 bits for the mantissa, this may fail (low bits would be lost). */
11683 real_ldexp (&m, &r, point_pos - exponent);
11684 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11685 mantissa = w.elt (0);
11686 mant_hi = w.elt (1);
11688 /* If there are bits set in the low part of the mantissa, we can't
11689 represent this value. */
11690 if (mantissa != 0)
11691 return -1;
11693 /* Now make it so that mantissa contains the most-significant bits, and move
11694 the point_pos to indicate that the least-significant bits have been
11695 discarded. */
11696 point_pos -= HOST_BITS_PER_WIDE_INT;
11697 mantissa = mant_hi;
11699 /* We can permit four significant bits of mantissa only, plus a high bit
11700 which is always 1. */
11701 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11702 if ((mantissa & mask) != 0)
11703 return -1;
11705 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11706 mantissa >>= point_pos - 5;
11708 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11709 floating-point immediate zero with Neon using an integer-zero load, but
11710 that case is handled elsewhere.) */
11711 if (mantissa == 0)
11712 return -1;
11714 gcc_assert (mantissa >= 16 && mantissa <= 31);
11716 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding, where
11717 normalized significands are in the range [1, 2). (Our mantissa is shifted
11718 left 4 places at this point relative to normalized IEEE754 values.) GCC
11719 internally uses [0.5, 1) (see real.c), so the exponent returned from
11720 REAL_EXP must be altered. */
11721 exponent = 5 - exponent;
11723 if (exponent < 0 || exponent > 7)
11724 return -1;
11726 /* Sign, mantissa and exponent are now in the correct form to plug into the
11727 formula described in the comment above. */
11728 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11731 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11733 vfp3_const_double_rtx (rtx x)
11735 if (!TARGET_VFP3)
11736 return 0;
11738 return vfp3_const_double_index (x) != -1;
11741 /* Recognize immediates which can be used in various Neon instructions. Legal
11742 immediates are described by the following table (for VMVN variants, the
11743 bitwise inverse of the constant shown is recognized. In either case, VMOV
11744 is output and the correct instruction to use for a given constant is chosen
11745 by the assembler). The constant shown is replicated across all elements of
11746 the destination vector.
11748 insn elems variant constant (binary)
11749 ---- ----- ------- -----------------
11750 vmov i32 0 00000000 00000000 00000000 abcdefgh
11751 vmov i32 1 00000000 00000000 abcdefgh 00000000
11752 vmov i32 2 00000000 abcdefgh 00000000 00000000
11753 vmov i32 3 abcdefgh 00000000 00000000 00000000
11754 vmov i16 4 00000000 abcdefgh
11755 vmov i16 5 abcdefgh 00000000
11756 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11757 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11758 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11759 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11760 vmvn i16 10 00000000 abcdefgh
11761 vmvn i16 11 abcdefgh 00000000
11762 vmov i32 12 00000000 00000000 abcdefgh 11111111
11763 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11764 vmov i32 14 00000000 abcdefgh 11111111 11111111
11765 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11766 vmov i8 16 abcdefgh
11767 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11768 eeeeeeee ffffffff gggggggg hhhhhhhh
11769 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11770 vmov f32 19 00000000 00000000 00000000 00000000
11772 For case 18, B = !b. Representable values are exactly those accepted by
11773 vfp3_const_double_index, but are output as floating-point numbers rather
11774 than indices.
11776 For case 19, we will change it to vmov.i32 when assembling.
11778 Variants 0-5 (inclusive) may also be used as immediates for the second
11779 operand of VORR/VBIC instructions.
11781 The INVERSE argument causes the bitwise inverse of the given operand to be
11782 recognized instead (used for recognizing legal immediates for the VAND/VORN
11783 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11784 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11785 output, rather than the real insns vbic/vorr).
11787 INVERSE makes no difference to the recognition of float vectors.
11789 The return value is the variant of immediate as shown in the above table, or
11790 -1 if the given value doesn't match any of the listed patterns.
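/* For instance, a V4SImode CONST_VECTOR whose elements are all the
   (hypothetical) value 0x0000ab00 matches variant 1 above: *ELEMENTWIDTH is
   set to 32, *MODCONST to 0x0000ab00, and the constant can be emitted as a
   vmov.i32 whose exact encoding is left to the assembler.  */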
11792 static int
11793 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11794 rtx *modconst, int *elementwidth)
11796 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11797 matches = 1; \
11798 for (i = 0; i < idx; i += (STRIDE)) \
11799 if (!(TEST)) \
11800 matches = 0; \
11801 if (matches) \
11803 immtype = (CLASS); \
11804 elsize = (ELSIZE); \
11805 break; \
11808 unsigned int i, elsize = 0, idx = 0, n_elts;
11809 unsigned int innersize;
11810 unsigned char bytes[16];
11811 int immtype = -1, matches;
11812 unsigned int invmask = inverse ? 0xff : 0;
11813 bool vector = GET_CODE (op) == CONST_VECTOR;
11815 if (vector)
11816 n_elts = CONST_VECTOR_NUNITS (op);
11817 else
11819 n_elts = 1;
11820 if (mode == VOIDmode)
11821 mode = DImode;
11824 innersize = GET_MODE_UNIT_SIZE (mode);
11826 /* Vectors of float constants. */
11827 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11829 rtx el0 = CONST_VECTOR_ELT (op, 0);
11831 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11832 return -1;
11834 /* FP16 vectors cannot be represented. */
11835 if (GET_MODE_INNER (mode) == HFmode)
11836 return -1;
11838 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11839 are distinct in this context. */
11840 if (!const_vec_duplicate_p (op))
11841 return -1;
11843 if (modconst)
11844 *modconst = CONST_VECTOR_ELT (op, 0);
11846 if (elementwidth)
11847 *elementwidth = 0;
11849 if (el0 == CONST0_RTX (GET_MODE (el0)))
11850 return 19;
11851 else
11852 return 18;
11855 /* The tricks done in the code below apply for little-endian vector layout.
11856 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11857 FIXME: Implement logic for big-endian vectors. */
11858 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11859 return -1;
11861 /* Splat vector constant out into a byte vector. */
11862 for (i = 0; i < n_elts; i++)
11864 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11865 unsigned HOST_WIDE_INT elpart;
11867 gcc_assert (CONST_INT_P (el));
11868 elpart = INTVAL (el);
11870 for (unsigned int byte = 0; byte < innersize; byte++)
11872 bytes[idx++] = (elpart & 0xff) ^ invmask;
11873 elpart >>= BITS_PER_UNIT;
11877 /* Sanity check. */
11878 gcc_assert (idx == GET_MODE_SIZE (mode));
11882 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11883 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11885 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11886 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11888 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11889 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11891 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11892 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11894 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11896 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11898 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11899 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11901 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11902 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11904 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11905 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11907 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11908 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11910 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11912 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11914 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11915 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11917 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11918 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11920 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11921 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11923 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11924 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11926 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11928 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11929 && bytes[i] == bytes[(i + 8) % idx]);
11931 while (0);
11933 if (immtype == -1)
11934 return -1;
11936 if (elementwidth)
11937 *elementwidth = elsize;
11939 if (modconst)
11941 unsigned HOST_WIDE_INT imm = 0;
11943 /* Un-invert bytes of recognized vector, if necessary. */
11944 if (invmask != 0)
11945 for (i = 0; i < idx; i++)
11946 bytes[i] ^= invmask;
11948 if (immtype == 17)
11950 /* FIXME: Broken on 32-bit H_W_I hosts. */
11951 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11953 for (i = 0; i < 8; i++)
11954 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11955 << (i * BITS_PER_UNIT);
11957 *modconst = GEN_INT (imm);
11959 else
11961 unsigned HOST_WIDE_INT imm = 0;
11963 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11964 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11966 *modconst = GEN_INT (imm);
11970 return immtype;
11971 #undef CHECK
11974 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
11975 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11976 float elements), and a modified constant (whatever should be output for a
11977 VMOV) in *MODCONST. */
11980 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11981 rtx *modconst, int *elementwidth)
11983 rtx tmpconst;
11984 int tmpwidth;
11985 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11987 if (retval == -1)
11988 return 0;
11990 if (modconst)
11991 *modconst = tmpconst;
11993 if (elementwidth)
11994 *elementwidth = tmpwidth;
11996 return 1;
11999 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction. If
12000 the immediate is valid, write a constant suitable for using as an operand
12001 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12002 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12005 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12006 rtx *modconst, int *elementwidth)
12008 rtx tmpconst;
12009 int tmpwidth;
12010 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12012 if (retval < 0 || retval > 5)
12013 return 0;
12015 if (modconst)
12016 *modconst = tmpconst;
12018 if (elementwidth)
12019 *elementwidth = tmpwidth;
12021 return 1;
12024 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12025 the immediate is valid, write a constant suitable for using as an operand
12026 to VSHR/VSHL to *MODCONST and the corresponding element width to
12027 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left shift (VSHL) and a
12028 right shift (VSHR), which accept different ranges of immediate values. */
12031 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12032 rtx *modconst, int *elementwidth,
12033 bool isleftshift)
12035 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12036 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12037 unsigned HOST_WIDE_INT last_elt = 0;
12038 unsigned HOST_WIDE_INT maxshift;
12040 /* All elements of the vector must be the same integer constant; record it. */
12041 for (i = 0; i < n_elts; i++)
12043 rtx el = CONST_VECTOR_ELT (op, i);
12044 unsigned HOST_WIDE_INT elpart;
12046 if (CONST_INT_P (el))
12047 elpart = INTVAL (el);
12048 else if (CONST_DOUBLE_P (el))
12049 return 0;
12050 else
12051 gcc_unreachable ();
12053 if (i != 0 && elpart != last_elt)
12054 return 0;
12056 last_elt = elpart;
12059 /* Shift less than element size. */
12060 maxshift = innersize * 8;
12062 if (isleftshift)
12064 /* Left shift immediate value can be from 0 to <size>-1. */
12065 if (last_elt >= maxshift)
12066 return 0;
12068 else
12070 /* Right shift immediate value can be from 1 to <size>. */
12071 if (last_elt == 0 || last_elt > maxshift)
12072 return 0;
12075 if (elementwidth)
12076 *elementwidth = innersize * 8;
12078 if (modconst)
12079 *modconst = CONST_VECTOR_ELT (op, 0);
12081 return 1;
12084 /* Return a string suitable for output of Neon immediate logic operation
12085 MNEM. */
12087 char *
12088 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12089 int inverse, int quad)
12091 int width, is_valid;
12092 static char templ[40];
12094 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12096 gcc_assert (is_valid != 0);
12098 if (quad)
12099 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12100 else
12101 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12103 return templ;
12106 /* Return a string suitable for output of Neon immediate shift operation
12107 (VSHR or VSHL) MNEM. */
12109 char *
12110 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12111 machine_mode mode, int quad,
12112 bool isleftshift)
12114 int width, is_valid;
12115 static char templ[40];
12117 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12118 gcc_assert (is_valid != 0);
12120 if (quad)
12121 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12122 else
12123 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12125 return templ;
12128 /* Output a sequence of pairwise operations to implement a reduction.
12129 NOTE: We do "too much work" here, because pairwise operations work on two
12130 registers-worth of operands in one go. Unfortunately we don't think we can
12131 exploit those extra calculations to do the full operation in fewer steps.
12132 Although all vector elements of the result but the first are ignored, we
12133 actually calculate the same result in each of the elements. An alternative
12134 such as initially loading a vector with zero to use as each of the second
12135 operands would use up an additional register and take an extra instruction,
12136 for no particular gain. */
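/* As a sketch, for a 64-bit vector of four elements (e.g. V4HImode) with
   REDUC being a pairwise add: the loop below runs twice, first producing
   { a+b, c+d, a+b, c+d } from { a, b, c, d } and then the full sum
   a+b+c+d in every lane, so lane 0 of OP0 holds the reduction result.  */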
12138 void
12139 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12140 rtx (*reduc) (rtx, rtx, rtx))
12142 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12143 rtx tmpsum = op1;
12145 for (i = parts / 2; i >= 1; i /= 2)
12147 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12148 emit_insn (reduc (dest, tmpsum, tmpsum));
12149 tmpsum = dest;
12153 /* If VALS is a vector constant that can be loaded into a register
12154 using VDUP, generate instructions to do so and return an RTX to
12155 assign to the register. Otherwise return NULL_RTX. */
12157 static rtx
12158 neon_vdup_constant (rtx vals)
12160 machine_mode mode = GET_MODE (vals);
12161 machine_mode inner_mode = GET_MODE_INNER (mode);
12162 rtx x;
12164 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12165 return NULL_RTX;
12167 if (!const_vec_duplicate_p (vals, &x))
12168 /* The elements are not all the same. We could handle repeating
12169 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12170 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12171 vdup.i16). */
12172 return NULL_RTX;
12174 /* We can load this constant by using VDUP and a constant in a
12175 single ARM register. This will be cheaper than a vector
12176 load. */
12178 x = copy_to_mode_reg (inner_mode, x);
12179 return gen_vec_duplicate (mode, x);
12182 /* Generate code to load VALS, which is a PARALLEL containing only
12183 constants (for vec_init) or CONST_VECTOR, efficiently into a
12184 register. Returns an RTX to copy into the register, or NULL_RTX
12185 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12188 neon_make_constant (rtx vals)
12190 machine_mode mode = GET_MODE (vals);
12191 rtx target;
12192 rtx const_vec = NULL_RTX;
12193 int n_elts = GET_MODE_NUNITS (mode);
12194 int n_const = 0;
12195 int i;
12197 if (GET_CODE (vals) == CONST_VECTOR)
12198 const_vec = vals;
12199 else if (GET_CODE (vals) == PARALLEL)
12201 /* A CONST_VECTOR must contain only CONST_INTs and
12202 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12203 Only store valid constants in a CONST_VECTOR. */
12204 for (i = 0; i < n_elts; ++i)
12206 rtx x = XVECEXP (vals, 0, i);
12207 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12208 n_const++;
12210 if (n_const == n_elts)
12211 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12213 else
12214 gcc_unreachable ();
12216 if (const_vec != NULL
12217 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12218 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12219 return const_vec;
12220 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12221 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12222 pipeline cycle; creating the constant takes one or two ARM
12223 pipeline cycles. */
12224 return target;
12225 else if (const_vec != NULL_RTX)
12226 /* Load from constant pool. On Cortex-A8 this takes two cycles
12227 (for either double or quad vectors). We can not take advantage
12228 of single-cycle VLD1 because we need a PC-relative addressing
12229 mode. */
12230 return const_vec;
12231 else
12232 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12233 We can not construct an initializer. */
12234 return NULL_RTX;
12237 /* Initialize vector TARGET to VALS. */
12239 void
12240 neon_expand_vector_init (rtx target, rtx vals)
12242 machine_mode mode = GET_MODE (target);
12243 machine_mode inner_mode = GET_MODE_INNER (mode);
12244 int n_elts = GET_MODE_NUNITS (mode);
12245 int n_var = 0, one_var = -1;
12246 bool all_same = true;
12247 rtx x, mem;
12248 int i;
12250 for (i = 0; i < n_elts; ++i)
12252 x = XVECEXP (vals, 0, i);
12253 if (!CONSTANT_P (x))
12254 ++n_var, one_var = i;
12256 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12257 all_same = false;
12260 if (n_var == 0)
12262 rtx constant = neon_make_constant (vals);
12263 if (constant != NULL_RTX)
12265 emit_move_insn (target, constant);
12266 return;
12270 /* Splat a single non-constant element if we can. */
12271 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12273 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12274 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12275 return;
12278 /* One field is non-constant. Load constant then overwrite varying
12279 field. This is more efficient than using the stack. */
12280 if (n_var == 1)
12282 rtx copy = copy_rtx (vals);
12283 rtx index = GEN_INT (one_var);
12285 /* Load constant part of vector, substitute neighboring value for
12286 varying element. */
12287 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12288 neon_expand_vector_init (target, copy);
12290 /* Insert variable. */
12291 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12292 switch (mode)
12294 case E_V8QImode:
12295 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12296 break;
12297 case E_V16QImode:
12298 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12299 break;
12300 case E_V4HImode:
12301 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12302 break;
12303 case E_V8HImode:
12304 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12305 break;
12306 case E_V2SImode:
12307 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12308 break;
12309 case E_V4SImode:
12310 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12311 break;
12312 case E_V2SFmode:
12313 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12314 break;
12315 case E_V4SFmode:
12316 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12317 break;
12318 case E_V2DImode:
12319 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12320 break;
12321 default:
12322 gcc_unreachable ();
12324 return;
12327 /* Construct the vector in memory one field at a time
12328 and load the whole vector. */
12329 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12330 for (i = 0; i < n_elts; i++)
12331 emit_move_insn (adjust_address_nv (mem, inner_mode,
12332 i * GET_MODE_SIZE (inner_mode)),
12333 XVECEXP (vals, 0, i));
12334 emit_move_insn (target, mem);
12337 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12338 an error mentioning DESC if it doesn't. EXP indicates the source location, which includes the
12339 inlining history for intrinsics. */
12341 static void
12342 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12343 const_tree exp, const char *desc)
12345 HOST_WIDE_INT lane;
12347 gcc_assert (CONST_INT_P (operand));
12349 lane = INTVAL (operand);
12351 if (lane < low || lane >= high)
12353 if (exp)
12354 error ("%K%s %wd out of range %wd - %wd",
12355 exp, desc, lane, low, high - 1);
12356 else
12357 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12361 /* Bounds-check lanes. */
12363 void
12364 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12365 const_tree exp)
12367 bounds_check (operand, low, high, exp, "lane");
12370 /* Bounds-check constants. */
12372 void
12373 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12375 bounds_check (operand, low, high, NULL_TREE, "constant");
12378 HOST_WIDE_INT
12379 neon_element_bits (machine_mode mode)
12381 return GET_MODE_UNIT_BITSIZE (mode);
12385 /* Predicates for `match_operand' and `match_operator'. */
12387 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12388 WB is true if full writeback address modes are allowed and is false
12389 if limited writeback address modes (POST_INC and PRE_DEC) are
12390 allowed. */
12393 arm_coproc_mem_operand (rtx op, bool wb)
12395 rtx ind;
12397 /* Reject eliminable registers. */
12398 if (! (reload_in_progress || reload_completed || lra_in_progress)
12399 && ( reg_mentioned_p (frame_pointer_rtx, op)
12400 || reg_mentioned_p (arg_pointer_rtx, op)
12401 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12402 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12403 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12404 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12405 return FALSE;
12407 /* Constants are converted into offsets from labels. */
12408 if (!MEM_P (op))
12409 return FALSE;
12411 ind = XEXP (op, 0);
12413 if (reload_completed
12414 && (GET_CODE (ind) == LABEL_REF
12415 || (GET_CODE (ind) == CONST
12416 && GET_CODE (XEXP (ind, 0)) == PLUS
12417 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12418 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12419 return TRUE;
12421 /* Match: (mem (reg)). */
12422 if (REG_P (ind))
12423 return arm_address_register_rtx_p (ind, 0);
12425 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12426 acceptable in any case (subject to verification by
12427 arm_address_register_rtx_p). We need WB to be true to accept
12428 PRE_INC and POST_DEC. */
12429 if (GET_CODE (ind) == POST_INC
12430 || GET_CODE (ind) == PRE_DEC
12431 || (wb
12432 && (GET_CODE (ind) == PRE_INC
12433 || GET_CODE (ind) == POST_DEC)))
12434 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12436 if (wb
12437 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12438 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12439 && GET_CODE (XEXP (ind, 1)) == PLUS
12440 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12441 ind = XEXP (ind, 1);
12443 /* Match:
12444 (plus (reg)
12445 (const)). */
12446 if (GET_CODE (ind) == PLUS
12447 && REG_P (XEXP (ind, 0))
12448 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12449 && CONST_INT_P (XEXP (ind, 1))
12450 && INTVAL (XEXP (ind, 1)) > -1024
12451 && INTVAL (XEXP (ind, 1)) < 1024
12452 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12453 return TRUE;
12455 return FALSE;
12458 /* Return TRUE if OP is a memory operand which we can load or store a vector
12459 to/from. TYPE is one of the following values:
12460 0 - Vector load/store (vldr)
12461 1 - Core registers (ldm)
12462 2 - Element/structure loads (vld1)
12465 neon_vector_mem_operand (rtx op, int type, bool strict)
12467 rtx ind;
12469 /* Reject eliminable registers. */
12470 if (strict && ! (reload_in_progress || reload_completed)
12471 && (reg_mentioned_p (frame_pointer_rtx, op)
12472 || reg_mentioned_p (arg_pointer_rtx, op)
12473 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12474 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12475 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12476 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12477 return FALSE;
12479 /* Constants are converted into offsets from labels. */
12480 if (!MEM_P (op))
12481 return FALSE;
12483 ind = XEXP (op, 0);
12485 if (reload_completed
12486 && (GET_CODE (ind) == LABEL_REF
12487 || (GET_CODE (ind) == CONST
12488 && GET_CODE (XEXP (ind, 0)) == PLUS
12489 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12490 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12491 return TRUE;
12493 /* Match: (mem (reg)). */
12494 if (REG_P (ind))
12495 return arm_address_register_rtx_p (ind, 0);
12497 /* Allow post-increment with Neon registers. */
12498 if ((type != 1 && GET_CODE (ind) == POST_INC)
12499 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12500 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12502 /* Allow post-increment by register for VLDn */
12503 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12504 && GET_CODE (XEXP (ind, 1)) == PLUS
12505 && REG_P (XEXP (XEXP (ind, 1), 1)))
12506 return true;
12508 /* Match:
12509 (plus (reg)
12510 (const)). */
12511 if (type == 0
12512 && GET_CODE (ind) == PLUS
12513 && REG_P (XEXP (ind, 0))
12514 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12515 && CONST_INT_P (XEXP (ind, 1))
12516 && INTVAL (XEXP (ind, 1)) > -1024
12517 /* For quad modes, we restrict the constant offset to be slightly less
12518 than what the instruction format permits. We have no such constraint
12519 on double mode offsets. (This must match arm_legitimate_index_p.) */
12520 && (INTVAL (XEXP (ind, 1))
12521 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12522 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12523 return TRUE;
12525 return FALSE;
12528 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12529 type. */
12531 neon_struct_mem_operand (rtx op)
12533 rtx ind;
12535 /* Reject eliminable registers. */
12536 if (! (reload_in_progress || reload_completed)
12537 && ( reg_mentioned_p (frame_pointer_rtx, op)
12538 || reg_mentioned_p (arg_pointer_rtx, op)
12539 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12540 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12541 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12542 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12543 return FALSE;
12545 /* Constants are converted into offsets from labels. */
12546 if (!MEM_P (op))
12547 return FALSE;
12549 ind = XEXP (op, 0);
12551 if (reload_completed
12552 && (GET_CODE (ind) == LABEL_REF
12553 || (GET_CODE (ind) == CONST
12554 && GET_CODE (XEXP (ind, 0)) == PLUS
12555 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12556 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12557 return TRUE;
12559 /* Match: (mem (reg)). */
12560 if (REG_P (ind))
12561 return arm_address_register_rtx_p (ind, 0);
12563 /* vldm/vstm allow POST_INC (ia) and PRE_DEC (db). */
12564 if (GET_CODE (ind) == POST_INC
12565 || GET_CODE (ind) == PRE_DEC)
12566 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12568 return FALSE;
12571 /* Return true if X is a register that will be eliminated later on. */
12573 arm_eliminable_register (rtx x)
12575 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12576 || REGNO (x) == ARG_POINTER_REGNUM
12577 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12578 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12581 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12582 coprocessor registers. Otherwise return NO_REGS. */
12584 enum reg_class
12585 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12587 if (mode == HFmode)
12589 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12590 return GENERAL_REGS;
12591 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12592 return NO_REGS;
12593 return GENERAL_REGS;
12596 /* The neon move patterns handle all legitimate vector and struct
12597 addresses. */
12598 if (TARGET_NEON
12599 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12600 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12601 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12602 || VALID_NEON_STRUCT_MODE (mode)))
12603 return NO_REGS;
12605 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12606 return NO_REGS;
12608 return GENERAL_REGS;
12611 /* Values which must be returned in the most-significant end of the return
12612 register. */
12614 static bool
12615 arm_return_in_msb (const_tree valtype)
12617 return (TARGET_AAPCS_BASED
12618 && BYTES_BIG_ENDIAN
12619 && (AGGREGATE_TYPE_P (valtype)
12620 || TREE_CODE (valtype) == COMPLEX_TYPE
12621 || FIXED_POINT_TYPE_P (valtype)));
12624 /* Return TRUE if X references a SYMBOL_REF. */
12626 symbol_mentioned_p (rtx x)
12628 const char * fmt;
12629 int i;
12631 if (GET_CODE (x) == SYMBOL_REF)
12632 return 1;
12634 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12635 are constant offsets, not symbols. */
12636 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12637 return 0;
12639 fmt = GET_RTX_FORMAT (GET_CODE (x));
12641 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12643 if (fmt[i] == 'E')
12645 int j;
12647 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12648 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12649 return 1;
12651 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12652 return 1;
12655 return 0;
12658 /* Return TRUE if X references a LABEL_REF. */
12660 label_mentioned_p (rtx x)
12662 const char * fmt;
12663 int i;
12665 if (GET_CODE (x) == LABEL_REF)
12666 return 1;
12668 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12669 instruction, but they are constant offsets, not symbols. */
12670 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12671 return 0;
12673 fmt = GET_RTX_FORMAT (GET_CODE (x));
12674 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12676 if (fmt[i] == 'E')
12678 int j;
12680 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12681 if (label_mentioned_p (XVECEXP (x, i, j)))
12682 return 1;
12684 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12685 return 1;
12688 return 0;
12692 tls_mentioned_p (rtx x)
12694 switch (GET_CODE (x))
12696 case CONST:
12697 return tls_mentioned_p (XEXP (x, 0));
12699 case UNSPEC:
12700 if (XINT (x, 1) == UNSPEC_TLS)
12701 return 1;
12703 /* Fall through. */
12704 default:
12705 return 0;
12709 /* Must not copy any rtx that uses a pc-relative address.
12710 Also, disallow copying of load-exclusive instructions that
12711 may appear after splitting of compare-and-swap-style operations
12712 so as to prevent those loops from being transformed away from their
12713 canonical forms (see PR 69904). */
12715 static bool
12716 arm_cannot_copy_insn_p (rtx_insn *insn)
12718 /* The tls call insn cannot be copied, as it is paired with a data
12719 word. */
12720 if (recog_memoized (insn) == CODE_FOR_tlscall)
12721 return true;
12723 subrtx_iterator::array_type array;
12724 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12726 const_rtx x = *iter;
12727 if (GET_CODE (x) == UNSPEC
12728 && (XINT (x, 1) == UNSPEC_PIC_BASE
12729 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12730 return true;
12733 rtx set = single_set (insn);
12734 if (set)
12736 rtx src = SET_SRC (set);
12737 if (GET_CODE (src) == ZERO_EXTEND)
12738 src = XEXP (src, 0);
12740 /* Catch the load-exclusive and load-acquire operations. */
12741 if (GET_CODE (src) == UNSPEC_VOLATILE
12742 && (XINT (src, 1) == VUNSPEC_LL
12743 || XINT (src, 1) == VUNSPEC_LAX))
12744 return true;
12746 return false;
12749 enum rtx_code
12750 minmax_code (rtx x)
12752 enum rtx_code code = GET_CODE (x);
12754 switch (code)
12756 case SMAX:
12757 return GE;
12758 case SMIN:
12759 return LE;
12760 case UMIN:
12761 return LEU;
12762 case UMAX:
12763 return GEU;
12764 default:
12765 gcc_unreachable ();
12769 /* Match pair of min/max operators that can be implemented via usat/ssat. */
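/* For example, a clamp to [0, 255] (LO_BOUND 0, HI_BOUND 255) matches with
   *MASK = 8 and *SIGNED_SAT = false, corresponding to usat #8, while a clamp
   to [-128, 127] matches with *MASK = 8 and *SIGNED_SAT = true (ssat #8).  */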
12771 bool
12772 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12773 int *mask, bool *signed_sat)
12775 /* The high bound must be a power of two minus one. */
12776 int log = exact_log2 (INTVAL (hi_bound) + 1);
12777 if (log == -1)
12778 return false;
12780 /* The low bound is either zero (for usat) or one less than the
12781 negation of the high bound (for ssat). */
12782 if (INTVAL (lo_bound) == 0)
12784 if (mask)
12785 *mask = log;
12786 if (signed_sat)
12787 *signed_sat = false;
12789 return true;
12792 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12794 if (mask)
12795 *mask = log + 1;
12796 if (signed_sat)
12797 *signed_sat = true;
12799 return true;
12802 return false;
12805 /* Return 1 if memory locations are adjacent. */
12807 adjacent_mem_locations (rtx a, rtx b)
12809 /* We don't guarantee to preserve the order of these memory refs. */
12810 if (volatile_refs_p (a) || volatile_refs_p (b))
12811 return 0;
12813 if ((REG_P (XEXP (a, 0))
12814 || (GET_CODE (XEXP (a, 0)) == PLUS
12815 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12816 && (REG_P (XEXP (b, 0))
12817 || (GET_CODE (XEXP (b, 0)) == PLUS
12818 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12820 HOST_WIDE_INT val0 = 0, val1 = 0;
12821 rtx reg0, reg1;
12822 int val_diff;
12824 if (GET_CODE (XEXP (a, 0)) == PLUS)
12826 reg0 = XEXP (XEXP (a, 0), 0);
12827 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12829 else
12830 reg0 = XEXP (a, 0);
12832 if (GET_CODE (XEXP (b, 0)) == PLUS)
12834 reg1 = XEXP (XEXP (b, 0), 0);
12835 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12837 else
12838 reg1 = XEXP (b, 0);
12840 /* Don't accept any offset that will require multiple
12841 instructions to handle, since this would cause the
12842 arith_adjacentmem pattern to output an overlong sequence. */
12843 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12844 return 0;
12846 /* Don't allow an eliminable register: register elimination can make
12847 the offset too large. */
12848 if (arm_eliminable_register (reg0))
12849 return 0;
12851 val_diff = val1 - val0;
12853 if (arm_ld_sched)
12855 /* If the target has load delay slots, then there's no benefit
12856 to using an ldm instruction unless the offset is zero and
12857 we are optimizing for size. */
12858 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12859 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12860 && (val_diff == 4 || val_diff == -4));
12863 return ((REGNO (reg0) == REGNO (reg1))
12864 && (val_diff == 4 || val_diff == -4));
12867 return 0;
12870 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12871 for load operations, false for store operations. CONSECUTIVE is true
12872 if the register numbers in the operation must be consecutive in the register
12873 bank. RETURN_PC is true if the value is to be loaded into PC.
12874 The pattern we are trying to match for load is:
12875 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12876 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12879 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12881 where
12882 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12883 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12884 3. If consecutive is TRUE, then for kth register being loaded,
12885 REGNO (R_dk) = REGNO (R_d0) + k.
12886 The pattern for store is similar. */
12887 bool
12888 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12889 bool consecutive, bool return_pc)
12891 HOST_WIDE_INT count = XVECLEN (op, 0);
12892 rtx reg, mem, addr;
12893 unsigned regno;
12894 unsigned first_regno;
12895 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12896 rtx elt;
12897 bool addr_reg_in_reglist = false;
12898 bool update = false;
12899 int reg_increment;
12900 int offset_adj;
12901 int regs_per_val;
12903 /* If not in SImode, then registers must be consecutive
12904 (e.g., VLDM instructions for DFmode). */
12905 gcc_assert ((mode == SImode) || consecutive);
12906 /* Setting return_pc for stores is illegal. */
12907 gcc_assert (!return_pc || load);
12909 /* Set up the increments and the regs per val based on the mode. */
12910 reg_increment = GET_MODE_SIZE (mode);
12911 regs_per_val = reg_increment / 4;
12912 offset_adj = return_pc ? 1 : 0;
12914 if (count <= 1
12915 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12916 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12917 return false;
12919 /* Check if this is a write-back. */
12920 elt = XVECEXP (op, 0, offset_adj);
12921 if (GET_CODE (SET_SRC (elt)) == PLUS)
12923 i++;
12924 base = 1;
12925 update = true;
12927 /* The offset adjustment must be the number of registers being
12928 popped times the size of a single register. */
12929 if (!REG_P (SET_DEST (elt))
12930 || !REG_P (XEXP (SET_SRC (elt), 0))
12931 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12932 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12933 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12934 ((count - 1 - offset_adj) * reg_increment))
12935 return false;
12938 i = i + offset_adj;
12939 base = base + offset_adj;
12940 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12941 success depends on the type: VLDM can do just one reg,
12942 LDM must do at least two. */
12943 if ((count <= i) && (mode == SImode))
12944 return false;
12946 elt = XVECEXP (op, 0, i - 1);
12947 if (GET_CODE (elt) != SET)
12948 return false;
12950 if (load)
12952 reg = SET_DEST (elt);
12953 mem = SET_SRC (elt);
12955 else
12957 reg = SET_SRC (elt);
12958 mem = SET_DEST (elt);
12961 if (!REG_P (reg) || !MEM_P (mem))
12962 return false;
12964 regno = REGNO (reg);
12965 first_regno = regno;
12966 addr = XEXP (mem, 0);
12967 if (GET_CODE (addr) == PLUS)
12969 if (!CONST_INT_P (XEXP (addr, 1)))
12970 return false;
12972 offset = INTVAL (XEXP (addr, 1));
12973 addr = XEXP (addr, 0);
12976 if (!REG_P (addr))
12977 return false;
12979 /* Don't allow SP to be loaded unless it is also the base register. It
12980 guarantees that SP is reset correctly when an LDM instruction
12981 is interrupted. Otherwise, we might end up with a corrupt stack. */
12982 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12983 return false;
12985 for (; i < count; i++)
12987 elt = XVECEXP (op, 0, i);
12988 if (GET_CODE (elt) != SET)
12989 return false;
12991 if (load)
12993 reg = SET_DEST (elt);
12994 mem = SET_SRC (elt);
12996 else
12998 reg = SET_SRC (elt);
12999 mem = SET_DEST (elt);
13002 if (!REG_P (reg)
13003 || GET_MODE (reg) != mode
13004 || REGNO (reg) <= regno
13005 || (consecutive
13006 && (REGNO (reg) !=
13007 (unsigned int) (first_regno + regs_per_val * (i - base))))
13008 /* Don't allow SP to be loaded unless it is also the base register. It
13009 guarantees that SP is reset correctly when an LDM instruction
13010 is interrupted. Otherwise, we might end up with a corrupt stack. */
13011 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13012 || !MEM_P (mem)
13013 || GET_MODE (mem) != mode
13014 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13015 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13016 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13017 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13018 offset + (i - base) * reg_increment))
13019 && (!REG_P (XEXP (mem, 0))
13020 || offset + (i - base) * reg_increment != 0)))
13021 return false;
13023 regno = REGNO (reg);
13024 if (regno == REGNO (addr))
13025 addr_reg_in_reglist = true;
13028 if (load)
13030 if (update && addr_reg_in_reglist)
13031 return false;
13033 /* For Thumb-1, the address register is always modified, either by write-back
13034 or by an explicit load. If the pattern does not describe an update,
13035 then the address register must be in the list of loaded registers. */
13036 if (TARGET_THUMB1)
13037 return update || addr_reg_in_reglist;
13040 return true;
13043 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13044 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13045 instruction. ADD_OFFSET is nonzero if the base address register needs
13046 to be modified with an add instruction before we can use it. */
13048 static bool
13049 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13050 int nops, HOST_WIDE_INT add_offset)
13052 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13053 if the offset isn't small enough. The reason 2 ldrs are faster
13054 is because these ARMs are able to do more than one cache access
13055 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13056 whilst the ARM8 has a double bandwidth cache. This means that
13057 these cores can do both an instruction fetch and a data fetch in
13058 a single cycle, so the trick of calculating the address into a
13059 scratch register (one of the result regs) and then doing a load
13060 multiple actually becomes slower (and no smaller in code size).
13061 That is the transformation
13063 ldr rd1, [rbase + offset]
13064 ldr rd2, [rbase + offset + 4]
      to
13068 add rd1, rbase, offset
13069 ldmia rd1, {rd1, rd2}
13071 produces worse code -- '3 cycles + any stalls on rd2' instead of
13072 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13073 access per cycle, the first sequence could never complete in less
13074 than 6 cycles, whereas the ldm sequence would only take 5 and
13075 would make better use of sequential accesses if not hitting the
13076 cache.
13078 We cheat here and test 'arm_ld_sched' which we currently know to
13079 only be true for the ARM8, ARM9 and StrongARM. If this ever
13080 changes, then the test below needs to be reworked. */
13081 if (nops == 2 && arm_ld_sched && add_offset != 0)
13082 return false;
13084 /* XScale has load-store double instructions, but they have stricter
13085 alignment requirements than load-store multiple, so we cannot
13086 use them.
13088 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13089 the pipeline until completion.
13091 NREGS CYCLES
        1 3
        2 4
        3 5
        4 6
13097 An ldr instruction takes 1-3 cycles, but does not block the
13098 pipeline.
13100 NREGS CYCLES
13101 1 1-3
13102 2 2-6
13103 3 3-9
13104 4 4-12
13106 Best case ldr will always win. However, the more ldr instructions
13107 we issue, the less likely we are to be able to schedule them well.
13108 Using ldr instructions also increases code size.
13110 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13111 for counts of 3 or 4 regs. */
13112 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13113 return false;
13114 return true;
13117 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13118 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13119 an array ORDER which describes the sequence to use when accessing the
13120 offsets that produces an ascending order. In this sequence, each
13121 offset must be larger by exactly 4 than the previous one. ORDER[0]
13122 must have been filled in with the lowest offset by the caller.
13123 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13124 we use to verify that ORDER produces an ascending order of registers.
13125 Return true if it was possible to construct such an order, false if
13126 not. */
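/* For instance, with NOPS = 4, UNSORTED_OFFSETS = { 8, 0, 4, 12 } and
   ORDER[0] preset to 1 (the index of the lowest offset, 0), this fills in
   ORDER = { 1, 2, 0, 3 }, since offsets 0, 4, 8 and 12 are found at indices
   1, 2, 0 and 3 respectively.  */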
13128 static bool
13129 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13130 int *unsorted_regs)
13132 int i;
13133 for (i = 1; i < nops; i++)
13135 int j;
13137 order[i] = order[i - 1];
13138 for (j = 0; j < nops; j++)
13139 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13141 /* We must find exactly one offset that is higher than the
13142 previous one by 4. */
13143 if (order[i] != order[i - 1])
13144 return false;
13145 order[i] = j;
13147 if (order[i] == order[i - 1])
13148 return false;
13149 /* The register numbers must be ascending. */
13150 if (unsorted_regs != NULL
13151 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13152 return false;
13154 return true;
13157 /* Used to determine in a peephole whether a sequence of load
13158 instructions can be changed into a load-multiple instruction.
13159 NOPS is the number of separate load instructions we are examining. The
13160 first NOPS entries in OPERANDS are the destination registers, the
13161 next NOPS entries are memory operands. If this function is
13162 successful, *BASE is set to the common base register of the memory
13163 accesses; *LOAD_OFFSET is set to the first memory location's offset
13164 from that base register.
13165 REGS is an array filled in with the destination register numbers.
13166 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13167 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13168 the sequence of registers in REGS matches the loads from ascending memory
13169 locations, and the function verifies that the register numbers are
13170 themselves ascending. If CHECK_REGS is false, the register numbers
13171 are stored in the order they are found in the operands. */
13172 static int
13173 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13174 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13176 int unsorted_regs[MAX_LDM_STM_OPS];
13177 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13178 int order[MAX_LDM_STM_OPS];
13179 rtx base_reg_rtx = NULL;
13180 int base_reg = -1;
13181 int i, ldm_case;
13183 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13184 easily extended if required. */
13185 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13187 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13189 /* Loop over the operands and check that the memory references are
13190 suitable (i.e. immediate offsets from the same base register). At
13191 the same time, extract the target register, and the memory
13192 offsets. */
13193 for (i = 0; i < nops; i++)
13195 rtx reg;
13196 rtx offset;
13198 /* Convert a subreg of a mem into the mem itself. */
13199 if (GET_CODE (operands[nops + i]) == SUBREG)
13200 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13202 gcc_assert (MEM_P (operands[nops + i]));
13204 /* Don't reorder volatile memory references; it doesn't seem worth
13205 looking for the case where the order is ok anyway. */
13206 if (MEM_VOLATILE_P (operands[nops + i]))
13207 return 0;
13209 offset = const0_rtx;
13211 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13212 || (GET_CODE (reg) == SUBREG
13213 && REG_P (reg = SUBREG_REG (reg))))
13214 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13215 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13216 || (GET_CODE (reg) == SUBREG
13217 && REG_P (reg = SUBREG_REG (reg))))
13218 && (CONST_INT_P (offset
13219 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13221 if (i == 0)
13223 base_reg = REGNO (reg);
13224 base_reg_rtx = reg;
13225 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13226 return 0;
13228 else if (base_reg != (int) REGNO (reg))
13229 /* Not addressed from the same base register. */
13230 return 0;
13232 unsorted_regs[i] = (REG_P (operands[i])
13233 ? REGNO (operands[i])
13234 : REGNO (SUBREG_REG (operands[i])));
13236 /* If it isn't an integer register, or if it overwrites the
13237 base register but isn't the last insn in the list, then
13238 we can't do this. */
13239 if (unsorted_regs[i] < 0
13240 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13241 || unsorted_regs[i] > 14
13242 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13243 return 0;
13245 /* Don't allow SP to be loaded unless it is also the base
13246 register. It guarantees that SP is reset correctly when
13247 an LDM instruction is interrupted. Otherwise, we might
13248 end up with a corrupt stack. */
13249 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13250 return 0;
13252 unsorted_offsets[i] = INTVAL (offset);
13253 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13254 order[0] = i;
13256 else
13257 /* Not a suitable memory address. */
13258 return 0;
13261 /* All the useful information has now been extracted from the
13262 operands into unsorted_regs and unsorted_offsets; additionally,
13263 order[0] has been set to the lowest offset in the list. Sort
13264 the offsets into order, verifying that they are adjacent, and
13265 check that the register numbers are ascending. */
13266 if (!compute_offset_order (nops, unsorted_offsets, order,
13267 check_regs ? unsorted_regs : NULL))
13268 return 0;
13270 if (saved_order)
13271 memcpy (saved_order, order, sizeof order);
13273 if (base)
13275 *base = base_reg;
13277 for (i = 0; i < nops; i++)
13278 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13280 *load_offset = unsorted_offsets[order[0]];
13283 if (TARGET_THUMB1
13284 && !peep2_reg_dead_p (nops, base_reg_rtx))
13285 return 0;
13287 if (unsorted_offsets[order[0]] == 0)
13288 ldm_case = 1; /* ldmia */
13289 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13290 ldm_case = 2; /* ldmib */
13291 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13292 ldm_case = 3; /* ldmda */
13293 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13294 ldm_case = 4; /* ldmdb */
13295 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13296 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13297 ldm_case = 5;
13298 else
13299 return 0;
13301 if (!multiple_operation_profitable_p (false, nops,
13302 ldm_case == 5
13303 ? unsorted_offsets[order[0]] : 0))
13304 return 0;
13306 return ldm_case;
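/* For example (ARM state, illustrative operands only): four loads of
   r4-r7 from [rB], [rB, #4], [rB, #8] and [rB, #12] give ldm_case 1
   (ldmia); offsets 4..16 give case 2 (ldmib); offsets -12..0 give case 3
   (ldmda); offsets -16..-4 give case 4 (ldmdb).  Any other starting
   offset that is a legal add/sub immediate gives case 5, which tells the
   caller (see gen_ldm_seq) to add the offset to the base register
   first.  */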
13309 /* Used to determine in a peephole whether a sequence of store instructions can
13310 be changed into a store-multiple instruction.
13311 NOPS is the number of separate store instructions we are examining.
13312 NOPS_TOTAL is the total number of instructions recognized by the peephole
13313 pattern.
13314 The first NOPS entries in OPERANDS are the source registers, the next
13315 NOPS entries are memory operands. If this function is successful, *BASE is
13316 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13317 to the first memory location's offset from that base register. REGS is an
13318 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13319 likewise filled with the corresponding rtx's.
13320 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13321 numbers to an ascending order of stores.
13322 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13323 from ascending memory locations, and the function verifies that the register
13324 numbers are themselves ascending. If CHECK_REGS is false, the register
13325 numbers are stored in the order they are found in the operands. */
13326 static int
13327 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13328 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13329 HOST_WIDE_INT *load_offset, bool check_regs)
13331 int unsorted_regs[MAX_LDM_STM_OPS];
13332 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13333 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13334 int order[MAX_LDM_STM_OPS];
13335 int base_reg = -1;
13336 rtx base_reg_rtx = NULL;
13337 int i, stm_case;
13339 /* Write back of base register is currently only supported for Thumb 1. */
13340 int base_writeback = TARGET_THUMB1;
13342 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13343 easily extended if required. */
13344 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13346 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13348 /* Loop over the operands and check that the memory references are
13349 suitable (i.e. immediate offsets from the same base register). At
13350 the same time, extract the target register, and the memory
13351 offsets. */
13352 for (i = 0; i < nops; i++)
13354 rtx reg;
13355 rtx offset;
13357 /* Convert a subreg of a mem into the mem itself. */
13358 if (GET_CODE (operands[nops + i]) == SUBREG)
13359 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13361 gcc_assert (MEM_P (operands[nops + i]));
13363 /* Don't reorder volatile memory references; it doesn't seem worth
13364 looking for the case where the order is ok anyway. */
13365 if (MEM_VOLATILE_P (operands[nops + i]))
13366 return 0;
13368 offset = const0_rtx;
13370 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13371 || (GET_CODE (reg) == SUBREG
13372 && REG_P (reg = SUBREG_REG (reg))))
13373 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13374 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13375 || (GET_CODE (reg) == SUBREG
13376 && REG_P (reg = SUBREG_REG (reg))))
13377 && (CONST_INT_P (offset
13378 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13380 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13381 ? operands[i] : SUBREG_REG (operands[i]));
13382 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13384 if (i == 0)
13386 base_reg = REGNO (reg);
13387 base_reg_rtx = reg;
13388 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13389 return 0;
13391 else if (base_reg != (int) REGNO (reg))
13392 /* Not addressed from the same base register. */
13393 return 0;
13395 /* If it isn't an integer register, then we can't do this. */
13396 if (unsorted_regs[i] < 0
13397 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13398 /* The effects are unpredictable if the base register is
13399 both updated and stored. */
13400 || (base_writeback && unsorted_regs[i] == base_reg)
13401 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13402 || unsorted_regs[i] > 14)
13403 return 0;
13405 unsorted_offsets[i] = INTVAL (offset);
13406 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13407 order[0] = i;
13409 else
13410 /* Not a suitable memory address. */
13411 return 0;
13414 /* All the useful information has now been extracted from the
13415 operands into unsorted_regs and unsorted_offsets; additionally,
13416 order[0] has been set to the lowest offset in the list. Sort
13417 the offsets into order, verifying that they are adjacent, and
13418 check that the register numbers are ascending. */
13419 if (!compute_offset_order (nops, unsorted_offsets, order,
13420 check_regs ? unsorted_regs : NULL))
13421 return 0;
13423 if (saved_order)
13424 memcpy (saved_order, order, sizeof order);
13426 if (base)
13428 *base = base_reg;
13430 for (i = 0; i < nops; i++)
13432 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13433 if (reg_rtxs)
13434 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13437 *load_offset = unsorted_offsets[order[0]];
13440 if (TARGET_THUMB1
13441 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13442 return 0;
13444 if (unsorted_offsets[order[0]] == 0)
13445 stm_case = 1; /* stmia */
13446 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13447 stm_case = 2; /* stmib */
13448 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13449 stm_case = 3; /* stmda */
13450 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13451 stm_case = 4; /* stmdb */
13452 else
13453 return 0;
13455 if (!multiple_operation_profitable_p (false, nops, 0))
13456 return 0;
13458 return stm_case;
13461 /* Routines for use in generating RTL. */
13463 /* Generate a load-multiple instruction. COUNT is the number of loads in
13464 the instruction; REGS and MEMS are arrays containing the operands.
13465 BASEREG is the base register to be used in addressing the memory operands.
13466 WBACK_OFFSET is nonzero if the instruction should update the base
13467 register. */
13469 static rtx
13470 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13471 HOST_WIDE_INT wback_offset)
13473 int i = 0, j;
13474 rtx result;
13476 if (!multiple_operation_profitable_p (false, count, 0))
13478 rtx seq;
13480 start_sequence ();
13482 for (i = 0; i < count; i++)
13483 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13485 if (wback_offset != 0)
13486 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13488 seq = get_insns ();
13489 end_sequence ();
13491 return seq;
13494 result = gen_rtx_PARALLEL (VOIDmode,
13495 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13496 if (wback_offset != 0)
13498 XVECEXP (result, 0, 0)
13499 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13500 i = 1;
13501 count++;
13504 for (j = 0; i < count; i++, j++)
13505 XVECEXP (result, 0, i)
13506 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13508 return result;
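/* As a sketch of the RTL built here: for COUNT == 2, REGS == {4, 5} and
   WBACK_OFFSET == 8 the result is
     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) MEMS[0])
                (set (reg:SI 5) MEMS[1])])
   i.e. the optional base-register update comes first, followed by one SET
   per loaded register.  */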
13511 /* Generate a store-multiple instruction. COUNT is the number of stores in
13512 the instruction; REGS and MEMS are arrays containing the operands.
13513 BASEREG is the base register to be used in addressing the memory operands.
13514 WBACK_OFFSET is nonzero if the instruction should update the base
13515 register. */
13517 static rtx
13518 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13519 HOST_WIDE_INT wback_offset)
13521 int i = 0, j;
13522 rtx result;
13524 if (GET_CODE (basereg) == PLUS)
13525 basereg = XEXP (basereg, 0);
13527 if (!multiple_operation_profitable_p (false, count, 0))
13529 rtx seq;
13531 start_sequence ();
13533 for (i = 0; i < count; i++)
13534 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13536 if (wback_offset != 0)
13537 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13539 seq = get_insns ();
13540 end_sequence ();
13542 return seq;
13545 result = gen_rtx_PARALLEL (VOIDmode,
13546 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13547 if (wback_offset != 0)
13549 XVECEXP (result, 0, 0)
13550 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13551 i = 1;
13552 count++;
13555 for (j = 0; i < count; i++, j++)
13556 XVECEXP (result, 0, i)
13557 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13559 return result;
13562 /* Generate either a load-multiple or a store-multiple instruction. This
13563 function can be used in situations where we can start with a single MEM
13564 rtx and adjust its address upwards.
13565 COUNT is the number of operations in the instruction, not counting a
13566 possible update of the base register. REGS is an array containing the
13567 register operands.
13568 BASEREG is the base register to be used in addressing the memory operands,
13569 which are constructed from BASEMEM.
13570 WRITE_BACK specifies whether the generated instruction should include an
13571 update of the base register.
13572 OFFSETP is used to pass an offset to and from this function; this offset
13573 is not used when constructing the address (instead BASEMEM should have an
13574 appropriate offset in its address); it is used only for setting
13575 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13577 static rtx
13578 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13579 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13581 rtx mems[MAX_LDM_STM_OPS];
13582 HOST_WIDE_INT offset = *offsetp;
13583 int i;
13585 gcc_assert (count <= MAX_LDM_STM_OPS);
13587 if (GET_CODE (basereg) == PLUS)
13588 basereg = XEXP (basereg, 0);
13590 for (i = 0; i < count; i++)
13592 rtx addr = plus_constant (Pmode, basereg, i * 4);
13593 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13594 offset += 4;
13597 if (write_back)
13598 *offsetp = offset;
13600 if (is_load)
13601 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13602 write_back ? 4 * count : 0);
13603 else
13604 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13605 write_back ? 4 * count : 0);
13608 rtx
13609 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13610 rtx basemem, HOST_WIDE_INT *offsetp)
13612 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13613 offsetp);
13616 rtx
13617 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13618 rtx basemem, HOST_WIDE_INT *offsetp)
13620 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13621 offsetp);
13624 /* Called from a peephole2 expander to turn a sequence of loads into an
13625 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13626 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13627 is true if we can reorder the registers because they are used commutatively
13628 subsequently.
13629 Returns true iff we could generate a new instruction. */
13631 bool
13632 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13634 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13635 rtx mems[MAX_LDM_STM_OPS];
13636 int i, j, base_reg;
13637 rtx base_reg_rtx;
13638 HOST_WIDE_INT offset;
13639 int write_back = FALSE;
13640 int ldm_case;
13641 rtx addr;
13643 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13644 &base_reg, &offset, !sort_regs);
13646 if (ldm_case == 0)
13647 return false;
13649 if (sort_regs)
13650 for (i = 0; i < nops - 1; i++)
13651 for (j = i + 1; j < nops; j++)
13652 if (regs[i] > regs[j])
13654 int t = regs[i];
13655 regs[i] = regs[j];
13656 regs[j] = t;
13658 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13660 if (TARGET_THUMB1)
13662 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13663 gcc_assert (ldm_case == 1 || ldm_case == 5);
13664 write_back = TRUE;
13667 if (ldm_case == 5)
13669 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13670 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13671 offset = 0;
13672 if (!TARGET_THUMB1)
13673 base_reg_rtx = newbase;
13676 for (i = 0; i < nops; i++)
13678 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13679 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13680 SImode, addr, 0);
13682 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13683 write_back ? offset + i * 4 : 0));
13684 return true;
13687 /* Called from a peephole2 expander to turn a sequence of stores into an
13688 STM instruction. OPERANDS are the operands found by the peephole matcher;
13689 NOPS indicates how many separate stores we are trying to combine.
13690 Returns true iff we could generate a new instruction. */
13692 bool
13693 gen_stm_seq (rtx *operands, int nops)
13695 int i;
13696 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13697 rtx mems[MAX_LDM_STM_OPS];
13698 int base_reg;
13699 rtx base_reg_rtx;
13700 HOST_WIDE_INT offset;
13701 int write_back = FALSE;
13702 int stm_case;
13703 rtx addr;
13704 bool base_reg_dies;
13706 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13707 mem_order, &base_reg, &offset, true);
13709 if (stm_case == 0)
13710 return false;
13712 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13714 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13715 if (TARGET_THUMB1)
13717 gcc_assert (base_reg_dies);
13718 write_back = TRUE;
13721 if (stm_case == 5)
13723 gcc_assert (base_reg_dies);
13724 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13725 offset = 0;
13728 addr = plus_constant (Pmode, base_reg_rtx, offset);
13730 for (i = 0; i < nops; i++)
13732 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13733 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13734 SImode, addr, 0);
13736 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13737 write_back ? offset + i * 4 : 0));
13738 return true;
13741 /* Called from a peephole2 expander to turn a sequence of stores that are
13742 preceded by constant loads into an STM instruction. OPERANDS are the
13743 operands found by the peephole matcher; NOPS indicates how many
13744 separate stores we are trying to combine; there are 2 * NOPS
13745 instructions in the peephole.
13746 Returns true iff we could generate a new instruction. */
13748 bool
13749 gen_const_stm_seq (rtx *operands, int nops)
13751 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13752 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13753 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13754 rtx mems[MAX_LDM_STM_OPS];
13755 int base_reg;
13756 rtx base_reg_rtx;
13757 HOST_WIDE_INT offset;
13758 int write_back = FALSE;
13759 int stm_case;
13760 rtx addr;
13761 bool base_reg_dies;
13762 int i, j;
13763 HARD_REG_SET allocated;
13765 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13766 mem_order, &base_reg, &offset, false);
13768 if (stm_case == 0)
13769 return false;
13771 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13773 /* If the same register is used more than once, try to find a free
13774 register. */
13775 CLEAR_HARD_REG_SET (allocated);
13776 for (i = 0; i < nops; i++)
13778 for (j = i + 1; j < nops; j++)
13779 if (regs[i] == regs[j])
13781 rtx t = peep2_find_free_register (0, nops * 2,
13782 TARGET_THUMB1 ? "l" : "r",
13783 SImode, &allocated);
13784 if (t == NULL_RTX)
13785 return false;
13786 reg_rtxs[i] = t;
13787 regs[i] = REGNO (t);
13791 /* Compute an ordering that maps the register numbers to an ascending
13792 sequence. */
13793 reg_order[0] = 0;
13794 for (i = 0; i < nops; i++)
13795 if (regs[i] < regs[reg_order[0]])
13796 reg_order[0] = i;
13798 for (i = 1; i < nops; i++)
13800 int this_order = reg_order[i - 1];
13801 for (j = 0; j < nops; j++)
13802 if (regs[j] > regs[reg_order[i - 1]]
13803 && (this_order == reg_order[i - 1]
13804 || regs[j] < regs[this_order]))
13805 this_order = j;
13806 reg_order[i] = this_order;
13809 /* Ensure that registers that must be live after the instruction end
13810 up with the correct value. */
13811 for (i = 0; i < nops; i++)
13813 int this_order = reg_order[i];
13814 if ((this_order != mem_order[i]
13815 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13816 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13817 return false;
13820 /* Load the constants. */
13821 for (i = 0; i < nops; i++)
13823 rtx op = operands[2 * nops + mem_order[i]];
13824 sorted_regs[i] = regs[reg_order[i]];
13825 emit_move_insn (reg_rtxs[reg_order[i]], op);
13828 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13830 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13831 if (TARGET_THUMB1)
13833 gcc_assert (base_reg_dies);
13834 write_back = TRUE;
13837 if (stm_case == 5)
13839 gcc_assert (base_reg_dies);
13840 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13841 offset = 0;
13844 addr = plus_constant (Pmode, base_reg_rtx, offset);
13846 for (i = 0; i < nops; i++)
13848 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13849 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13850 SImode, addr, 0);
13852 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13853 write_back ? offset + i * 4 : 0));
13854 return true;
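/* For instance (illustrative only): if the matched sequence loads two
   different constants into the same register and stores it twice, regs[]
   contains a duplicate, so peep2_find_free_register is asked for a spare
   register for one of the constants; the constants are then reloaded
   into distinct registers, remapped into ascending order, and stored
   with a single STM.  */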
13857 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13858 unaligned copies on processors which support unaligned semantics for those
13859 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13860 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13861 An interleave factor of 1 (the minimum) will perform no interleaving.
13862 Load/store multiple are used for aligned addresses where possible. */
13864 static void
13865 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13866 HOST_WIDE_INT length,
13867 unsigned int interleave_factor)
13869 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13870 int *regnos = XALLOCAVEC (int, interleave_factor);
13871 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13872 HOST_WIDE_INT i, j;
13873 HOST_WIDE_INT remaining = length, words;
13874 rtx halfword_tmp = NULL, byte_tmp = NULL;
13875 rtx dst, src;
13876 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13877 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13878 HOST_WIDE_INT srcoffset, dstoffset;
13879 HOST_WIDE_INT src_autoinc, dst_autoinc;
13880 rtx mem, addr;
13882 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13884 /* Use hard registers if we have aligned source or destination so we can use
13885 load/store multiple with contiguous registers. */
13886 if (dst_aligned || src_aligned)
13887 for (i = 0; i < interleave_factor; i++)
13888 regs[i] = gen_rtx_REG (SImode, i);
13889 else
13890 for (i = 0; i < interleave_factor; i++)
13891 regs[i] = gen_reg_rtx (SImode);
13893 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13894 src = copy_addr_to_reg (XEXP (srcbase, 0));
13896 srcoffset = dstoffset = 0;
13898 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13899 For copying the last bytes we want to subtract this offset again. */
13900 src_autoinc = dst_autoinc = 0;
13902 for (i = 0; i < interleave_factor; i++)
13903 regnos[i] = i;
13905 /* Copy BLOCK_SIZE_BYTES chunks. */
13907 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13909 /* Load words. */
13910 if (src_aligned && interleave_factor > 1)
13912 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13913 TRUE, srcbase, &srcoffset));
13914 src_autoinc += UNITS_PER_WORD * interleave_factor;
13916 else
13918 for (j = 0; j < interleave_factor; j++)
13920 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13921 - src_autoinc));
13922 mem = adjust_automodify_address (srcbase, SImode, addr,
13923 srcoffset + j * UNITS_PER_WORD);
13924 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13926 srcoffset += block_size_bytes;
13929 /* Store words. */
13930 if (dst_aligned && interleave_factor > 1)
13932 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13933 TRUE, dstbase, &dstoffset));
13934 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13936 else
13938 for (j = 0; j < interleave_factor; j++)
13940 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13941 - dst_autoinc));
13942 mem = adjust_automodify_address (dstbase, SImode, addr,
13943 dstoffset + j * UNITS_PER_WORD);
13944 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13946 dstoffset += block_size_bytes;
13949 remaining -= block_size_bytes;
13952 /* Copy any whole words left (note these aren't interleaved with any
13953 subsequent halfword/byte load/stores in the interests of simplicity). */
13955 words = remaining / UNITS_PER_WORD;
13957 gcc_assert (words < interleave_factor);
13959 if (src_aligned && words > 1)
13961 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13962 &srcoffset));
13963 src_autoinc += UNITS_PER_WORD * words;
13965 else
13967 for (j = 0; j < words; j++)
13969 addr = plus_constant (Pmode, src,
13970 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13971 mem = adjust_automodify_address (srcbase, SImode, addr,
13972 srcoffset + j * UNITS_PER_WORD);
13973 if (src_aligned)
13974 emit_move_insn (regs[j], mem);
13975 else
13976 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13978 srcoffset += words * UNITS_PER_WORD;
13981 if (dst_aligned && words > 1)
13983 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13984 &dstoffset));
13985 dst_autoinc += words * UNITS_PER_WORD;
13987 else
13989 for (j = 0; j < words; j++)
13991 addr = plus_constant (Pmode, dst,
13992 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13993 mem = adjust_automodify_address (dstbase, SImode, addr,
13994 dstoffset + j * UNITS_PER_WORD);
13995 if (dst_aligned)
13996 emit_move_insn (mem, regs[j]);
13997 else
13998 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14000 dstoffset += words * UNITS_PER_WORD;
14003 remaining -= words * UNITS_PER_WORD;
14005 gcc_assert (remaining < 4);
14007 /* Copy a halfword if necessary. */
14009 if (remaining >= 2)
14011 halfword_tmp = gen_reg_rtx (SImode);
14013 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14014 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14015 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14017 /* Either write out immediately, or delay until we've loaded the last
14018 byte, depending on interleave factor. */
14019 if (interleave_factor == 1)
14021 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14022 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14023 emit_insn (gen_unaligned_storehi (mem,
14024 gen_lowpart (HImode, halfword_tmp)));
14025 halfword_tmp = NULL;
14026 dstoffset += 2;
14029 remaining -= 2;
14030 srcoffset += 2;
14033 gcc_assert (remaining < 2);
14035 /* Copy last byte. */
14037 if ((remaining & 1) != 0)
14039 byte_tmp = gen_reg_rtx (SImode);
14041 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14042 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14043 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14045 if (interleave_factor == 1)
14047 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14048 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14049 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14050 byte_tmp = NULL;
14051 dstoffset++;
14054 remaining--;
14055 srcoffset++;
14058 /* Store last halfword if we haven't done so already. */
14060 if (halfword_tmp)
14062 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14063 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14064 emit_insn (gen_unaligned_storehi (mem,
14065 gen_lowpart (HImode, halfword_tmp)));
14066 dstoffset += 2;
14069 /* Likewise for last byte. */
14071 if (byte_tmp)
14073 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14074 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14075 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14076 dstoffset++;
14079 gcc_assert (remaining == 0 && srcoffset == dstoffset);
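/* A rough trace of this routine (illustrative values): copying 23 bytes
   with INTERLEAVE_FACTOR == 2, source unaligned and destination
   word-aligned, the main loop copies two 8-byte blocks (unaligned ldr
   pairs, stm stores), the word loop copies one more word, and the tail
   handles a halfword and the final byte, leaving
   srcoffset == dstoffset == 23 for the assert above.  */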
14082 /* From mips_adjust_block_mem:
14084 Helper function for doing a loop-based block operation on memory
14085 reference MEM. Each iteration of the loop will operate on LENGTH
14086 bytes of MEM.
14088 Create a new base register for use within the loop and point it to
14089 the start of MEM. Create a new memory reference that uses this
14090 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14092 static void
14093 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14094 rtx *loop_mem)
14096 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14098 /* Although the new mem does not refer to a known location,
14099 it does keep up to LENGTH bytes of alignment. */
14100 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14101 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14104 /* From mips_block_move_loop:
14106 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14107 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14108 the memory regions do not overlap. */
14110 static void
14111 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14112 unsigned int interleave_factor,
14113 HOST_WIDE_INT bytes_per_iter)
14115 rtx src_reg, dest_reg, final_src, test;
14116 HOST_WIDE_INT leftover;
14118 leftover = length % bytes_per_iter;
14119 length -= leftover;
14121 /* Create registers and memory references for use within the loop. */
14122 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14123 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14125 /* Calculate the value that SRC_REG should have after the last iteration of
14126 the loop. */
14127 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14128 0, 0, OPTAB_WIDEN);
14130 /* Emit the start of the loop. */
14131 rtx_code_label *label = gen_label_rtx ();
14132 emit_label (label);
14134 /* Emit the loop body. */
14135 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14136 interleave_factor);
14138 /* Move on to the next block. */
14139 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14140 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14142 /* Emit the loop condition. */
14143 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14144 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14146 /* Mop up any left-over bytes. */
14147 if (leftover)
14148 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
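/* For example, LENGTH == 50 with BYTES_PER_ITER == 16 gives
   LEFTOVER == 2: the loop runs three times (SRC_REG is compared against
   FINAL_SRC == src + 48) and the last two bytes are handled by the
   straight-line copy above.  */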
14151 /* Emit a block move when either the source or destination is unaligned (not
14152 aligned to a four-byte boundary). This may need further tuning depending on
14153 core type, optimize_size setting, etc. */
14155 static int
14156 arm_movmemqi_unaligned (rtx *operands)
14158 HOST_WIDE_INT length = INTVAL (operands[2]);
14160 if (optimize_size)
14162 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14163 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14164 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14165 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14166 or dst_aligned though: allow more interleaving in those cases since the
14167 resulting code can be smaller. */
14168 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14169 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14171 if (length > 12)
14172 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14173 interleave_factor, bytes_per_iter);
14174 else
14175 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14176 interleave_factor);
14178 else
14180 /* Note that the loop created by arm_block_move_unaligned_loop may be
14181 subject to loop unrolling, which makes tuning this condition a little
14182 redundant. */
14183 if (length > 32)
14184 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14185 else
14186 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14189 return 1;
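/* Roughly: when optimizing for size with neither side word-aligned we
   get interleave_factor 1 and 4 bytes per iteration, so a 16-byte copy
   uses the loop while a 12-byte copy is emitted straight-line; when not
   optimizing for size, a 40-byte copy uses the 16-bytes-per-iteration
   loop and a 32-byte copy is emitted straight-line.  */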
14192 int
14193 arm_gen_movmemqi (rtx *operands)
14195 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14196 HOST_WIDE_INT srcoffset, dstoffset;
14197 rtx src, dst, srcbase, dstbase;
14198 rtx part_bytes_reg = NULL;
14199 rtx mem;
14201 if (!CONST_INT_P (operands[2])
14202 || !CONST_INT_P (operands[3])
14203 || INTVAL (operands[2]) > 64)
14204 return 0;
14206 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14207 return arm_movmemqi_unaligned (operands);
14209 if (INTVAL (operands[3]) & 3)
14210 return 0;
14212 dstbase = operands[0];
14213 srcbase = operands[1];
14215 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14216 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14218 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14219 out_words_to_go = INTVAL (operands[2]) / 4;
14220 last_bytes = INTVAL (operands[2]) & 3;
14221 dstoffset = srcoffset = 0;
14223 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14224 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14226 while (in_words_to_go >= 2)
14228 if (in_words_to_go > 4)
14229 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14230 TRUE, srcbase, &srcoffset));
14231 else
14232 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14233 src, FALSE, srcbase,
14234 &srcoffset));
14236 if (out_words_to_go)
14238 if (out_words_to_go > 4)
14239 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14240 TRUE, dstbase, &dstoffset));
14241 else if (out_words_to_go != 1)
14242 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14243 out_words_to_go, dst,
14244 (last_bytes == 0
14245 ? FALSE : TRUE),
14246 dstbase, &dstoffset));
14247 else
14249 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14250 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14251 if (last_bytes != 0)
14253 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14254 dstoffset += 4;
14259 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14260 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14263 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14264 if (out_words_to_go)
14266 rtx sreg;
14268 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14269 sreg = copy_to_reg (mem);
14271 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14272 emit_move_insn (mem, sreg);
14273 in_words_to_go--;
14275 gcc_assert (!in_words_to_go); /* Sanity check */
14278 if (in_words_to_go)
14280 gcc_assert (in_words_to_go > 0);
14282 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14283 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14286 gcc_assert (!last_bytes || part_bytes_reg);
14288 if (BYTES_BIG_ENDIAN && last_bytes)
14290 rtx tmp = gen_reg_rtx (SImode);
14292 /* The bytes we want are in the top end of the word. */
14293 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14294 GEN_INT (8 * (4 - last_bytes))));
14295 part_bytes_reg = tmp;
14297 while (last_bytes)
14299 mem = adjust_automodify_address (dstbase, QImode,
14300 plus_constant (Pmode, dst,
14301 last_bytes - 1),
14302 dstoffset + last_bytes - 1);
14303 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14305 if (--last_bytes)
14307 tmp = gen_reg_rtx (SImode);
14308 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14309 part_bytes_reg = tmp;
14314 else
14316 if (last_bytes > 1)
14318 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14319 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14320 last_bytes -= 2;
14321 if (last_bytes)
14323 rtx tmp = gen_reg_rtx (SImode);
14324 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14325 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14326 part_bytes_reg = tmp;
14327 dstoffset += 2;
14331 if (last_bytes)
14333 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14334 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14338 return 1;
14341 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14342 by mode size. */
14343 inline static rtx
14344 next_consecutive_mem (rtx mem)
14346 machine_mode mode = GET_MODE (mem);
14347 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14348 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14350 return adjust_automodify_address (mem, mode, addr, offset);
14353 /* Copy using LDRD/STRD instructions whenever possible.
14354 Returns true upon success. */
14355 bool
14356 gen_movmem_ldrd_strd (rtx *operands)
14358 unsigned HOST_WIDE_INT len;
14359 HOST_WIDE_INT align;
14360 rtx src, dst, base;
14361 rtx reg0;
14362 bool src_aligned, dst_aligned;
14363 bool src_volatile, dst_volatile;
14365 gcc_assert (CONST_INT_P (operands[2]));
14366 gcc_assert (CONST_INT_P (operands[3]));
14368 len = UINTVAL (operands[2]);
14369 if (len > 64)
14370 return false;
14372 /* Maximum alignment we can assume for both src and dst buffers. */
14373 align = INTVAL (operands[3]);
14375 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14376 return false;
14378 /* Place src and dst addresses in registers
14379 and update the corresponding mem rtx. */
14380 dst = operands[0];
14381 dst_volatile = MEM_VOLATILE_P (dst);
14382 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14383 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14384 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14386 src = operands[1];
14387 src_volatile = MEM_VOLATILE_P (src);
14388 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14389 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14390 src = adjust_automodify_address (src, VOIDmode, base, 0);
14392 if (!unaligned_access && !(src_aligned && dst_aligned))
14393 return false;
14395 if (src_volatile || dst_volatile)
14396 return false;
14398 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14399 if (!(dst_aligned || src_aligned))
14400 return arm_gen_movmemqi (operands);
14402 /* If either src or dst is unaligned we'll be accessing it as pairs
14403 of unaligned SImode accesses. Otherwise we can generate DImode
14404 ldrd/strd instructions. */
14405 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14406 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14408 while (len >= 8)
14410 len -= 8;
14411 reg0 = gen_reg_rtx (DImode);
14412 rtx low_reg = NULL_RTX;
14413 rtx hi_reg = NULL_RTX;
14415 if (!src_aligned || !dst_aligned)
14417 low_reg = gen_lowpart (SImode, reg0);
14418 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14420 if (src_aligned)
14421 emit_move_insn (reg0, src);
14422 else
14424 emit_insn (gen_unaligned_loadsi (low_reg, src));
14425 src = next_consecutive_mem (src);
14426 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14429 if (dst_aligned)
14430 emit_move_insn (dst, reg0);
14431 else
14433 emit_insn (gen_unaligned_storesi (dst, low_reg));
14434 dst = next_consecutive_mem (dst);
14435 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14438 src = next_consecutive_mem (src);
14439 dst = next_consecutive_mem (dst);
14442 gcc_assert (len < 8);
14443 if (len >= 4)
14445 /* More than a word but less than a double-word to copy. Copy a word. */
14446 reg0 = gen_reg_rtx (SImode);
14447 src = adjust_address (src, SImode, 0);
14448 dst = adjust_address (dst, SImode, 0);
14449 if (src_aligned)
14450 emit_move_insn (reg0, src);
14451 else
14452 emit_insn (gen_unaligned_loadsi (reg0, src));
14454 if (dst_aligned)
14455 emit_move_insn (dst, reg0);
14456 else
14457 emit_insn (gen_unaligned_storesi (dst, reg0));
14459 src = next_consecutive_mem (src);
14460 dst = next_consecutive_mem (dst);
14461 len -= 4;
14464 if (len == 0)
14465 return true;
14467 /* Copy the remaining bytes. */
14468 if (len >= 2)
14470 dst = adjust_address (dst, HImode, 0);
14471 src = adjust_address (src, HImode, 0);
14472 reg0 = gen_reg_rtx (SImode);
14473 if (src_aligned)
14474 emit_insn (gen_zero_extendhisi2 (reg0, src));
14475 else
14476 emit_insn (gen_unaligned_loadhiu (reg0, src));
14478 if (dst_aligned)
14479 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14480 else
14481 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14483 src = next_consecutive_mem (src);
14484 dst = next_consecutive_mem (dst);
14485 if (len == 2)
14486 return true;
14489 dst = adjust_address (dst, QImode, 0);
14490 src = adjust_address (src, QImode, 0);
14491 reg0 = gen_reg_rtx (QImode);
14492 emit_move_insn (reg0, src);
14493 emit_move_insn (dst, reg0);
14494 return true;
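/* For example, a 14-byte copy with both sides word-aligned becomes one
   DImode (ldrd/strd) move, one SImode move and one halfword move; with an
   unaligned source the DImode step is instead a pair of unaligned SImode
   loads into the low and high parts of reg0.  */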
14497 /* Select a dominance comparison mode if possible for a test of the general
14498 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14499 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14500 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14501 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14502 In all cases OP will be either EQ or NE, but we don't need to know which
14503 here. If we are unable to support a dominance comparison we return
14504 CCmode. This will then fail to match for the RTL expressions that
14505 generate this call. */
14506 machine_mode
14507 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14509 enum rtx_code cond1, cond2;
14510 int swapped = 0;
14512 /* Currently we will probably get the wrong result if the individual
14513 comparisons are not simple. This also ensures that it is safe to
14514 reverse a comparison if necessary. */
14515 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14516 != CCmode)
14517 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14518 != CCmode))
14519 return CCmode;
14521 /* The if_then_else variant of this tests the second condition if the
14522 first passes, but is true if the first fails. Reverse the first
14523 condition to get a true "inclusive-or" expression. */
14524 if (cond_or == DOM_CC_NX_OR_Y)
14525 cond1 = reverse_condition (cond1);
14527 /* If the comparisons are not equal, and one doesn't dominate the other,
14528 then we can't do this. */
14529 if (cond1 != cond2
14530 && !comparison_dominates_p (cond1, cond2)
14531 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14532 return CCmode;
14534 if (swapped)
14535 std::swap (cond1, cond2);
14537 switch (cond1)
14539 case EQ:
14540 if (cond_or == DOM_CC_X_AND_Y)
14541 return CC_DEQmode;
14543 switch (cond2)
14545 case EQ: return CC_DEQmode;
14546 case LE: return CC_DLEmode;
14547 case LEU: return CC_DLEUmode;
14548 case GE: return CC_DGEmode;
14549 case GEU: return CC_DGEUmode;
14550 default: gcc_unreachable ();
14553 case LT:
14554 if (cond_or == DOM_CC_X_AND_Y)
14555 return CC_DLTmode;
14557 switch (cond2)
14559 case LT:
14560 return CC_DLTmode;
14561 case LE:
14562 return CC_DLEmode;
14563 case NE:
14564 return CC_DNEmode;
14565 default:
14566 gcc_unreachable ();
14569 case GT:
14570 if (cond_or == DOM_CC_X_AND_Y)
14571 return CC_DGTmode;
14573 switch (cond2)
14575 case GT:
14576 return CC_DGTmode;
14577 case GE:
14578 return CC_DGEmode;
14579 case NE:
14580 return CC_DNEmode;
14581 default:
14582 gcc_unreachable ();
14585 case LTU:
14586 if (cond_or == DOM_CC_X_AND_Y)
14587 return CC_DLTUmode;
14589 switch (cond2)
14591 case LTU:
14592 return CC_DLTUmode;
14593 case LEU:
14594 return CC_DLEUmode;
14595 case NE:
14596 return CC_DNEmode;
14597 default:
14598 gcc_unreachable ();
14601 case GTU:
14602 if (cond_or == DOM_CC_X_AND_Y)
14603 return CC_DGTUmode;
14605 switch (cond2)
14607 case GTU:
14608 return CC_DGTUmode;
14609 case GEU:
14610 return CC_DGEUmode;
14611 case NE:
14612 return CC_DNEmode;
14613 default:
14614 gcc_unreachable ();
14617 /* The remaining cases only occur when both comparisons are the
14618 same. */
14619 case NE:
14620 gcc_assert (cond1 == cond2);
14621 return CC_DNEmode;
14623 case LE:
14624 gcc_assert (cond1 == cond2);
14625 return CC_DLEmode;
14627 case GE:
14628 gcc_assert (cond1 == cond2);
14629 return CC_DGEmode;
14631 case LEU:
14632 gcc_assert (cond1 == cond2);
14633 return CC_DLEUmode;
14635 case GEU:
14636 gcc_assert (cond1 == cond2);
14637 return CC_DGEUmode;
14639 default:
14640 gcc_unreachable ();
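/* Two illustrative cases: for (eq x 0) and (eq y 0) combined with
   DOM_CC_X_AND_Y the result is CC_DEQmode; for (eq x y) or'd with
   (le x y) (DOM_CC_X_OR_Y) the EQ dominates the LE and CC_DLEmode is
   returned.  Something like (lt x y) or'd with (eq z 0) has no dominance
   relation, so CCmode is returned and the caller's pattern fails to
   match.  */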
14644 machine_mode
14645 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14647 /* All floating point compares return CCFP if it is an equality
14648 comparison, and CCFPE otherwise. */
14649 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14651 switch (op)
14653 case EQ:
14654 case NE:
14655 case UNORDERED:
14656 case ORDERED:
14657 case UNLT:
14658 case UNLE:
14659 case UNGT:
14660 case UNGE:
14661 case UNEQ:
14662 case LTGT:
14663 return CCFPmode;
14665 case LT:
14666 case LE:
14667 case GT:
14668 case GE:
14669 return CCFPEmode;
14671 default:
14672 gcc_unreachable ();
14676 /* A compare with a shifted operand. Because of canonicalization, the
14677 comparison will have to be swapped when we emit the assembler. */
14678 if (GET_MODE (y) == SImode
14679 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14680 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14681 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14682 || GET_CODE (x) == ROTATERT))
14683 return CC_SWPmode;
14685 /* This operation is performed swapped, but since we only rely on the Z
14686 flag we don't need an additional mode. */
14687 if (GET_MODE (y) == SImode
14688 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14689 && GET_CODE (x) == NEG
14690 && (op == EQ || op == NE))
14691 return CC_Zmode;
14693 /* This is a special case that is used by combine to allow a
14694 comparison of a shifted byte load to be split into a zero-extend
14695 followed by a comparison of the shifted integer (only valid for
14696 equalities and unsigned inequalities). */
14697 if (GET_MODE (x) == SImode
14698 && GET_CODE (x) == ASHIFT
14699 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14700 && GET_CODE (XEXP (x, 0)) == SUBREG
14701 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14702 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14703 && (op == EQ || op == NE
14704 || op == GEU || op == GTU || op == LTU || op == LEU)
14705 && CONST_INT_P (y))
14706 return CC_Zmode;
14708 /* A construct for a conditional compare: if the false arm contains
14709 0, then both conditions must be true; otherwise either condition
14710 must be true. Not all conditions are possible, so CCmode is
14711 returned if it can't be done. */
14712 if (GET_CODE (x) == IF_THEN_ELSE
14713 && (XEXP (x, 2) == const0_rtx
14714 || XEXP (x, 2) == const1_rtx)
14715 && COMPARISON_P (XEXP (x, 0))
14716 && COMPARISON_P (XEXP (x, 1)))
14717 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14718 INTVAL (XEXP (x, 2)));
14720 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14721 if (GET_CODE (x) == AND
14722 && (op == EQ || op == NE)
14723 && COMPARISON_P (XEXP (x, 0))
14724 && COMPARISON_P (XEXP (x, 1)))
14725 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14726 DOM_CC_X_AND_Y);
14728 if (GET_CODE (x) == IOR
14729 && (op == EQ || op == NE)
14730 && COMPARISON_P (XEXP (x, 0))
14731 && COMPARISON_P (XEXP (x, 1)))
14732 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14733 DOM_CC_X_OR_Y);
14735 /* An operation (on Thumb) where we want to test for a single bit.
14736 This is done by shifting that bit up into the top bit of a
14737 scratch register; we can then branch on the sign bit. */
14738 if (TARGET_THUMB1
14739 && GET_MODE (x) == SImode
14740 && (op == EQ || op == NE)
14741 && GET_CODE (x) == ZERO_EXTRACT
14742 && XEXP (x, 1) == const1_rtx)
14743 return CC_Nmode;
14745 /* For an operation that sets the condition codes as a side-effect, the
14746 V flag is not set correctly, so we can only use comparisons where
14747 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14748 instead.) */
14749 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14750 if (GET_MODE (x) == SImode
14751 && y == const0_rtx
14752 && (op == EQ || op == NE || op == LT || op == GE)
14753 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14754 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14755 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14756 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14757 || GET_CODE (x) == LSHIFTRT
14758 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14759 || GET_CODE (x) == ROTATERT
14760 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14761 return CC_NOOVmode;
14763 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14764 return CC_Zmode;
14766 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14767 && GET_CODE (x) == PLUS
14768 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14769 return CC_Cmode;
14771 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14773 switch (op)
14775 case EQ:
14776 case NE:
14777 /* A DImode comparison against zero can be implemented by
14778 or'ing the two halves together. */
14779 if (y == const0_rtx)
14780 return CC_Zmode;
14782 /* We can do an equality test in three Thumb instructions. */
14783 if (!TARGET_32BIT)
14784 return CC_Zmode;
14786 /* FALLTHROUGH */
14788 case LTU:
14789 case LEU:
14790 case GTU:
14791 case GEU:
14792 /* DImode unsigned comparisons can be implemented by cmp +
14793 cmpeq without a scratch register. Not worth doing in
14794 Thumb-2. */
14795 if (TARGET_32BIT)
14796 return CC_CZmode;
14798 /* FALLTHROUGH */
14800 case LT:
14801 case LE:
14802 case GT:
14803 case GE:
14804 /* DImode signed and unsigned comparisons can be implemented
14805 by cmp + sbcs with a scratch register, but that does not
14806 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14807 gcc_assert (op != EQ && op != NE);
14808 return CC_NCVmode;
14810 default:
14811 gcc_unreachable ();
14815 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14816 return GET_MODE (x);
14818 return CCmode;
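/* For DImode operands, for example, an equality test against zero yields
   CC_Zmode (the two halves can simply be OR'd together), an unsigned
   comparison on a 32-bit target yields CC_CZmode (cmp + cmpeq), and a
   signed comparison yields CC_NCVmode (cmp + sbcs with a scratch
   register), matching the switch above.  */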
14821 /* X and Y are two things to compare using CODE. Emit the compare insn and
14822 return the rtx for the CC register in the proper mode. SCRATCH provides
14823 an SImode scratch register needed by some DImode comparisons after reload. */
14824 rtx
14825 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14827 machine_mode mode;
14828 rtx cc_reg;
14829 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14831 /* We might have X as a constant, Y as a register because of the predicates
14832 used for cmpdi. If so, force X to a register here. */
14833 if (dimode_comparison && !REG_P (x))
14834 x = force_reg (DImode, x);
14836 mode = SELECT_CC_MODE (code, x, y);
14837 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14839 if (dimode_comparison
14840 && mode != CC_CZmode)
14842 rtx clobber, set;
14844 /* To compare two non-zero values for equality, XOR them and
14845 then compare against zero. Not used for ARM mode; there
14846 CC_CZmode is cheaper. */
14847 if (mode == CC_Zmode && y != const0_rtx)
14849 gcc_assert (!reload_completed);
14850 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14851 y = const0_rtx;
14854 /* A scratch register is required. */
14855 if (reload_completed)
14856 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14857 else
14858 scratch = gen_rtx_SCRATCH (SImode);
14860 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14861 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14862 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14864 else
14865 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14867 return cc_reg;
14870 /* Generate a sequence of insns that will generate the correct return
14871 address mask depending on the physical architecture that the program
14872 is running on. */
14873 rtx
14874 arm_gen_return_addr_mask (void)
14876 rtx reg = gen_reg_rtx (Pmode);
14878 emit_insn (gen_return_addr_mask (reg));
14879 return reg;
14882 void
14883 arm_reload_in_hi (rtx *operands)
14885 rtx ref = operands[1];
14886 rtx base, scratch;
14887 HOST_WIDE_INT offset = 0;
14889 if (GET_CODE (ref) == SUBREG)
14891 offset = SUBREG_BYTE (ref);
14892 ref = SUBREG_REG (ref);
14895 if (REG_P (ref))
14897 /* We have a pseudo which has been spilt onto the stack; there
14898 are two cases here: the first where there is a simple
14899 stack-slot replacement and a second where the stack-slot is
14900 out of range, or is used as a subreg. */
14901 if (reg_equiv_mem (REGNO (ref)))
14903 ref = reg_equiv_mem (REGNO (ref));
14904 base = find_replacement (&XEXP (ref, 0));
14906 else
14907 /* The slot is out of range, or was dressed up in a SUBREG. */
14908 base = reg_equiv_address (REGNO (ref));
14910 /* PR 62554: If there is no equivalent memory location then just move
14911 the value as an SImode register move. This happens when the target
14912 architecture variant does not have an HImode register move. */
14913 if (base == NULL)
14915 gcc_assert (REG_P (operands[0]));
14916 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14917 gen_rtx_SUBREG (SImode, ref, 0)));
14918 return;
14921 else
14922 base = find_replacement (&XEXP (ref, 0));
14924 /* Handle the case where the address is too complex to be offset by 1. */
14925 if (GET_CODE (base) == MINUS
14926 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14928 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14930 emit_set_insn (base_plus, base);
14931 base = base_plus;
14933 else if (GET_CODE (base) == PLUS)
14935 /* The addend must be CONST_INT, or we would have dealt with it above. */
14936 HOST_WIDE_INT hi, lo;
14938 offset += INTVAL (XEXP (base, 1));
14939 base = XEXP (base, 0);
14941 /* Rework the address into a legal sequence of insns. */
14942 /* Valid range for lo is -4095 -> 4095 */
14943 lo = (offset >= 0
14944 ? (offset & 0xfff)
14945 : -((-offset) & 0xfff));
14947 /* Corner case: if lo is the max offset then we would be out of range
14948 once we have added the additional 1 below, so bump the msb into the
14949 pre-loading insn(s). */
14950 if (lo == 4095)
14951 lo &= 0x7ff;
14953 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14954 ^ (HOST_WIDE_INT) 0x80000000)
14955 - (HOST_WIDE_INT) 0x80000000);
14957 gcc_assert (hi + lo == offset);
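/* Example of the split above: OFFSET == 0x2006 gives lo == 6 and
   hi == 0x2000, so the high part is added to the base first and the byte
   loads below use offsets 6 and 7.  For the corner case OFFSET == 4095,
   lo is reduced to 2047 and hi becomes 2048, keeping lo + 1 within the
   +/-4095 range of the byte loads.  */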
14959 if (hi != 0)
14961 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14963 /* Get the base address; addsi3 knows how to handle constants
14964 that require more than one insn. */
14965 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14966 base = base_plus;
14967 offset = lo;
14971 /* Operands[2] may overlap operands[0] (though it won't overlap
14972 operands[1]); that's why we asked for a DImode reg -- so we can
14973 use the half that does not overlap. */
14974 if (REGNO (operands[2]) == REGNO (operands[0]))
14975 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14976 else
14977 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14979 emit_insn (gen_zero_extendqisi2 (scratch,
14980 gen_rtx_MEM (QImode,
14981 plus_constant (Pmode, base,
14982 offset))));
14983 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14984 gen_rtx_MEM (QImode,
14985 plus_constant (Pmode, base,
14986 offset + 1))));
14987 if (!BYTES_BIG_ENDIAN)
14988 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14989 gen_rtx_IOR (SImode,
14990 gen_rtx_ASHIFT
14991 (SImode,
14992 gen_rtx_SUBREG (SImode, operands[0], 0),
14993 GEN_INT (8)),
14994 scratch));
14995 else
14996 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14997 gen_rtx_IOR (SImode,
14998 gen_rtx_ASHIFT (SImode, scratch,
14999 GEN_INT (8)),
15000 gen_rtx_SUBREG (SImode, operands[0], 0)));
15003 /* Handle storing a half-word to memory during reload by synthesizing it as two
15004 byte stores. Take care not to clobber the input values until after we
15005 have moved them somewhere safe. This code assumes that if the DImode
15006 scratch in operands[2] overlaps either the input value or output address
15007 in some way, then that value must die in this insn (we absolutely need
15008 two scratch registers for some corner cases). */
15009 void
15010 arm_reload_out_hi (rtx *operands)
15012 rtx ref = operands[0];
15013 rtx outval = operands[1];
15014 rtx base, scratch;
15015 HOST_WIDE_INT offset = 0;
15017 if (GET_CODE (ref) == SUBREG)
15019 offset = SUBREG_BYTE (ref);
15020 ref = SUBREG_REG (ref);
15023 if (REG_P (ref))
15025 /* We have a pseudo which has been spilt onto the stack; there
15026 are two cases here: the first where there is a simple
15027 stack-slot replacement and a second where the stack-slot is
15028 out of range, or is used as a subreg. */
15029 if (reg_equiv_mem (REGNO (ref)))
15031 ref = reg_equiv_mem (REGNO (ref));
15032 base = find_replacement (&XEXP (ref, 0));
15034 else
15035 /* The slot is out of range, or was dressed up in a SUBREG. */
15036 base = reg_equiv_address (REGNO (ref));
15038 /* PR 62254: If there is no equivalent memory location then just move
15039 the value as an SImode register move. This happens when the target
15040 architecture variant does not have an HImode register move. */
15041 if (base == NULL)
15043 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15045 if (REG_P (outval))
15047 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15048 gen_rtx_SUBREG (SImode, outval, 0)));
15050 else /* SUBREG_P (outval) */
15052 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15053 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15054 SUBREG_REG (outval)));
15055 else
15056 /* FIXME: Handle other cases ? */
15057 gcc_unreachable ();
15059 return;
15062 else
15063 base = find_replacement (&XEXP (ref, 0));
15065 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15067 /* Handle the case where the address is too complex to be offset by 1. */
15068 if (GET_CODE (base) == MINUS
15069 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15071 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15073 /* Be careful not to destroy OUTVAL. */
15074 if (reg_overlap_mentioned_p (base_plus, outval))
15076 	 /* Updating base_plus might destroy outval; see if we can
15077 	    swap the scratch and base_plus. */
15078 if (!reg_overlap_mentioned_p (scratch, outval))
15079 std::swap (scratch, base_plus);
15080 else
15082 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15084 	     /* Be conservative and copy OUTVAL into the scratch now;
15085 		this should only be necessary if outval is a subreg
15086 		of something larger than a word. */
15087 /* XXX Might this clobber base? I can't see how it can,
15088 since scratch is known to overlap with OUTVAL, and
15089 must be wider than a word. */
15090 emit_insn (gen_movhi (scratch_hi, outval));
15091 outval = scratch_hi;
15095 emit_set_insn (base_plus, base);
15096 base = base_plus;
15098 else if (GET_CODE (base) == PLUS)
15100 /* The addend must be CONST_INT, or we would have dealt with it above. */
15101 HOST_WIDE_INT hi, lo;
15103 offset += INTVAL (XEXP (base, 1));
15104 base = XEXP (base, 0);
15106 /* Rework the address into a legal sequence of insns. */
15107 /* Valid range for lo is -4095 -> 4095 */
15108 lo = (offset >= 0
15109 ? (offset & 0xfff)
15110 : -((-offset) & 0xfff));
15112 /* Corner case, if lo is the max offset then we would be out of range
15113 once we have added the additional 1 below, so bump the msb into the
15114 pre-loading insn(s). */
15115 if (lo == 4095)
15116 lo &= 0x7ff;
15118 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15119 ^ (HOST_WIDE_INT) 0x80000000)
15120 - (HOST_WIDE_INT) 0x80000000);
15122 gcc_assert (hi + lo == offset);
15124 if (hi != 0)
15126 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15128 /* Be careful not to destroy OUTVAL. */
15129 if (reg_overlap_mentioned_p (base_plus, outval))
15131 	     /* Updating base_plus might destroy outval; see if we
15132 		can swap the scratch and base_plus. */
15133 if (!reg_overlap_mentioned_p (scratch, outval))
15134 std::swap (scratch, base_plus);
15135 else
15137 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15139 		 /* Be conservative and copy outval into scratch now;
15140 		    this should only be necessary if outval is a
15141 		    subreg of something larger than a word. */
15142 /* XXX Might this clobber base? I can't see how it
15143 can, since scratch is known to overlap with
15144 outval. */
15145 emit_insn (gen_movhi (scratch_hi, outval));
15146 outval = scratch_hi;
15150 /* Get the base address; addsi3 knows how to handle constants
15151 that require more than one insn. */
15152 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15153 base = base_plus;
15154 offset = lo;
15158 if (BYTES_BIG_ENDIAN)
15160 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15161 plus_constant (Pmode, base,
15162 offset + 1)),
15163 gen_lowpart (QImode, outval)));
15164 emit_insn (gen_lshrsi3 (scratch,
15165 gen_rtx_SUBREG (SImode, outval, 0),
15166 GEN_INT (8)));
15167 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15168 offset)),
15169 gen_lowpart (QImode, scratch)));
15171 else
15173 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15174 offset)),
15175 gen_lowpart (QImode, outval)));
15176 emit_insn (gen_lshrsi3 (scratch,
15177 gen_rtx_SUBREG (SImode, outval, 0),
15178 GEN_INT (8)));
15179 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15180 plus_constant (Pmode, base,
15181 offset + 1)),
15182 gen_lowpart (QImode, scratch)));
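      /* Illustrative sketch (not in the original sources) of the
	 little-endian sequence emitted just above:

	    strb	outval, [base, #offset]		@ low byte
	    lsr		scratch, outval, #8
	    strb	scratch, [base, #offset + 1]	@ high byte

	 the big-endian branch stores the two bytes at the opposite
	 addresses.  */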
15186 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15187 (padded to the size of a word) should be passed in a register. */
15189 static bool
15190 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15192 if (TARGET_AAPCS_BASED)
15193 return must_pass_in_stack_var_size (mode, type);
15194 else
15195 return must_pass_in_stack_var_size_or_pad (mode, type);
15199 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15200 byte of a stack argument has useful data. For legacy APCS ABIs we use
15201 the default. For AAPCS based ABIs small aggregate types are placed
15202 in the lowest memory address. */
15204 static pad_direction
15205 arm_function_arg_padding (machine_mode mode, const_tree type)
15207 if (!TARGET_AAPCS_BASED)
15208 return default_function_arg_padding (mode, type);
15210 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15211 return PAD_DOWNWARD;
15213 return PAD_UPWARD;
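/* For illustration: with big-endian AAPCS, an integral argument such as a
   'short' gets PAD_DOWNWARD, i.e. the low bytes of its slot are padding,
   whereas a small aggregate such as struct { char c[3]; } gets PAD_UPWARD
   and therefore has useful data in the lowest byte, as described above.  */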
15217 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15218 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15219 register has useful data, and return the opposite if the most
15220 significant byte does. */
15222 bool
15223 arm_pad_reg_upward (machine_mode mode,
15224 tree type, int first ATTRIBUTE_UNUSED)
15226 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15228 /* For AAPCS, small aggregates, small fixed-point types,
15229 and small complex types are always padded upwards. */
15230 if (type)
15232 if ((AGGREGATE_TYPE_P (type)
15233 || TREE_CODE (type) == COMPLEX_TYPE
15234 || FIXED_POINT_TYPE_P (type))
15235 && int_size_in_bytes (type) <= 4)
15236 return true;
15238 else
15240 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15241 && GET_MODE_SIZE (mode) <= 4)
15242 return true;
15246 /* Otherwise, use default padding. */
15247 return !BYTES_BIG_ENDIAN;
15250 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15251 assuming that the address in the base register is word aligned. */
15252 bool
15253 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15255 HOST_WIDE_INT max_offset;
15257 /* Offset must be a multiple of 4 in Thumb mode. */
15258 if (TARGET_THUMB2 && ((offset & 3) != 0))
15259 return false;
15261 if (TARGET_THUMB2)
15262 max_offset = 1020;
15263 else if (TARGET_ARM)
15264 max_offset = 255;
15265 else
15266 return false;
15268 return ((offset <= max_offset) && (offset >= -max_offset));
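/* For example, with the limits above an offset of 1020 is accepted in
   Thumb-2 but rejected in ARM state (maximum 255); an offset of 2 is
   rejected in Thumb-2 (not a multiple of 4) but accepted in ARM state;
   and Thumb-1 targets always get false.  */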
15271 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15272 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15273 Assumes that the address in the base register RN is word aligned. Pattern
15274 guarantees that both memory accesses use the same base register,
15275 the offsets are constants within the range, and the gap between the offsets is 4.
15276    If reload is complete, check that the registers are legal.  WBACK indicates whether
15277 address is updated. LOAD indicates whether memory access is load or store. */
15278 bool
15279 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15280 bool wback, bool load)
15282 unsigned int t, t2, n;
15284 if (!reload_completed)
15285 return true;
15287 if (!offset_ok_for_ldrd_strd (offset))
15288 return false;
15290 t = REGNO (rt);
15291 t2 = REGNO (rt2);
15292 n = REGNO (rn);
15294 if ((TARGET_THUMB2)
15295 && ((wback && (n == t || n == t2))
15296 || (t == SP_REGNUM)
15297 || (t == PC_REGNUM)
15298 || (t2 == SP_REGNUM)
15299 || (t2 == PC_REGNUM)
15300 || (!load && (n == PC_REGNUM))
15301 || (load && (t == t2))
15302 /* Triggers Cortex-M3 LDRD errata. */
15303 || (!wback && load && fix_cm3_ldrd && (n == t))))
15304 return false;
15306 if ((TARGET_ARM)
15307 && ((wback && (n == t || n == t2))
15308 || (t2 == PC_REGNUM)
15309 || (t % 2 != 0) /* First destination register is not even. */
15310 || (t2 != t + 1)
15311 /* PC can be used as base register (for offset addressing only),
15312 	     but it is deprecated. */
15313 || (n == PC_REGNUM)))
15314 return false;
15316 return true;
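/* For illustration, in ARM state the checks above require an even/odd
   register pair, e.g.

     ldrd	r4, r5, [r6]	@ OK: t == 4 is even and t2 == t + 1
     ldrd	r5, r6, [r7]	@ rejected: first destination register is odd

   while in Thumb-2, subject to the writeback and errata checks above, any
   two distinct core registers other than SP or PC may be used.  */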
15319 /* Return true if a 64-bit access with alignment ALIGN and with a
15320 constant offset OFFSET from the base pointer is permitted on this
15321 architecture. */
15322 static bool
15323 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15325 return (unaligned_access
15326 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15327 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15330 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15331 operand MEM's address contains an immediate offset from the base
15332 register and has no side effects, in which case it sets BASE,
15333 OFFSET and ALIGN accordingly. */
15334 static bool
15335 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15337 rtx addr;
15339 gcc_assert (base != NULL && offset != NULL);
15341 /* TODO: Handle more general memory operand patterns, such as
15342 PRE_DEC and PRE_INC. */
15344 if (side_effects_p (mem))
15345 return false;
15347 /* Can't deal with subregs. */
15348 if (GET_CODE (mem) == SUBREG)
15349 return false;
15351 gcc_assert (MEM_P (mem));
15353 *offset = const0_rtx;
15354 *align = MEM_ALIGN (mem);
15356 addr = XEXP (mem, 0);
15358 /* If addr isn't valid for DImode, then we can't handle it. */
15359 if (!arm_legitimate_address_p (DImode, addr,
15360 reload_in_progress || reload_completed))
15361 return false;
15363 if (REG_P (addr))
15365 *base = addr;
15366 return true;
15368 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15370 *base = XEXP (addr, 0);
15371 *offset = XEXP (addr, 1);
15372 return (REG_P (*base) && CONST_INT_P (*offset));
15375 return false;
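/* For illustration, the address shapes accepted above are a plain register
   and a register plus (or minus) a constant, e.g.

     (mem:SI (reg:SI 4))				base = r4, offset = 0
     (mem:SI (plus:SI (reg:SI 4) (const_int 8)))	base = r4, offset = 8

   anything else (auto-increment addresses, SUBREGs, reg+reg sums, ...) is
   rejected.  */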
15378 /* Called from a peephole2 to replace two word-size accesses with a
15379 single LDRD/STRD instruction. Returns true iff we can generate a
15380 new instruction sequence. That is, both accesses use the same base
15381 register and the gap between constant offsets is 4. This function
15382 may reorder its operands to match ldrd/strd RTL templates.
15383 OPERANDS are the operands found by the peephole matcher;
15384 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15385    corresponding memory operands.  LOAD indicates whether the access
15386    is load or store.  CONST_STORE indicates a store of constant
15387    integer values held in OPERANDS[4,5] and assumes that the pattern
15388    is 4 insns long, for the purpose of checking dead registers.
15389 COMMUTE indicates that register operands may be reordered. */
15390 bool
15391 gen_operands_ldrd_strd (rtx *operands, bool load,
15392 bool const_store, bool commute)
15394 int nops = 2;
15395 HOST_WIDE_INT offsets[2], offset, align[2];
15396 rtx base = NULL_RTX;
15397 rtx cur_base, cur_offset, tmp;
15398 int i, gap;
15399 HARD_REG_SET regset;
15401 gcc_assert (!const_store || !load);
15402 /* Check that the memory references are immediate offsets from the
15403 same base register. Extract the base register, the destination
15404 registers, and the corresponding memory offsets. */
15405 for (i = 0; i < nops; i++)
15407 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15408 &align[i]))
15409 return false;
15411 if (i == 0)
15412 base = cur_base;
15413 else if (REGNO (base) != REGNO (cur_base))
15414 return false;
15416 offsets[i] = INTVAL (cur_offset);
15417 if (GET_CODE (operands[i]) == SUBREG)
15419 tmp = SUBREG_REG (operands[i]);
15420 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15421 operands[i] = tmp;
15425 /* Make sure there is no dependency between the individual loads. */
15426 if (load && REGNO (operands[0]) == REGNO (base))
15427 return false; /* RAW */
15429 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15430 return false; /* WAW */
15432 /* If the same input register is used in both stores
15433 when storing different constants, try to find a free register.
15434 For example, the code
15435 mov r0, 0
15436 str r0, [r2]
15437 mov r0, 1
15438 str r0, [r2, #4]
15439 can be transformed into
15440 mov r1, 0
15441 mov r0, 1
15442 strd r1, r0, [r2]
15443 in Thumb mode assuming that r1 is free.
15444 For ARM mode do the same but only if the starting register
15445 can be made to be even. */
15446 if (const_store
15447 && REGNO (operands[0]) == REGNO (operands[1])
15448 && INTVAL (operands[4]) != INTVAL (operands[5]))
15450 if (TARGET_THUMB2)
15452 CLEAR_HARD_REG_SET (regset);
15453 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15454 if (tmp == NULL_RTX)
15455 return false;
15457 /* Use the new register in the first load to ensure that
15458 if the original input register is not dead after peephole,
15459 then it will have the correct constant value. */
15460 operands[0] = tmp;
15462 else if (TARGET_ARM)
15464 int regno = REGNO (operands[0]);
15465 if (!peep2_reg_dead_p (4, operands[0]))
15467 /* When the input register is even and is not dead after the
15468 pattern, it has to hold the second constant but we cannot
15469 form a legal STRD in ARM mode with this register as the second
15470 register. */
15471 if (regno % 2 == 0)
15472 return false;
15474 /* Is regno-1 free? */
15475 SET_HARD_REG_SET (regset);
15476 CLEAR_HARD_REG_BIT(regset, regno - 1);
15477 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15478 if (tmp == NULL_RTX)
15479 return false;
15481 operands[0] = tmp;
15483 else
15485 /* Find a DImode register. */
15486 CLEAR_HARD_REG_SET (regset);
15487 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15488 if (tmp != NULL_RTX)
15490 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15491 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15493 else
15495 /* Can we use the input register to form a DI register? */
15496 SET_HARD_REG_SET (regset);
15497 CLEAR_HARD_REG_BIT(regset,
15498 regno % 2 == 0 ? regno + 1 : regno - 1);
15499 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15500 if (tmp == NULL_RTX)
15501 return false;
15502 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15506 gcc_assert (operands[0] != NULL_RTX);
15507 gcc_assert (operands[1] != NULL_RTX);
15508 gcc_assert (REGNO (operands[0]) % 2 == 0);
15509 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15513 /* Make sure the instructions are ordered with lower memory access first. */
15514 if (offsets[0] > offsets[1])
15516 gap = offsets[0] - offsets[1];
15517 offset = offsets[1];
15519 /* Swap the instructions such that lower memory is accessed first. */
15520 std::swap (operands[0], operands[1]);
15521 std::swap (operands[2], operands[3]);
15522 std::swap (align[0], align[1]);
15523 if (const_store)
15524 std::swap (operands[4], operands[5]);
15526 else
15528 gap = offsets[1] - offsets[0];
15529 offset = offsets[0];
15532 /* Make sure accesses are to consecutive memory locations. */
15533 if (gap != 4)
15534 return false;
15536 if (!align_ok_ldrd_strd (align[0], offset))
15537 return false;
15539 /* Make sure we generate legal instructions. */
15540 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15541 false, load))
15542 return true;
15544 /* In Thumb state, where registers are almost unconstrained, there
15545      is little hope of fixing it. */
15546 if (TARGET_THUMB2)
15547 return false;
15549 if (load && commute)
15551 /* Try reordering registers. */
15552 std::swap (operands[0], operands[1]);
15553 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15554 false, load))
15555 return true;
15558 if (const_store)
15560 /* If input registers are dead after this pattern, they can be
15561 reordered or replaced by other registers that are free in the
15562 current pattern. */
15563 if (!peep2_reg_dead_p (4, operands[0])
15564 || !peep2_reg_dead_p (4, operands[1]))
15565 return false;
15567 /* Try to reorder the input registers. */
15568 /* For example, the code
15569 mov r0, 0
15570 mov r1, 1
15571 str r1, [r2]
15572 str r0, [r2, #4]
15573 can be transformed into
15574 mov r1, 0
15575 mov r0, 1
15576 	     strd r0, r1, [r2]  */
15578 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15579 false, false))
15581 std::swap (operands[0], operands[1]);
15582 return true;
15585 /* Try to find a free DI register. */
15586 CLEAR_HARD_REG_SET (regset);
15587 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15588 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15589 while (true)
15591 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15592 if (tmp == NULL_RTX)
15593 return false;
15595 /* DREG must be an even-numbered register in DImode.
15596 Split it into SI registers. */
15597 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15598 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15599 gcc_assert (operands[0] != NULL_RTX);
15600 gcc_assert (operands[1] != NULL_RTX);
15601 gcc_assert (REGNO (operands[0]) % 2 == 0);
15602 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15604 return (operands_ok_ldrd_strd (operands[0], operands[1],
15605 base, offset,
15606 false, load));
15610 return false;
15616 /* Print a symbolic form of X to the debug file, F. */
15617 static void
15618 arm_print_value (FILE *f, rtx x)
15620 switch (GET_CODE (x))
15622 case CONST_INT:
15623 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15624 return;
15626 case CONST_DOUBLE:
15627 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15628 return;
15630 case CONST_VECTOR:
15632 int i;
15634 fprintf (f, "<");
15635 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15637 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15638 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15639 fputc (',', f);
15641 fprintf (f, ">");
15643 return;
15645 case CONST_STRING:
15646 fprintf (f, "\"%s\"", XSTR (x, 0));
15647 return;
15649 case SYMBOL_REF:
15650 fprintf (f, "`%s'", XSTR (x, 0));
15651 return;
15653 case LABEL_REF:
15654 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15655 return;
15657 case CONST:
15658 arm_print_value (f, XEXP (x, 0));
15659 return;
15661 case PLUS:
15662 arm_print_value (f, XEXP (x, 0));
15663 fprintf (f, "+");
15664 arm_print_value (f, XEXP (x, 1));
15665 return;
15667 case PC:
15668 fprintf (f, "pc");
15669 return;
15671 default:
15672 fprintf (f, "????");
15673 return;
15677 /* Routines for manipulation of the constant pool. */
15679 /* Arm instructions cannot load a large constant directly into a
15680    register; such constants have to come from a pc relative load.  The constant
15681 must therefore be placed in the addressable range of the pc
15682 relative load. Depending on the precise pc relative load
15683 instruction the range is somewhere between 256 bytes and 4k. This
15684 means that we often have to dump a constant inside a function, and
15685 generate code to branch around it.
15687 It is important to minimize this, since the branches will slow
15688 things down and make the code larger.
15690 Normally we can hide the table after an existing unconditional
15691 branch so that there is no interruption of the flow, but in the
15692 worst case the code looks like this:
15694 ldr rn, L1
15696 b L2
15697 align
15698 L1: .long value
15702 ldr rn, L3
15704 b L4
15705 align
15706 L3: .long value
15710 We fix this by performing a scan after scheduling, which notices
15711 which instructions need to have their operands fetched from the
15712 constant table and builds the table.
15714 The algorithm starts by building a table of all the constants that
15715 need fixing up and all the natural barriers in the function (places
15716 where a constant table can be dropped without breaking the flow).
15717 For each fixup we note how far the pc-relative replacement will be
15718 able to reach and the offset of the instruction into the function.
15720 Having built the table we then group the fixes together to form
15721 tables that are as large as possible (subject to addressing
15722 constraints) and emit each table of constants after the last
15723 barrier that is within range of all the instructions in the group.
15724 If a group does not contain a barrier, then we forcibly create one
15725 by inserting a jump instruction into the flow. Once the table has
15726 been inserted, the insns are then modified to reference the
15727 relevant entry in the pool.
15729 Possible enhancements to the algorithm (not implemented) are:
15731 1) For some processors and object formats, there may be benefit in
15732 aligning the pools to the start of cache lines; this alignment
15733 would need to be taken into account when calculating addressability
15734 of a pool. */
15736 /* These typedefs are located at the start of this file, so that
15737 they can be used in the prototypes there. This comment is to
15738 remind readers of that fact so that the following structures
15739 can be understood more easily.
15741 typedef struct minipool_node Mnode;
15742 typedef struct minipool_fixup Mfix; */
15744 struct minipool_node
15746 /* Doubly linked chain of entries. */
15747 Mnode * next;
15748 Mnode * prev;
15749   /* The maximum offset into the code at which this entry can be placed.  While
15750 pushing fixes for forward references, all entries are sorted in order
15751 of increasing max_address. */
15752 HOST_WIDE_INT max_address;
15753 /* Similarly for an entry inserted for a backwards ref. */
15754 HOST_WIDE_INT min_address;
15755 /* The number of fixes referencing this entry. This can become zero
15756 if we "unpush" an entry. In this case we ignore the entry when we
15757 come to emit the code. */
15758 int refcount;
15759 /* The offset from the start of the minipool. */
15760 HOST_WIDE_INT offset;
15761   /* The value in the table. */
15762 rtx value;
15763 /* The mode of value. */
15764 machine_mode mode;
15765   /* The size of the value.  With iWMMXt enabled,
15766      sizes > 4 also imply an alignment of 8 bytes. */
15767 int fix_size;
15770 struct minipool_fixup
15772 Mfix * next;
15773 rtx_insn * insn;
15774 HOST_WIDE_INT address;
15775 rtx * loc;
15776 machine_mode mode;
15777 int fix_size;
15778 rtx value;
15779 Mnode * minipool;
15780 HOST_WIDE_INT forwards;
15781 HOST_WIDE_INT backwards;
15784 /* Fixes less than a word need padding out to a word boundary. */
15785 #define MINIPOOL_FIX_SIZE(mode) \
15786 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
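/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4 (padded up to a word), while MINIPOOL_FIX_SIZE (DImode) is 8
   and MINIPOOL_FIX_SIZE (TImode) is 16.  */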
15788 static Mnode * minipool_vector_head;
15789 static Mnode * minipool_vector_tail;
15790 static rtx_code_label *minipool_vector_label;
15791 static int minipool_pad;
15793 /* The linked list of all minipool fixes required for this function. */
15794 Mfix * minipool_fix_head;
15795 Mfix * minipool_fix_tail;
15796 /* The fix entry for the current minipool, once it has been placed. */
15797 Mfix * minipool_barrier;
15799 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15800 #define JUMP_TABLES_IN_TEXT_SECTION 0
15801 #endif
15803 static HOST_WIDE_INT
15804 get_jump_table_size (rtx_jump_table_data *insn)
15806   /* ADDR_VECs only take room if read-only data goes into the text
15807 section. */
15808 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15810 rtx body = PATTERN (insn);
15811 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15812 HOST_WIDE_INT size;
15813 HOST_WIDE_INT modesize;
15815 modesize = GET_MODE_SIZE (GET_MODE (body));
15816 size = modesize * XVECLEN (body, elt);
15817 switch (modesize)
15819 case 1:
15820 /* Round up size of TBB table to a halfword boundary. */
15821 size = (size + 1) & ~HOST_WIDE_INT_1;
15822 break;
15823 case 2:
15824 /* No padding necessary for TBH. */
15825 break;
15826 case 4:
15827 /* Add two bytes for alignment on Thumb. */
15828 if (TARGET_THUMB)
15829 size += 2;
15830 break;
15831 default:
15832 gcc_unreachable ();
15834 return size;
15837 return 0;
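/* For illustration, an 11-entry ADDR_DIFF_VEC in QImode (a TBB table)
   yields 11 bytes rounded up to 12; the same table in HImode (TBH) yields
   22 bytes with no padding; and an 11-entry SImode table on Thumb yields
   44 + 2 = 46 bytes.  */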
15840 /* Return the maximum amount of padding that will be inserted before
15841 label LABEL. */
15843 static HOST_WIDE_INT
15844 get_label_padding (rtx label)
15846 HOST_WIDE_INT align, min_insn_size;
15848 align = 1 << label_to_alignment (label);
15849 min_insn_size = TARGET_THUMB ? 2 : 4;
15850 return align > min_insn_size ? align - min_insn_size : 0;
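/* For example, a label aligned to 1 << 3 == 8 bytes may be preceded by up
   to 8 - 2 == 6 bytes of padding on Thumb, or 8 - 4 == 4 bytes in ARM
   state; labels aligned no more strictly than an instruction contribute
   no padding.  */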
15853 /* Move a minipool fix MP from its current location to before MAX_MP.
15854 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15855 constraints may need updating. */
15856 static Mnode *
15857 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15858 HOST_WIDE_INT max_address)
15860 /* The code below assumes these are different. */
15861 gcc_assert (mp != max_mp);
15863 if (max_mp == NULL)
15865 if (max_address < mp->max_address)
15866 mp->max_address = max_address;
15868 else
15870 if (max_address > max_mp->max_address - mp->fix_size)
15871 mp->max_address = max_mp->max_address - mp->fix_size;
15872 else
15873 mp->max_address = max_address;
15875 /* Unlink MP from its current position. Since max_mp is non-null,
15876 mp->prev must be non-null. */
15877 mp->prev->next = mp->next;
15878 if (mp->next != NULL)
15879 mp->next->prev = mp->prev;
15880 else
15881 minipool_vector_tail = mp->prev;
15883 /* Re-insert it before MAX_MP. */
15884 mp->next = max_mp;
15885 mp->prev = max_mp->prev;
15886 max_mp->prev = mp;
15888 if (mp->prev != NULL)
15889 mp->prev->next = mp;
15890 else
15891 minipool_vector_head = mp;
15894 /* Save the new entry. */
15895 max_mp = mp;
15897 /* Scan over the preceding entries and adjust their addresses as
15898 required. */
15899 while (mp->prev != NULL
15900 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15902 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15903 mp = mp->prev;
15906 return max_mp;
15909 /* Add a constant to the minipool for a forward reference. Returns the
15910 node added or NULL if the constant will not fit in this pool. */
15911 static Mnode *
15912 add_minipool_forward_ref (Mfix *fix)
15914 /* If set, max_mp is the first pool_entry that has a lower
15915 constraint than the one we are trying to add. */
15916 Mnode * max_mp = NULL;
15917 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15918 Mnode * mp;
15920 /* If the minipool starts before the end of FIX->INSN then this FIX
15921      cannot be placed into the current pool.  Furthermore, adding the
15922 new constant pool entry may cause the pool to start FIX_SIZE bytes
15923 earlier. */
15924 if (minipool_vector_head &&
15925 (fix->address + get_attr_length (fix->insn)
15926 >= minipool_vector_head->max_address - fix->fix_size))
15927 return NULL;
15929 /* Scan the pool to see if a constant with the same value has
15930 already been added. While we are doing this, also note the
15931 location where we must insert the constant if it doesn't already
15932 exist. */
15933 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15935 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15936 && fix->mode == mp->mode
15937 && (!LABEL_P (fix->value)
15938 || (CODE_LABEL_NUMBER (fix->value)
15939 == CODE_LABEL_NUMBER (mp->value)))
15940 && rtx_equal_p (fix->value, mp->value))
15942 /* More than one fix references this entry. */
15943 mp->refcount++;
15944 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15947 /* Note the insertion point if necessary. */
15948 if (max_mp == NULL
15949 && mp->max_address > max_address)
15950 max_mp = mp;
15952       /* If we are inserting an 8-byte aligned quantity and
15953 we have not already found an insertion point, then
15954 make sure that all such 8-byte aligned quantities are
15955 placed at the start of the pool. */
15956 if (ARM_DOUBLEWORD_ALIGN
15957 && max_mp == NULL
15958 && fix->fix_size >= 8
15959 && mp->fix_size < 8)
15961 max_mp = mp;
15962 max_address = mp->max_address;
15966 /* The value is not currently in the minipool, so we need to create
15967 a new entry for it. If MAX_MP is NULL, the entry will be put on
15968 the end of the list since the placement is less constrained than
15969 any existing entry. Otherwise, we insert the new fix before
15970 MAX_MP and, if necessary, adjust the constraints on the other
15971 entries. */
15972 mp = XNEW (Mnode);
15973 mp->fix_size = fix->fix_size;
15974 mp->mode = fix->mode;
15975 mp->value = fix->value;
15976 mp->refcount = 1;
15977 /* Not yet required for a backwards ref. */
15978 mp->min_address = -65536;
15980 if (max_mp == NULL)
15982 mp->max_address = max_address;
15983 mp->next = NULL;
15984 mp->prev = minipool_vector_tail;
15986 if (mp->prev == NULL)
15988 minipool_vector_head = mp;
15989 minipool_vector_label = gen_label_rtx ();
15991 else
15992 mp->prev->next = mp;
15994 minipool_vector_tail = mp;
15996 else
15998 if (max_address > max_mp->max_address - mp->fix_size)
15999 mp->max_address = max_mp->max_address - mp->fix_size;
16000 else
16001 mp->max_address = max_address;
16003 mp->next = max_mp;
16004 mp->prev = max_mp->prev;
16005 max_mp->prev = mp;
16006 if (mp->prev != NULL)
16007 mp->prev->next = mp;
16008 else
16009 minipool_vector_head = mp;
16012 /* Save the new entry. */
16013 max_mp = mp;
16015 /* Scan over the preceding entries and adjust their addresses as
16016 required. */
16017 while (mp->prev != NULL
16018 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16020 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16021 mp = mp->prev;
16024 return max_mp;
16027 static Mnode *
16028 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16029 HOST_WIDE_INT min_address)
16031 HOST_WIDE_INT offset;
16033 /* The code below assumes these are different. */
16034 gcc_assert (mp != min_mp);
16036 if (min_mp == NULL)
16038 if (min_address > mp->min_address)
16039 mp->min_address = min_address;
16041 else
16043 /* We will adjust this below if it is too loose. */
16044 mp->min_address = min_address;
16046 /* Unlink MP from its current position. Since min_mp is non-null,
16047 mp->next must be non-null. */
16048 mp->next->prev = mp->prev;
16049 if (mp->prev != NULL)
16050 mp->prev->next = mp->next;
16051 else
16052 minipool_vector_head = mp->next;
16054 /* Reinsert it after MIN_MP. */
16055 mp->prev = min_mp;
16056 mp->next = min_mp->next;
16057 min_mp->next = mp;
16058 if (mp->next != NULL)
16059 mp->next->prev = mp;
16060 else
16061 minipool_vector_tail = mp;
16064 min_mp = mp;
16066 offset = 0;
16067 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16069 mp->offset = offset;
16070 if (mp->refcount > 0)
16071 offset += mp->fix_size;
16073 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16074 mp->next->min_address = mp->min_address + mp->fix_size;
16077 return min_mp;
16080 /* Add a constant to the minipool for a backward reference. Returns the
16081 node added or NULL if the constant will not fit in this pool.
16083 Note that the code for insertion for a backwards reference can be
16084 somewhat confusing because the calculated offsets for each fix do
16085 not take into account the size of the pool (which is still under
16086    construction). */
16087 static Mnode *
16088 add_minipool_backward_ref (Mfix *fix)
16090 /* If set, min_mp is the last pool_entry that has a lower constraint
16091 than the one we are trying to add. */
16092 Mnode *min_mp = NULL;
16093 /* This can be negative, since it is only a constraint. */
16094 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16095 Mnode *mp;
16097 /* If we can't reach the current pool from this insn, or if we can't
16098 insert this entry at the end of the pool without pushing other
16099 fixes out of range, then we don't try. This ensures that we
16100 can't fail later on. */
16101 if (min_address >= minipool_barrier->address
16102 || (minipool_vector_tail->min_address + fix->fix_size
16103 >= minipool_barrier->address))
16104 return NULL;
16106 /* Scan the pool to see if a constant with the same value has
16107 already been added. While we are doing this, also note the
16108 location where we must insert the constant if it doesn't already
16109 exist. */
16110 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16112 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16113 && fix->mode == mp->mode
16114 && (!LABEL_P (fix->value)
16115 || (CODE_LABEL_NUMBER (fix->value)
16116 == CODE_LABEL_NUMBER (mp->value)))
16117 && rtx_equal_p (fix->value, mp->value)
16118 /* Check that there is enough slack to move this entry to the
16119 end of the table (this is conservative). */
16120 && (mp->max_address
16121 > (minipool_barrier->address
16122 + minipool_vector_tail->offset
16123 + minipool_vector_tail->fix_size)))
16125 mp->refcount++;
16126 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16129 if (min_mp != NULL)
16130 mp->min_address += fix->fix_size;
16131 else
16133 /* Note the insertion point if necessary. */
16134 if (mp->min_address < min_address)
16136 	      /* For now, we do not allow the insertion of nodes requiring
16137 		 8-byte alignment anywhere but at the start of the pool. */
16138 if (ARM_DOUBLEWORD_ALIGN
16139 && fix->fix_size >= 8 && mp->fix_size < 8)
16140 return NULL;
16141 else
16142 min_mp = mp;
16144 else if (mp->max_address
16145 < minipool_barrier->address + mp->offset + fix->fix_size)
16147 /* Inserting before this entry would push the fix beyond
16148 its maximum address (which can happen if we have
16149 re-located a forwards fix); force the new fix to come
16150 after it. */
16151 if (ARM_DOUBLEWORD_ALIGN
16152 && fix->fix_size >= 8 && mp->fix_size < 8)
16153 return NULL;
16154 else
16156 min_mp = mp;
16157 min_address = mp->min_address + fix->fix_size;
16160 /* Do not insert a non-8-byte aligned quantity before 8-byte
16161 aligned quantities. */
16162 else if (ARM_DOUBLEWORD_ALIGN
16163 && fix->fix_size < 8
16164 && mp->fix_size >= 8)
16166 min_mp = mp;
16167 min_address = mp->min_address + fix->fix_size;
16172 /* We need to create a new entry. */
16173 mp = XNEW (Mnode);
16174 mp->fix_size = fix->fix_size;
16175 mp->mode = fix->mode;
16176 mp->value = fix->value;
16177 mp->refcount = 1;
16178 mp->max_address = minipool_barrier->address + 65536;
16180 mp->min_address = min_address;
16182 if (min_mp == NULL)
16184 mp->prev = NULL;
16185 mp->next = minipool_vector_head;
16187 if (mp->next == NULL)
16189 minipool_vector_tail = mp;
16190 minipool_vector_label = gen_label_rtx ();
16192 else
16193 mp->next->prev = mp;
16195 minipool_vector_head = mp;
16197 else
16199 mp->next = min_mp->next;
16200 mp->prev = min_mp;
16201 min_mp->next = mp;
16203 if (mp->next != NULL)
16204 mp->next->prev = mp;
16205 else
16206 minipool_vector_tail = mp;
16209 /* Save the new entry. */
16210 min_mp = mp;
16212 if (mp->prev)
16213 mp = mp->prev;
16214 else
16215 mp->offset = 0;
16217 /* Scan over the following entries and adjust their offsets. */
16218 while (mp->next != NULL)
16220 if (mp->next->min_address < mp->min_address + mp->fix_size)
16221 mp->next->min_address = mp->min_address + mp->fix_size;
16223 if (mp->refcount)
16224 mp->next->offset = mp->offset + mp->fix_size;
16225 else
16226 mp->next->offset = mp->offset;
16228 mp = mp->next;
16231 return min_mp;
16234 static void
16235 assign_minipool_offsets (Mfix *barrier)
16237 HOST_WIDE_INT offset = 0;
16238 Mnode *mp;
16240 minipool_barrier = barrier;
16242 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16244 mp->offset = offset;
16246 if (mp->refcount > 0)
16247 offset += mp->fix_size;
16251 /* Output the literal table */
16252 static void
16253 dump_minipool (rtx_insn *scan)
16255 Mnode * mp;
16256 Mnode * nmp;
16257 int align64 = 0;
16259 if (ARM_DOUBLEWORD_ALIGN)
16260 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16261 if (mp->refcount > 0 && mp->fix_size >= 8)
16263 align64 = 1;
16264 break;
16267 if (dump_file)
16268 fprintf (dump_file,
16269 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16270 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16272 scan = emit_label_after (gen_label_rtx (), scan);
16273 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16274 scan = emit_label_after (minipool_vector_label, scan);
16276 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16278 if (mp->refcount > 0)
16280 if (dump_file)
16282 fprintf (dump_file,
16283 ";; Offset %u, min %ld, max %ld ",
16284 (unsigned) mp->offset, (unsigned long) mp->min_address,
16285 (unsigned long) mp->max_address);
16286 arm_print_value (dump_file, mp->value);
16287 fputc ('\n', dump_file);
16290 rtx val = copy_rtx (mp->value);
16292 switch (GET_MODE_SIZE (mp->mode))
16294 #ifdef HAVE_consttable_1
16295 case 1:
16296 scan = emit_insn_after (gen_consttable_1 (val), scan);
16297 break;
16299 #endif
16300 #ifdef HAVE_consttable_2
16301 case 2:
16302 scan = emit_insn_after (gen_consttable_2 (val), scan);
16303 break;
16305 #endif
16306 #ifdef HAVE_consttable_4
16307 case 4:
16308 scan = emit_insn_after (gen_consttable_4 (val), scan);
16309 break;
16311 #endif
16312 #ifdef HAVE_consttable_8
16313 case 8:
16314 scan = emit_insn_after (gen_consttable_8 (val), scan);
16315 break;
16317 #endif
16318 #ifdef HAVE_consttable_16
16319 case 16:
16320 scan = emit_insn_after (gen_consttable_16 (val), scan);
16321 break;
16323 #endif
16324 default:
16325 gcc_unreachable ();
16329 nmp = mp->next;
16330 free (mp);
16333 minipool_vector_head = minipool_vector_tail = NULL;
16334 scan = emit_insn_after (gen_consttable_end (), scan);
16335 scan = emit_barrier_after (scan);
16338 /* Return the cost of forcibly inserting a barrier after INSN. */
16339 static int
16340 arm_barrier_cost (rtx_insn *insn)
16342 /* Basing the location of the pool on the loop depth is preferable,
16343 but at the moment, the basic block information seems to be
16344 corrupt by this stage of the compilation. */
16345 int base_cost = 50;
16346 rtx_insn *next = next_nonnote_insn (insn);
16348 if (next != NULL && LABEL_P (next))
16349 base_cost -= 20;
16351 switch (GET_CODE (insn))
16353 case CODE_LABEL:
16354 /* It will always be better to place the table before the label, rather
16355 than after it. */
16356 return 50;
16358 case INSN:
16359 case CALL_INSN:
16360 return base_cost;
16362 case JUMP_INSN:
16363 return base_cost - 10;
16365 default:
16366 return base_cost + 10;
16370 /* Find the best place in the insn stream in the range
16371 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16372 Create the barrier by inserting a jump and add a new fix entry for
16373 it. */
16374 static Mfix *
16375 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16377 HOST_WIDE_INT count = 0;
16378 rtx_barrier *barrier;
16379 rtx_insn *from = fix->insn;
16380 /* The instruction after which we will insert the jump. */
16381 rtx_insn *selected = NULL;
16382 int selected_cost;
16383 /* The address at which the jump instruction will be placed. */
16384 HOST_WIDE_INT selected_address;
16385 Mfix * new_fix;
16386 HOST_WIDE_INT max_count = max_address - fix->address;
16387 rtx_code_label *label = gen_label_rtx ();
16389 selected_cost = arm_barrier_cost (from);
16390 selected_address = fix->address;
16392 while (from && count < max_count)
16394 rtx_jump_table_data *tmp;
16395 int new_cost;
16397 /* This code shouldn't have been called if there was a natural barrier
16398 within range. */
16399 gcc_assert (!BARRIER_P (from));
16401 /* Count the length of this insn. This must stay in sync with the
16402 code that pushes minipool fixes. */
16403 if (LABEL_P (from))
16404 count += get_label_padding (from);
16405 else
16406 count += get_attr_length (from);
16408 /* If there is a jump table, add its length. */
16409 if (tablejump_p (from, NULL, &tmp))
16411 count += get_jump_table_size (tmp);
16413 /* Jump tables aren't in a basic block, so base the cost on
16414 the dispatch insn. If we select this location, we will
16415 still put the pool after the table. */
16416 new_cost = arm_barrier_cost (from);
16418 if (count < max_count
16419 && (!selected || new_cost <= selected_cost))
16421 selected = tmp;
16422 selected_cost = new_cost;
16423 selected_address = fix->address + count;
16426 /* Continue after the dispatch table. */
16427 from = NEXT_INSN (tmp);
16428 continue;
16431 new_cost = arm_barrier_cost (from);
16433 if (count < max_count
16434 && (!selected || new_cost <= selected_cost))
16436 selected = from;
16437 selected_cost = new_cost;
16438 selected_address = fix->address + count;
16441 from = NEXT_INSN (from);
16444 /* Make sure that we found a place to insert the jump. */
16445 gcc_assert (selected);
16447 /* Make sure we do not split a call and its corresponding
16448 CALL_ARG_LOCATION note. */
16449 if (CALL_P (selected))
16451 rtx_insn *next = NEXT_INSN (selected);
16452 if (next && NOTE_P (next)
16453 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16454 selected = next;
16457 /* Create a new JUMP_INSN that branches around a barrier. */
16458 from = emit_jump_insn_after (gen_jump (label), selected);
16459 JUMP_LABEL (from) = label;
16460 barrier = emit_barrier_after (from);
16461 emit_label_after (label, barrier);
16463 /* Create a minipool barrier entry for the new barrier. */
16464 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16465 new_fix->insn = barrier;
16466 new_fix->address = selected_address;
16467 new_fix->next = fix->next;
16468 fix->next = new_fix;
16470 return new_fix;
16473 /* Record that there is a natural barrier in the insn stream at
16474 ADDRESS. */
16475 static void
16476 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16478 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16480 fix->insn = insn;
16481 fix->address = address;
16483 fix->next = NULL;
16484 if (minipool_fix_head != NULL)
16485 minipool_fix_tail->next = fix;
16486 else
16487 minipool_fix_head = fix;
16489 minipool_fix_tail = fix;
16492 /* Record INSN, which will need fixing up to load a value from the
16493 minipool. ADDRESS is the offset of the insn since the start of the
16494 function; LOC is a pointer to the part of the insn which requires
16495 fixing; VALUE is the constant that must be loaded, which is of type
16496 MODE. */
16497 static void
16498 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16499 machine_mode mode, rtx value)
16501 gcc_assert (!arm_disable_literal_pool);
16502 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16504 fix->insn = insn;
16505 fix->address = address;
16506 fix->loc = loc;
16507 fix->mode = mode;
16508 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16509 fix->value = value;
16510 fix->forwards = get_attr_pool_range (insn);
16511 fix->backwards = get_attr_neg_pool_range (insn);
16512 fix->minipool = NULL;
16514 /* If an insn doesn't have a range defined for it, then it isn't
16515 expecting to be reworked by this code. Better to stop now than
16516 to generate duff assembly code. */
16517 gcc_assert (fix->forwards || fix->backwards);
16519 /* If an entry requires 8-byte alignment then assume all constant pools
16520 require 4 bytes of padding. Trying to do this later on a per-pool
16521 basis is awkward because existing pool entries have to be modified. */
16522 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16523 minipool_pad = 4;
16525 if (dump_file)
16527 fprintf (dump_file,
16528 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16529 GET_MODE_NAME (mode),
16530 INSN_UID (insn), (unsigned long) address,
16531 -1 * (long)fix->backwards, (long)fix->forwards);
16532 arm_print_value (dump_file, fix->value);
16533 fprintf (dump_file, "\n");
16536 /* Add it to the chain of fixes. */
16537 fix->next = NULL;
16539 if (minipool_fix_head != NULL)
16540 minipool_fix_tail->next = fix;
16541 else
16542 minipool_fix_head = fix;
16544 minipool_fix_tail = fix;
16547 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16548 Returns the number of insns needed, or 99 if we always want to synthesize
16549 the value. */
16551 arm_max_const_double_inline_cost ()
16553 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16556 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16557 Returns the number of insns needed, or 99 if we don't know how to
16558 do it. */
16560 arm_const_double_inline_cost (rtx val)
16562 rtx lowpart, highpart;
16563 machine_mode mode;
16565 mode = GET_MODE (val);
16567 if (mode == VOIDmode)
16568 mode = DImode;
16570 gcc_assert (GET_MODE_SIZE (mode) == 8);
16572 lowpart = gen_lowpart (SImode, val);
16573 highpart = gen_highpart_mode (SImode, mode, val);
16575 gcc_assert (CONST_INT_P (lowpart));
16576 gcc_assert (CONST_INT_P (highpart));
16578 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16579 NULL_RTX, NULL_RTX, 0, 0)
16580 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16581 NULL_RTX, NULL_RTX, 0, 0));
16584 /* Cost of loading a SImode constant. */
16585 static inline int
16586 arm_const_inline_cost (enum rtx_code code, rtx val)
16588 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16589 NULL_RTX, NULL_RTX, 1, 0);
16592 /* Return true if it is worthwhile to split a 64-bit constant into two
16593 32-bit operations. This is the case if optimizing for size, or
16594 if we have load delay slots, or if one 32-bit part can be done with
16595 a single data operation. */
16596 bool
16597 arm_const_double_by_parts (rtx val)
16599 machine_mode mode = GET_MODE (val);
16600 rtx part;
16602 if (optimize_size || arm_ld_sched)
16603 return true;
16605 if (mode == VOIDmode)
16606 mode = DImode;
16608 part = gen_highpart_mode (SImode, mode, val);
16610 gcc_assert (CONST_INT_P (part));
16612 if (const_ok_for_arm (INTVAL (part))
16613 || const_ok_for_arm (~INTVAL (part)))
16614 return true;
16616 part = gen_lowpart (SImode, val);
16618 gcc_assert (CONST_INT_P (part));
16620 if (const_ok_for_arm (INTVAL (part))
16621 || const_ok_for_arm (~INTVAL (part)))
16622 return true;
16624 return false;
16627 /* Return true if it is possible to inline both the high and low parts
16628 of a 64-bit constant into 32-bit data processing instructions. */
16629 bool
16630 arm_const_double_by_immediates (rtx val)
16632 machine_mode mode = GET_MODE (val);
16633 rtx part;
16635 if (mode == VOIDmode)
16636 mode = DImode;
16638 part = gen_highpart_mode (SImode, mode, val);
16640 gcc_assert (CONST_INT_P (part));
16642 if (!const_ok_for_arm (INTVAL (part)))
16643 return false;
16645 part = gen_lowpart (SImode, val);
16647 gcc_assert (CONST_INT_P (part));
16649 if (!const_ok_for_arm (INTVAL (part)))
16650 return false;
16652 return true;
16655 /* Scan INSN and note any of its operands that need fixing.
16656 If DO_PUSHES is false we do not actually push any of the fixups
16657 needed. */
16658 static void
16659 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16661 int opno;
16663 extract_constrain_insn (insn);
16665 if (recog_data.n_alternatives == 0)
16666 return;
16668 /* Fill in recog_op_alt with information about the constraints of
16669 this insn. */
16670 preprocess_constraints (insn);
16672 const operand_alternative *op_alt = which_op_alt ();
16673 for (opno = 0; opno < recog_data.n_operands; opno++)
16675 /* Things we need to fix can only occur in inputs. */
16676 if (recog_data.operand_type[opno] != OP_IN)
16677 continue;
16679 /* If this alternative is a memory reference, then any mention
16680 of constants in this alternative is really to fool reload
16681 into allowing us to accept one there. We need to fix them up
16682 now so that we output the right code. */
16683 if (op_alt[opno].memory_ok)
16685 rtx op = recog_data.operand[opno];
16687 if (CONSTANT_P (op))
16689 if (do_pushes)
16690 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16691 recog_data.operand_mode[opno], op);
16693 else if (MEM_P (op)
16694 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16695 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16697 if (do_pushes)
16699 rtx cop = avoid_constant_pool_reference (op);
16701 /* Casting the address of something to a mode narrower
16702 than a word can cause avoid_constant_pool_reference()
16703 to return the pool reference itself. That's no good to
16704 		     us here.  Let's just hope that we can use the
16705 constant pool value directly. */
16706 if (op == cop)
16707 cop = get_pool_constant (XEXP (op, 0));
16709 push_minipool_fix (insn, address,
16710 recog_data.operand_loc[opno],
16711 recog_data.operand_mode[opno], cop);
16718 return;
16721 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16722 and unions in the context of ARMv8-M Security Extensions. It is used as a
16723 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16724    functions.  The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16725    or four masks, depending on whether it is being computed for a
16726    'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16727    respectively.  The tree for the type of the argument, or of a field within
16728    an argument, is passed in ARG_TYPE; the current register this argument or
16729    field starts in is kept in the pointer REGNO and updated accordingly; the
16730    bit this argument or field starts at is passed in STARTING_BIT; and the
16731    last used bit is kept in LAST_USED_BIT, which is also updated accordingly. */
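/* Illustrative example (not in the original sources): for an argument of
   type struct { char c; int i; } starting in r0, 'c' occupies bits 0-7 of
   r0 and 'i' occupies all of r1, so 0xffffff00 is accumulated into
   PADDING_BITS_TO_CLEAR[0] -- bits 8-31 of r0 are recorded as padding bits
   that must be cleared.  */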
16733 static unsigned HOST_WIDE_INT
16734 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16735 uint32_t * padding_bits_to_clear,
16736 unsigned starting_bit, int * last_used_bit)
16739 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16741 if (TREE_CODE (arg_type) == RECORD_TYPE)
16743 unsigned current_bit = starting_bit;
16744 tree field;
16745 long int offset, size;
16748 field = TYPE_FIELDS (arg_type);
16749 while (field)
16751 /* The offset within a structure is always an offset from
16752 	     the start of that structure.  Make sure we take that into account in
16753 	     the calculation of the register-based offset that we use here. */
16754 offset = starting_bit;
16755 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16756 offset %= 32;
16758 	  /* This is the actual size of the field; for bitfields this is the
16759 	     bitfield width and not the container size. */
16760 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16762 if (*last_used_bit != offset)
16764 if (offset < *last_used_bit)
16766 		  /* This field's offset is before the 'last_used_bit'; that
16767 		     means this field goes in the next register.  So we need to
16768 pad the rest of the current register and increase the
16769 register number. */
16770 uint32_t mask;
16771 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16772 mask++;
16774 padding_bits_to_clear[*regno] |= mask;
16775 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16776 (*regno)++;
16778 else
16780 /* Otherwise we pad the bits between the last field's end and
16781 the start of the new field. */
16782 uint32_t mask;
16784 mask = ((uint32_t)-1) >> (32 - offset);
16785 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16786 padding_bits_to_clear[*regno] |= mask;
16788 current_bit = offset;
16791 /* Calculate further padding bits for inner structs/unions too. */
16792 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16794 *last_used_bit = current_bit;
16795 not_to_clear_reg_mask
16796 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16797 padding_bits_to_clear, offset,
16798 last_used_bit);
16800 else
16802 /* Update 'current_bit' with this field's size. If the
16803 'current_bit' lies in a subsequent register, update 'regno' and
16804 reset 'current_bit' to point to the current bit in that new
16805 register. */
16806 current_bit += size;
16807 while (current_bit >= 32)
16809 current_bit-=32;
16810 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16811 (*regno)++;
16813 *last_used_bit = current_bit;
16816 field = TREE_CHAIN (field);
16818 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16820 else if (TREE_CODE (arg_type) == UNION_TYPE)
16822 tree field, field_t;
16823 int i, regno_t, field_size;
16824 int max_reg = -1;
16825 int max_bit = -1;
16826 uint32_t mask;
16827 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16828 = {-1, -1, -1, -1};
16830 /* To compute the padding bits in a union we only consider bits as
16831 padding bits if they are always either a padding bit or fall outside a
16832 	 field's size for all fields in the union. */
16833 field = TYPE_FIELDS (arg_type);
16834 while (field)
16836 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16837 = {0U, 0U, 0U, 0U};
16838 int last_used_bit_t = *last_used_bit;
16839 regno_t = *regno;
16840 field_t = TREE_TYPE (field);
16842 /* If the field's type is either a record or a union make sure to
16843 compute their padding bits too. */
16844 if (RECORD_OR_UNION_TYPE_P (field_t))
16845 not_to_clear_reg_mask
16846 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16847 &padding_bits_to_clear_t[0],
16848 starting_bit, &last_used_bit_t);
16849 else
16851 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16852 regno_t = (field_size / 32) + *regno;
16853 last_used_bit_t = (starting_bit + field_size) % 32;
16856 for (i = *regno; i < regno_t; i++)
16858 /* For all but the last register used by this field only keep the
16859 padding bits that were padding bits in this field. */
16860 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16863 /* For the last register, keep all padding bits that were padding
16864 bits in this field and any padding bits that are still valid
16865 as padding bits but fall outside of this field's size. */
16866 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16867 padding_bits_to_clear_res[regno_t]
16868 &= padding_bits_to_clear_t[regno_t] | mask;
16870 /* Update the maximum size of the fields in terms of registers used
16871 ('max_reg') and the 'last_used_bit' in said register. */
16872 if (max_reg < regno_t)
16874 max_reg = regno_t;
16875 max_bit = last_used_bit_t;
16877 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16878 max_bit = last_used_bit_t;
16880 field = TREE_CHAIN (field);
16883 /* Update the current padding_bits_to_clear using the intersection of the
16884 padding bits of all the fields. */
16885 for (i=*regno; i < max_reg; i++)
16886 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16888 /* Do not keep trailing padding bits, we do not know yet whether this
16889 is the end of the argument. */
16890 mask = ((uint32_t) 1 << max_bit) - 1;
16891 padding_bits_to_clear[max_reg]
16892 |= padding_bits_to_clear_res[max_reg] & mask;
16894 *regno = max_reg;
16895 *last_used_bit = max_bit;
16897 else
16898 /* This function should only be used for structs and unions. */
16899 gcc_unreachable ();
16901 return not_to_clear_reg_mask;
16904 /* In the context of ARMv8-M Security Extensions, this function is used for both
16905 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16906 registers are used when returning or passing arguments, which is then
16907 returned as a mask. It will also compute a mask to indicate padding/unused
16908 bits for each of these registers, and passes this through the
16909 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16910 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16911 the starting register used to pass this argument or return value is passed
16912 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16913 for struct and union types. */
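/* For illustration: a 64-bit integer argument passed in r0/r1 produces a
   mask with bits 0 and 1 set (via the ARM_NUM_REGS path below), while a
   small struct argument additionally records its padding bits, such as
   bits 8-31 of r0 for struct { char c; int i; }, in PADDING_BITS_TO_CLEAR
   via comp_not_to_clear_mask_str_un.  */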
16915 static unsigned HOST_WIDE_INT
16916 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16917 uint32_t * padding_bits_to_clear)
16920 int last_used_bit = 0;
16921 unsigned HOST_WIDE_INT not_to_clear_mask;
16923 if (RECORD_OR_UNION_TYPE_P (arg_type))
16925 not_to_clear_mask
16926 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16927 padding_bits_to_clear, 0,
16928 &last_used_bit);
16931 /* If the 'last_used_bit' is not zero, that means we are still using a
16932 part of the last 'regno'. In such cases we must clear the trailing
16933 	 bits.  Otherwise we are not using regno and we should mark it as to
16934 	 be cleared. */
16935 if (last_used_bit != 0)
16936 padding_bits_to_clear[regno]
16937 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16938 else
16939 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16941 else
16943 not_to_clear_mask = 0;
16944 /* We are not dealing with structs nor unions. So these arguments may be
16945 passed in floating point registers too. In some cases a BLKmode is
16946 used when returning or passing arguments in multiple VFP registers. */
16947 if (GET_MODE (arg_rtx) == BLKmode)
16949 int i, arg_regs;
16950 rtx reg;
16952 /* This should really only occur when dealing with the hard-float
16953 ABI. */
16954 gcc_assert (TARGET_HARD_FLOAT_ABI);
16956 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16958 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16959 gcc_assert (REG_P (reg));
16961 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16963 /* If we are dealing with DF mode, make sure we don't
16964 clear either of the registers it addresses. */
16965 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
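/* The mask built below is a contiguous run of set bits covering registers
   REGNO (reg) .. REGNO (reg) + arg_regs - 1, computed as
   (1 << (regno + arg_regs)) - (1 << regno).  */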
16966 if (arg_regs > 1)
16968 unsigned HOST_WIDE_INT mask;
16969 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16970 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16971 not_to_clear_mask |= mask;
16975 else
16977 /* Otherwise we can rely on the MODE to determine how many registers
16978 are being used by this argument. */
16979 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16980 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16981 if (arg_regs > 1)
16983 unsigned HOST_WIDE_INT
16984 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16985 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16986 not_to_clear_mask |= mask;
16991 return not_to_clear_mask;
16994 /* Clears caller saved registers not used to pass arguments before a
16995 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16996 registers is done in __gnu_cmse_nonsecure_call libcall.
16997 See libgcc/config/arm/cmse_nonsecure_call.S. */
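/* In outline, the sequence emitted below for each such call is roughly:
   clear the padding bits of the argument registers, clear the LSB of the
   call address (using a right/left shift pair), and finally zero every
   caller-saved core register (and, for -mfloat-abi=hard, every caller-saved
   VFP register) that does not carry an argument, just before the call
   itself.  */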
16999 static void
17000 cmse_nonsecure_call_clear_caller_saved (void)
17002 basic_block bb;
17004 FOR_EACH_BB_FN (bb, cfun)
17006 rtx_insn *insn;
17008 FOR_BB_INSNS (bb, insn)
17010 uint64_t to_clear_mask, float_mask;
17011 rtx_insn *seq;
17012 rtx pat, call, unspec, reg, cleared_reg, tmp;
17013 unsigned int regno, maxregno;
17014 rtx address;
17015 CUMULATIVE_ARGS args_so_far_v;
17016 cumulative_args_t args_so_far;
17017 tree arg_type, fntype;
17018 bool using_r4, first_param = true;
17019 function_args_iterator args_iter;
17020 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17021 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
17023 if (!NONDEBUG_INSN_P (insn))
17024 continue;
17026 if (!CALL_P (insn))
17027 continue;
17029 pat = PATTERN (insn);
17030 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17031 call = XVECEXP (pat, 0, 0);
17033 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17034 if (GET_CODE (call) == SET)
17035 call = SET_SRC (call);
17037 /* Check if it is a cmse_nonsecure_call. */
17038 unspec = XEXP (call, 0);
17039 if (GET_CODE (unspec) != UNSPEC
17040 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17041 continue;
17043 /* Determine the caller-saved registers we need to clear. */
17044 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17045 maxregno = NUM_ARG_REGS - 1;
17046 /* Only look at the caller-saved floating point registers in case of
17047 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17048 lazy stores and loads which clear both caller- and callee-saved
17049 registers. */
17050 if (TARGET_HARD_FLOAT_ABI)
17052 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17053 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17054 to_clear_mask |= float_mask;
17055 maxregno = D7_VFP_REGNUM;
17058 /* Make sure the register used to hold the function address is not
17059 cleared. */
17060 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17061 gcc_assert (MEM_P (address));
17062 gcc_assert (REG_P (XEXP (address, 0)));
17063 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17065 /* Set basic block of call insn so that df rescan is performed on
17066 insns inserted here. */
17067 set_block_for_insn (insn, bb);
17068 df_set_flags (DF_DEFER_INSN_RESCAN);
17069 start_sequence ();
17071 /* Make sure the scheduler doesn't schedule other insns beyond
17072 here. */
17073 emit_insn (gen_blockage ());
17075 /* Walk through all arguments and clear registers appropriately. */
17077 fntype = TREE_TYPE (MEM_EXPR (address));
17078 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17079 NULL_TREE);
17080 args_so_far = pack_cumulative_args (&args_so_far_v);
17081 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17083 rtx arg_rtx;
17084 machine_mode arg_mode = TYPE_MODE (arg_type);
17086 if (VOID_TYPE_P (arg_type))
17087 continue;
17089 if (!first_param)
17090 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17091 true);
17093 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17094 true);
17095 gcc_assert (REG_P (arg_rtx));
17096 to_clear_mask
17097 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17098 REGNO (arg_rtx),
17099 padding_bits_to_clear_ptr);
17101 first_param = false;
17104 /* Clear padding bits where needed. */
17105 cleared_reg = XEXP (address, 0);
17106 reg = gen_rtx_REG (SImode, IP_REGNUM);
17107 using_r4 = false;
17108 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17110 if (padding_bits_to_clear[regno] == 0)
17111 continue;
17113 /* If this is a Thumb-1 target copy the address of the function
17114 we are calling from 'r4' into 'ip' such that we can use r4 to
17115 clear the unused bits in the arguments. */
17116 if (TARGET_THUMB1 && !using_r4)
17118 using_r4 = true;
17119 reg = cleared_reg;
17120 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17121 reg);
17124 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17125 emit_move_insn (reg, tmp);
17126 /* Also fill the top half of the negated
17127 padding_bits_to_clear. */
17128 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17130 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17131 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17132 GEN_INT (16),
17133 GEN_INT (16)),
17134 tmp));
17137 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17138 gen_rtx_REG (SImode, regno),
17139 reg));
17142 if (using_r4)
17143 emit_move_insn (cleared_reg,
17144 gen_rtx_REG (SImode, IP_REGNUM));
17146 /* We use right shift and left shift to clear the LSB of the address
17147 we jump to instead of using bic, to avoid having to use an extra
17148 register on Thumb-1. */
17149 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17150 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17151 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17152 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17154 /* Clear all caller-saved registers that could leak information
17155 before doing the non-secure call. */
17156 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17158 if (!(to_clear_mask & (1LL << regno)))
17159 continue;
17161 /* If regno is an even vfp register and its successor is also to
17162 be cleared, use vmov. */
17163 if (IS_VFP_REGNUM (regno))
17165 if (TARGET_VFP_DOUBLE
17166 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17167 && to_clear_mask & (1LL << (regno + 1)))
17168 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17169 CONST0_RTX (DFmode));
17170 else
17171 emit_move_insn (gen_rtx_REG (SFmode, regno),
17172 CONST0_RTX (SFmode));
17174 else
17175 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17178 seq = get_insns ();
17179 end_sequence ();
17180 emit_insn_before (seq, insn);
17186 /* Rewrite move insn into subtract of 0 if the condition codes will
17187 be useful in the next conditional jump insn. */
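/* For example (illustrative), a low-register copy such as
     mov   r1, r0
     ...
     cmp   r1, #0
     bne   .Lx
   can instead be emitted as "subs r1, r0, #0", which sets the condition
   codes, so the compare feeding the conditional branch may become
   redundant at output time.  */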
17189 static void
17190 thumb1_reorg (void)
17192 basic_block bb;
17194 FOR_EACH_BB_FN (bb, cfun)
17196 rtx dest, src;
17197 rtx cmp, op0, op1, set = NULL;
17198 rtx_insn *prev, *insn = BB_END (bb);
17199 bool insn_clobbered = false;
17201 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17202 insn = PREV_INSN (insn);
17204 /* Find the last cbranchsi4_insn in basic block BB. */
17205 if (insn == BB_HEAD (bb)
17206 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17207 continue;
17209 /* Get the register with which we are comparing. */
17210 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17211 op0 = XEXP (cmp, 0);
17212 op1 = XEXP (cmp, 1);
17214 /* Check that comparison is against ZERO. */
17215 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17216 continue;
17218 /* Find the first flag setting insn before INSN in basic block BB. */
17219 gcc_assert (insn != BB_HEAD (bb));
17220 for (prev = PREV_INSN (insn);
17221 (!insn_clobbered
17222 && prev != BB_HEAD (bb)
17223 && (NOTE_P (prev)
17224 || DEBUG_INSN_P (prev)
17225 || ((set = single_set (prev)) != NULL
17226 && get_attr_conds (prev) == CONDS_NOCOND)));
17227 prev = PREV_INSN (prev))
17229 if (reg_set_p (op0, prev))
17230 insn_clobbered = true;
17233 /* Skip if op0 is clobbered by insn other than prev. */
17234 if (insn_clobbered)
17235 continue;
17237 if (!set)
17238 continue;
17240 dest = SET_DEST (set);
17241 src = SET_SRC (set);
17242 if (!low_register_operand (dest, SImode)
17243 || !low_register_operand (src, SImode))
17244 continue;
17246 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17247 in INSN. Both src and dest of the move insn are checked. */
17248 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17250 dest = copy_rtx (dest);
17251 src = copy_rtx (src);
17252 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17253 PATTERN (prev) = gen_rtx_SET (dest, src);
17254 INSN_CODE (prev) = -1;
17255 /* Set test register in INSN to dest. */
17256 XEXP (cmp, 0) = copy_rtx (dest);
17257 INSN_CODE (insn) = -1;
17262 /* Convert instructions to their cc-clobbering variant if possible, since
17263 that allows us to use smaller encodings. */
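/* For example (illustrative), when the condition codes are dead an insn
   like "add r0, r1, r2" can instead be selected as the flag-setting form
   "adds r0, r1, r2", which has a 16-bit encoding.  */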
17265 static void
17266 thumb2_reorg (void)
17268 basic_block bb;
17269 regset_head live;
17271 INIT_REG_SET (&live);
17273 /* We are freeing block_for_insn in the toplev to keep compatibility
17274 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17275 compute_bb_for_insn ();
17276 df_analyze ();
17278 enum Convert_Action {SKIP, CONV, SWAP_CONV};
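/* SKIP leaves the insn alone; CONV wraps it in a PARALLEL with a CC clobber
   so that a flag-setting 16-bit encoding may be chosen; SWAP_CONV does the
   same but first swaps the two (commutative) source operands.  */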
17280 FOR_EACH_BB_FN (bb, cfun)
17282 if ((current_tune->disparage_flag_setting_t16_encodings
17283 == tune_params::DISPARAGE_FLAGS_ALL)
17284 && optimize_bb_for_speed_p (bb))
17285 continue;
17287 rtx_insn *insn;
17288 Convert_Action action = SKIP;
17289 Convert_Action action_for_partial_flag_setting
17290 = ((current_tune->disparage_flag_setting_t16_encodings
17291 != tune_params::DISPARAGE_FLAGS_NEITHER)
17292 && optimize_bb_for_speed_p (bb))
17293 ? SKIP : CONV;
17295 COPY_REG_SET (&live, DF_LR_OUT (bb));
17296 df_simulate_initialize_backwards (bb, &live);
17297 FOR_BB_INSNS_REVERSE (bb, insn)
17299 if (NONJUMP_INSN_P (insn)
17300 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17301 && GET_CODE (PATTERN (insn)) == SET)
17303 action = SKIP;
17304 rtx pat = PATTERN (insn);
17305 rtx dst = XEXP (pat, 0);
17306 rtx src = XEXP (pat, 1);
17307 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17309 if (UNARY_P (src) || BINARY_P (src))
17310 op0 = XEXP (src, 0);
17312 if (BINARY_P (src))
17313 op1 = XEXP (src, 1);
17315 if (low_register_operand (dst, SImode))
17317 switch (GET_CODE (src))
17319 case PLUS:
17320 /* Adding two registers and storing the result
17321 in the first source is already a 16-bit
17322 operation. */
17323 if (rtx_equal_p (dst, op0)
17324 && register_operand (op1, SImode))
17325 break;
17327 if (low_register_operand (op0, SImode))
17329 /* ADDS <Rd>,<Rn>,<Rm> */
17330 if (low_register_operand (op1, SImode))
17331 action = CONV;
17332 /* ADDS <Rdn>,#<imm8> */
17333 /* SUBS <Rdn>,#<imm8> */
17334 else if (rtx_equal_p (dst, op0)
17335 && CONST_INT_P (op1)
17336 && IN_RANGE (INTVAL (op1), -255, 255))
17337 action = CONV;
17338 /* ADDS <Rd>,<Rn>,#<imm3> */
17339 /* SUBS <Rd>,<Rn>,#<imm3> */
17340 else if (CONST_INT_P (op1)
17341 && IN_RANGE (INTVAL (op1), -7, 7))
17342 action = CONV;
17344 /* ADCS <Rd>, <Rn> */
17345 else if (GET_CODE (XEXP (src, 0)) == PLUS
17346 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17347 && low_register_operand (XEXP (XEXP (src, 0), 1),
17348 SImode)
17349 && COMPARISON_P (op1)
17350 && cc_register (XEXP (op1, 0), VOIDmode)
17351 && maybe_get_arm_condition_code (op1) == ARM_CS
17352 && XEXP (op1, 1) == const0_rtx)
17353 action = CONV;
17354 break;
17356 case MINUS:
17357 /* RSBS <Rd>,<Rn>,#0
17358 Not handled here: see NEG below. */
17359 /* SUBS <Rd>,<Rn>,#<imm3>
17360 SUBS <Rdn>,#<imm8>
17361 Not handled here: see PLUS above. */
17362 /* SUBS <Rd>,<Rn>,<Rm> */
17363 if (low_register_operand (op0, SImode)
17364 && low_register_operand (op1, SImode))
17365 action = CONV;
17366 break;
17368 case MULT:
17369 /* MULS <Rdm>,<Rn>,<Rdm>
17370 As an exception to the rule, this is only used
17371 when optimizing for size since MULS is slow on all
17372 known implementations. We do not even want to use
17373 MULS in cold code, if optimizing for speed, so we
17374 test the global flag here. */
17375 if (!optimize_size)
17376 break;
17377 /* Fall through. */
17378 case AND:
17379 case IOR:
17380 case XOR:
17381 /* ANDS <Rdn>,<Rm> */
17382 if (rtx_equal_p (dst, op0)
17383 && low_register_operand (op1, SImode))
17384 action = action_for_partial_flag_setting;
17385 else if (rtx_equal_p (dst, op1)
17386 && low_register_operand (op0, SImode))
17387 action = action_for_partial_flag_setting == SKIP
17388 ? SKIP : SWAP_CONV;
17389 break;
17391 case ASHIFTRT:
17392 case ASHIFT:
17393 case LSHIFTRT:
17394 /* ASRS <Rdn>,<Rm> */
17395 /* LSRS <Rdn>,<Rm> */
17396 /* LSLS <Rdn>,<Rm> */
17397 if (rtx_equal_p (dst, op0)
17398 && low_register_operand (op1, SImode))
17399 action = action_for_partial_flag_setting;
17400 /* ASRS <Rd>,<Rm>,#<imm5> */
17401 /* LSRS <Rd>,<Rm>,#<imm5> */
17402 /* LSLS <Rd>,<Rm>,#<imm5> */
17403 else if (low_register_operand (op0, SImode)
17404 && CONST_INT_P (op1)
17405 && IN_RANGE (INTVAL (op1), 0, 31))
17406 action = action_for_partial_flag_setting;
17407 break;
17409 case ROTATERT:
17410 /* RORS <Rdn>,<Rm> */
17411 if (rtx_equal_p (dst, op0)
17412 && low_register_operand (op1, SImode))
17413 action = action_for_partial_flag_setting;
17414 break;
17416 case NOT:
17417 /* MVNS <Rd>,<Rm> */
17418 if (low_register_operand (op0, SImode))
17419 action = action_for_partial_flag_setting;
17420 break;
17422 case NEG:
17423 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17424 if (low_register_operand (op0, SImode))
17425 action = CONV;
17426 break;
17428 case CONST_INT:
17429 /* MOVS <Rd>,#<imm8> */
17430 if (CONST_INT_P (src)
17431 && IN_RANGE (INTVAL (src), 0, 255))
17432 action = action_for_partial_flag_setting;
17433 break;
17435 case REG:
17436 /* MOVS and MOV<c> with registers have different
17437 encodings, so are not relevant here. */
17438 break;
17440 default:
17441 break;
17445 if (action != SKIP)
17447 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17448 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17449 rtvec vec;
17451 if (action == SWAP_CONV)
17453 src = copy_rtx (src);
17454 XEXP (src, 0) = op1;
17455 XEXP (src, 1) = op0;
17456 pat = gen_rtx_SET (dst, src);
17457 vec = gen_rtvec (2, pat, clobber);
17459 else /* action == CONV */
17460 vec = gen_rtvec (2, pat, clobber);
17462 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17463 INSN_CODE (insn) = -1;
17467 if (NONDEBUG_INSN_P (insn))
17468 df_simulate_one_insn_backwards (bb, insn, &live);
17472 CLEAR_REG_SET (&live);
17475 /* GCC puts the pool in the wrong place for ARM, since we can only
17476 load addresses a limited distance around the pc. We do some
17477 special munging to move the constant pool values to the correct
17478 point in the code. */
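/* Roughly, the pass below first records every instruction that needs a
   constant-pool ("minipool") entry and every existing barrier, then walks
   that list of fixes, dumping a pool either at a natural barrier within
   range or, failing that, at a barrier it creates itself (jumping around
   the pool), and finally rewrites each fixed-up operand to reference its
   pool entry.  */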
17479 static void
17480 arm_reorg (void)
17482 rtx_insn *insn;
17483 HOST_WIDE_INT address = 0;
17484 Mfix * fix;
17486 if (use_cmse)
17487 cmse_nonsecure_call_clear_caller_saved ();
17488 if (TARGET_THUMB1)
17489 thumb1_reorg ();
17490 else if (TARGET_THUMB2)
17491 thumb2_reorg ();
17493 /* Ensure all insns that must be split have been split at this point.
17494 Otherwise, the pool placement code below may compute incorrect
17495 insn lengths. Note that when optimizing, all insns have already
17496 been split at this point. */
17497 if (!optimize)
17498 split_all_insns_noflow ();
17500 /* If literal pools have been disabled, do not attempt to create one;
17501 none should be needed at this point. */
17502 if (arm_disable_literal_pool)
17503 return ;
17505 minipool_fix_head = minipool_fix_tail = NULL;
17507 /* The first insn must always be a note, or the code below won't
17508 scan it properly. */
17509 insn = get_insns ();
17510 gcc_assert (NOTE_P (insn));
17511 minipool_pad = 0;
17513 /* Scan all the insns and record the operands that will need fixing. */
17514 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17516 if (BARRIER_P (insn))
17517 push_minipool_barrier (insn, address);
17518 else if (INSN_P (insn))
17520 rtx_jump_table_data *table;
17522 note_invalid_constants (insn, address, true);
17523 address += get_attr_length (insn);
17525 /* If the insn is a vector jump, add the size of the table
17526 and skip the table. */
17527 if (tablejump_p (insn, NULL, &table))
17529 address += get_jump_table_size (table);
17530 insn = table;
17533 else if (LABEL_P (insn))
17534 /* Add the worst-case padding due to alignment. We don't add
17535 the _current_ padding because the minipool insertions
17536 themselves might change it. */
17537 address += get_label_padding (insn);
17540 fix = minipool_fix_head;
17542 /* Now scan the fixups and perform the required changes. */
17543 while (fix)
17545 Mfix * ftmp;
17546 Mfix * fdel;
17547 Mfix * last_added_fix;
17548 Mfix * last_barrier = NULL;
17549 Mfix * this_fix;
17551 /* Skip any further barriers before the next fix. */
17552 while (fix && BARRIER_P (fix->insn))
17553 fix = fix->next;
17555 /* No more fixes. */
17556 if (fix == NULL)
17557 break;
17559 last_added_fix = NULL;
17561 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17563 if (BARRIER_P (ftmp->insn))
17565 if (ftmp->address >= minipool_vector_head->max_address)
17566 break;
17568 last_barrier = ftmp;
17570 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17571 break;
17573 last_added_fix = ftmp; /* Keep track of the last fix added. */
17576 /* If we found a barrier, drop back to that; any fixes that we
17577 could have reached but come after the barrier will now go in
17578 the next mini-pool. */
17579 if (last_barrier != NULL)
17581 /* Reduce the refcount for those fixes that won't go into this
17582 pool after all. */
17583 for (fdel = last_barrier->next;
17584 fdel && fdel != ftmp;
17585 fdel = fdel->next)
17587 fdel->minipool->refcount--;
17588 fdel->minipool = NULL;
17591 ftmp = last_barrier;
17593 else
17595 /* ftmp is the first fix that we can't fit into this pool and
17596 there are no natural barriers that we could use. Insert a
17597 new barrier in the code somewhere between the previous
17598 fix and this one, and arrange to jump around it. */
17599 HOST_WIDE_INT max_address;
17601 /* The last item on the list of fixes must be a barrier, so
17602 we can never run off the end of the list of fixes without
17603 last_barrier being set. */
17604 gcc_assert (ftmp);
17606 max_address = minipool_vector_head->max_address;
17607 /* Check that there isn't another fix that is in range that
17608 we couldn't fit into this pool because the pool was
17609 already too large: we need to put the pool before such an
17610 instruction. The pool itself may come just after the
17611 fix because create_fix_barrier also allows space for a
17612 jump instruction. */
17613 if (ftmp->address < max_address)
17614 max_address = ftmp->address + 1;
17616 last_barrier = create_fix_barrier (last_added_fix, max_address);
17619 assign_minipool_offsets (last_barrier);
17621 while (ftmp)
17623 if (!BARRIER_P (ftmp->insn)
17624 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17625 == NULL))
17626 break;
17628 ftmp = ftmp->next;
17631 /* Scan over the fixes we have identified for this pool, fixing them
17632 up and adding the constants to the pool itself. */
17633 for (this_fix = fix; this_fix && ftmp != this_fix;
17634 this_fix = this_fix->next)
17635 if (!BARRIER_P (this_fix->insn))
17637 rtx addr
17638 = plus_constant (Pmode,
17639 gen_rtx_LABEL_REF (VOIDmode,
17640 minipool_vector_label),
17641 this_fix->minipool->offset);
17642 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17645 dump_minipool (last_barrier->insn);
17646 fix = ftmp;
17649 /* From now on we must synthesize any constants that we can't handle
17650 directly. This can happen if the RTL gets split during final
17651 instruction generation. */
17652 cfun->machine->after_arm_reorg = 1;
17654 /* Free the minipool memory. */
17655 obstack_free (&minipool_obstack, minipool_startobj);
17658 /* Routines to output assembly language. */
17660 /* Return string representation of passed in real value. */
17661 static const char *
17662 fp_const_from_val (REAL_VALUE_TYPE *r)
17664 if (!fp_consts_inited)
17665 init_fp_table ();
17667 gcc_assert (real_equal (r, &value_fp0));
17668 return "0";
17671 /* OPERANDS[0] is the entire list of insns that constitute pop,
17672 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17673 is in the list, UPDATE is true iff the list contains explicit
17674 update of base register. */
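/* For example (illustrative): with SP as the base register, writeback and
   no interrupt return this emits "pop {r4, r5, pc}"-style assembly, while a
   non-SP base produces "ldmia rN!, {...}" (or "ldm rN, {...}" without
   writeback).  */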
17675 void
17676 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17677 bool update)
17679 int i;
17680 char pattern[100];
17681 int offset;
17682 const char *conditional;
17683 int num_saves = XVECLEN (operands[0], 0);
17684 unsigned int regno;
17685 unsigned int regno_base = REGNO (operands[1]);
17686 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17688 offset = 0;
17689 offset += update ? 1 : 0;
17690 offset += return_pc ? 1 : 0;
17692 /* Is the base register in the list? */
17693 for (i = offset; i < num_saves; i++)
17695 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17696 /* If SP is in the list, then the base register must be SP. */
17697 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17698 /* If base register is in the list, there must be no explicit update. */
17699 if (regno == regno_base)
17700 gcc_assert (!update);
17703 conditional = reverse ? "%?%D0" : "%?%d0";
17704 /* Can't use POP if returning from an interrupt. */
17705 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17706 sprintf (pattern, "pop%s\t{", conditional);
17707 else
17709 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17710 It's just a convention; their semantics are identical. */
17711 if (regno_base == SP_REGNUM)
17712 sprintf (pattern, "ldmfd%s\t", conditional);
17713 else if (update)
17714 sprintf (pattern, "ldmia%s\t", conditional);
17715 else
17716 sprintf (pattern, "ldm%s\t", conditional);
17718 strcat (pattern, reg_names[regno_base]);
17719 if (update)
17720 strcat (pattern, "!, {");
17721 else
17722 strcat (pattern, ", {");
17725 /* Output the first destination register. */
17726 strcat (pattern,
17727 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17729 /* Output the rest of the destination registers. */
17730 for (i = offset + 1; i < num_saves; i++)
17732 strcat (pattern, ", ");
17733 strcat (pattern,
17734 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17737 strcat (pattern, "}");
17739 if (interrupt_p && return_pc)
17740 strcat (pattern, "^");
17742 output_asm_insn (pattern, &cond);
17746 /* Output the assembly for a store multiple. */
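/* For example (illustrative): when the base address is the stack pointer
   this produces "vpush.64 {d8, d9, ...}", otherwise
   "vstmdb.64 rN!, {d8, d9, ...}".  */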
17748 const char *
17749 vfp_output_vstmd (rtx * operands)
17751 char pattern[100];
17752 int p;
17753 int base;
17754 int i;
17755 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17756 ? XEXP (operands[0], 0)
17757 : XEXP (XEXP (operands[0], 0), 0);
17758 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17760 if (push_p)
17761 strcpy (pattern, "vpush%?.64\t{%P1");
17762 else
17763 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17765 p = strlen (pattern);
17767 gcc_assert (REG_P (operands[1]));
17769 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17770 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17772 p += sprintf (&pattern[p], ", d%d", base + i);
17774 strcpy (&pattern[p], "}");
17776 output_asm_insn (pattern, operands);
17777 return "";
17781 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17782 number of bytes pushed. */
17784 static int
17785 vfp_emit_fstmd (int base_reg, int count)
17787 rtx par;
17788 rtx dwarf;
17789 rtx tmp, reg;
17790 int i;
17792 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17793 register pairs are stored by a store multiple insn. We avoid this
17794 by pushing an extra pair. */
17795 if (count == 2 && !arm_arch6)
17797 if (base_reg == LAST_VFP_REGNUM - 3)
17798 base_reg -= 2;
17799 count++;
17802 /* FSTMD may not store more than 16 doubleword registers at once. Split
17803 larger stores into multiple parts (up to a maximum of two, in
17804 practice). */
17805 if (count > 16)
17807 int saved;
17808 /* NOTE: base_reg is an internal register number, so each D register
17809 counts as 2. */
17810 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17811 saved += vfp_emit_fstmd (base_reg, 16);
17812 return saved;
17815 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17816 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17818 reg = gen_rtx_REG (DFmode, base_reg);
17819 base_reg += 2;
17821 XVECEXP (par, 0, 0)
17822 = gen_rtx_SET (gen_frame_mem
17823 (BLKmode,
17824 gen_rtx_PRE_MODIFY (Pmode,
17825 stack_pointer_rtx,
17826 plus_constant
17827 (Pmode, stack_pointer_rtx,
17828 - (count * 8)))
17830 gen_rtx_UNSPEC (BLKmode,
17831 gen_rtvec (1, reg),
17832 UNSPEC_PUSH_MULT));
17834 tmp = gen_rtx_SET (stack_pointer_rtx,
17835 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17836 RTX_FRAME_RELATED_P (tmp) = 1;
17837 XVECEXP (dwarf, 0, 0) = tmp;
17839 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17840 RTX_FRAME_RELATED_P (tmp) = 1;
17841 XVECEXP (dwarf, 0, 1) = tmp;
17843 for (i = 1; i < count; i++)
17845 reg = gen_rtx_REG (DFmode, base_reg);
17846 base_reg += 2;
17847 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17849 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17850 plus_constant (Pmode,
17851 stack_pointer_rtx,
17852 i * 8)),
17853 reg);
17854 RTX_FRAME_RELATED_P (tmp) = 1;
17855 XVECEXP (dwarf, 0, i + 1) = tmp;
17858 par = emit_insn (par);
17859 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17860 RTX_FRAME_RELATED_P (par) = 1;
17862 return count * 8;
17865 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17866 has the cmse_nonsecure_call attribute; returns false otherwise. */
17868 bool
17869 detect_cmse_nonsecure_call (tree addr)
17871 if (!addr)
17872 return FALSE;
17874 tree fntype = TREE_TYPE (addr);
17875 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17876 TYPE_ATTRIBUTES (fntype)))
17877 return TRUE;
17878 return FALSE;
17882 /* Emit a call instruction with pattern PAT. ADDR is the address of
17883 the call target. */
17885 void
17886 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17888 rtx insn;
17890 insn = emit_call_insn (pat);
17892 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17893 If the call might use such an entry, add a use of the PIC register
17894 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17895 if (TARGET_VXWORKS_RTP
17896 && flag_pic
17897 && !sibcall
17898 && GET_CODE (addr) == SYMBOL_REF
17899 && (SYMBOL_REF_DECL (addr)
17900 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17901 : !SYMBOL_REF_LOCAL_P (addr)))
17903 require_pic_register ();
17904 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17907 if (TARGET_AAPCS_BASED)
17909 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17910 linker. We need to add an IP clobber to allow setting
17911 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17912 is not needed since it's a fixed register. */
17913 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17914 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17918 /* Output a 'call' insn. */
17919 const char *
17920 output_call (rtx *operands)
17922 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17924 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17925 if (REGNO (operands[0]) == LR_REGNUM)
17927 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17928 output_asm_insn ("mov%?\t%0, %|lr", operands);
17931 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17933 if (TARGET_INTERWORK || arm_arch4t)
17934 output_asm_insn ("bx%?\t%0", operands);
17935 else
17936 output_asm_insn ("mov%?\t%|pc, %0", operands);
17938 return "";
17941 /* Output a move from arm registers to arm registers of a long double
17942 OPERANDS[0] is the destination.
17943 OPERANDS[1] is the source. */
17944 const char *
17945 output_mov_long_double_arm_from_arm (rtx *operands)
17947 /* We have to be careful here because the two might overlap. */
17948 int dest_start = REGNO (operands[0]);
17949 int src_start = REGNO (operands[1]);
17950 rtx ops[2];
17951 int i;
17953 if (dest_start < src_start)
17955 for (i = 0; i < 3; i++)
17957 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17958 ops[1] = gen_rtx_REG (SImode, src_start + i);
17959 output_asm_insn ("mov%?\t%0, %1", ops);
17962 else
17964 for (i = 2; i >= 0; i--)
17966 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17967 ops[1] = gen_rtx_REG (SImode, src_start + i);
17968 output_asm_insn ("mov%?\t%0, %1", ops);
17972 return "";
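/* Emit a two-instruction sequence loading SRC into DEST (e.g. a movw/movt
   pair): for a constant, the low 16 bits are moved first and the upper 16
   bits are then inserted if they are non-zero; for a symbolic SRC a
   HIGH/LO_SUM pair is used instead.  Where useful, a REG_EQUAL note records
   the full value.  */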
17975 void
17976 arm_emit_movpair (rtx dest, rtx src)
17978 /* If the src is an immediate, simplify it. */
17979 if (CONST_INT_P (src))
17981 HOST_WIDE_INT val = INTVAL (src);
17982 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17983 if ((val >> 16) & 0x0000ffff)
17985 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17986 GEN_INT (16)),
17987 GEN_INT ((val >> 16) & 0x0000ffff));
17988 rtx_insn *insn = get_last_insn ();
17989 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17991 return;
17993 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17994 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17995 rtx_insn *insn = get_last_insn ();
17996 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17999 /* Output a move between double words. It must be REG<-MEM
18000 or MEM<-REG. */
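/* In the common REG <- MEM case with a plain register address this comes
   out as either "ldrd %0, [%m1]" or "ldmia %m1, %M0" depending on
   TARGET_LDRD; the remaining cases below handle the auto-increment and
   offset addressing modes, falling back to a pair of "ldr"s when the offset
   range or a register overlap rules out ldrd.  */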
18001 const char *
18002 output_move_double (rtx *operands, bool emit, int *count)
18004 enum rtx_code code0 = GET_CODE (operands[0]);
18005 enum rtx_code code1 = GET_CODE (operands[1]);
18006 rtx otherops[3];
18007 if (count)
18008 *count = 1;
18010 /* The only case when this might happen is when
18011 you are looking at the length of a DImode instruction
18012 that has an invalid constant in it. */
18013 if (code0 == REG && code1 != MEM)
18015 gcc_assert (!emit);
18016 *count = 2;
18017 return "";
18020 if (code0 == REG)
18022 unsigned int reg0 = REGNO (operands[0]);
18024 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18026 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18028 switch (GET_CODE (XEXP (operands[1], 0)))
18030 case REG:
18032 if (emit)
18034 if (TARGET_LDRD
18035 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18036 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18037 else
18038 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18040 break;
18042 case PRE_INC:
18043 gcc_assert (TARGET_LDRD);
18044 if (emit)
18045 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18046 break;
18048 case PRE_DEC:
18049 if (emit)
18051 if (TARGET_LDRD)
18052 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18053 else
18054 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18056 break;
18058 case POST_INC:
18059 if (emit)
18061 if (TARGET_LDRD)
18062 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18063 else
18064 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18066 break;
18068 case POST_DEC:
18069 gcc_assert (TARGET_LDRD);
18070 if (emit)
18071 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18072 break;
18074 case PRE_MODIFY:
18075 case POST_MODIFY:
18076 /* Autoincrement addressing modes should never have overlapping
18077 base and destination registers, and overlapping index registers
18078 are already prohibited, so this doesn't need to worry about
18079 fix_cm3_ldrd. */
18080 otherops[0] = operands[0];
18081 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18082 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18084 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18086 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18088 /* Registers overlap so split out the increment. */
18089 if (emit)
18091 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18092 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18094 if (count)
18095 *count = 2;
18097 else
18099 /* Use a single insn if we can.
18100 FIXME: IWMMXT allows offsets larger than ldrd can
18101 handle, fix these up with a pair of ldr. */
18102 if (TARGET_THUMB2
18103 || !CONST_INT_P (otherops[2])
18104 || (INTVAL (otherops[2]) > -256
18105 && INTVAL (otherops[2]) < 256))
18107 if (emit)
18108 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18110 else
18112 if (emit)
18114 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18115 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18117 if (count)
18118 *count = 2;
18123 else
18125 /* Use a single insn if we can.
18126 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18127 fix these up with a pair of ldr. */
18128 if (TARGET_THUMB2
18129 || !CONST_INT_P (otherops[2])
18130 || (INTVAL (otherops[2]) > -256
18131 && INTVAL (otherops[2]) < 256))
18133 if (emit)
18134 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18136 else
18138 if (emit)
18140 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18141 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18143 if (count)
18144 *count = 2;
18147 break;
18149 case LABEL_REF:
18150 case CONST:
18151 /* We might be able to use ldrd %0, %1 here. However the range is
18152 different to ldr/adr, and it is broken on some ARMv7-M
18153 implementations. */
18154 /* Use the second register of the pair to avoid problematic
18155 overlap. */
18156 otherops[1] = operands[1];
18157 if (emit)
18158 output_asm_insn ("adr%?\t%0, %1", otherops);
18159 operands[1] = otherops[0];
18160 if (emit)
18162 if (TARGET_LDRD)
18163 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18164 else
18165 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18168 if (count)
18169 *count = 2;
18170 break;
18172 /* ??? This needs checking for thumb2. */
18173 default:
18174 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18175 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18177 otherops[0] = operands[0];
18178 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18179 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18181 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18183 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18185 switch ((int) INTVAL (otherops[2]))
18187 case -8:
18188 if (emit)
18189 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18190 return "";
18191 case -4:
18192 if (TARGET_THUMB2)
18193 break;
18194 if (emit)
18195 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18196 return "";
18197 case 4:
18198 if (TARGET_THUMB2)
18199 break;
18200 if (emit)
18201 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18202 return "";
18205 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18206 operands[1] = otherops[0];
18207 if (TARGET_LDRD
18208 && (REG_P (otherops[2])
18209 || TARGET_THUMB2
18210 || (CONST_INT_P (otherops[2])
18211 && INTVAL (otherops[2]) > -256
18212 && INTVAL (otherops[2]) < 256)))
18214 if (reg_overlap_mentioned_p (operands[0],
18215 otherops[2]))
18217 /* Swap base and index registers over to
18218 avoid a conflict. */
18219 std::swap (otherops[1], otherops[2]);
18221 /* If both registers conflict, it will usually
18222 have been fixed by a splitter. */
18223 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18224 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18226 if (emit)
18228 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18229 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18231 if (count)
18232 *count = 2;
18234 else
18236 otherops[0] = operands[0];
18237 if (emit)
18238 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18240 return "";
18243 if (CONST_INT_P (otherops[2]))
18245 if (emit)
18247 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18248 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18249 else
18250 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18253 else
18255 if (emit)
18256 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18259 else
18261 if (emit)
18262 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18265 if (count)
18266 *count = 2;
18268 if (TARGET_LDRD)
18269 return "ldrd%?\t%0, [%1]";
18271 return "ldmia%?\t%1, %M0";
18273 else
18275 otherops[1] = adjust_address (operands[1], SImode, 4);
18276 /* Take care of overlapping base/data reg. */
18277 if (reg_mentioned_p (operands[0], operands[1]))
18279 if (emit)
18281 output_asm_insn ("ldr%?\t%0, %1", otherops);
18282 output_asm_insn ("ldr%?\t%0, %1", operands);
18284 if (count)
18285 *count = 2;
18288 else
18290 if (emit)
18292 output_asm_insn ("ldr%?\t%0, %1", operands);
18293 output_asm_insn ("ldr%?\t%0, %1", otherops);
18295 if (count)
18296 *count = 2;
18301 else
18303 /* Constraints should ensure this. */
18304 gcc_assert (code0 == MEM && code1 == REG);
18305 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18306 || (TARGET_ARM && TARGET_LDRD));
18308 switch (GET_CODE (XEXP (operands[0], 0)))
18310 case REG:
18311 if (emit)
18313 if (TARGET_LDRD)
18314 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18315 else
18316 output_asm_insn ("stm%?\t%m0, %M1", operands);
18318 break;
18320 case PRE_INC:
18321 gcc_assert (TARGET_LDRD);
18322 if (emit)
18323 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18324 break;
18326 case PRE_DEC:
18327 if (emit)
18329 if (TARGET_LDRD)
18330 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18331 else
18332 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18334 break;
18336 case POST_INC:
18337 if (emit)
18339 if (TARGET_LDRD)
18340 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18341 else
18342 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18344 break;
18346 case POST_DEC:
18347 gcc_assert (TARGET_LDRD);
18348 if (emit)
18349 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18350 break;
18352 case PRE_MODIFY:
18353 case POST_MODIFY:
18354 otherops[0] = operands[1];
18355 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18356 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18358 /* IWMMXT allows offsets larger than ldrd can handle,
18359 fix these up with a pair of ldr. */
18360 if (!TARGET_THUMB2
18361 && CONST_INT_P (otherops[2])
18362 && (INTVAL(otherops[2]) <= -256
18363 || INTVAL(otherops[2]) >= 256))
18365 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18367 if (emit)
18369 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18370 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18372 if (count)
18373 *count = 2;
18375 else
18377 if (emit)
18379 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18380 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18382 if (count)
18383 *count = 2;
18386 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18388 if (emit)
18389 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18391 else
18393 if (emit)
18394 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18396 break;
18398 case PLUS:
18399 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18400 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18402 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18404 case -8:
18405 if (emit)
18406 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18407 return "";
18409 case -4:
18410 if (TARGET_THUMB2)
18411 break;
18412 if (emit)
18413 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18414 return "";
18416 case 4:
18417 if (TARGET_THUMB2)
18418 break;
18419 if (emit)
18420 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18421 return "";
18424 if (TARGET_LDRD
18425 && (REG_P (otherops[2])
18426 || TARGET_THUMB2
18427 || (CONST_INT_P (otherops[2])
18428 && INTVAL (otherops[2]) > -256
18429 && INTVAL (otherops[2]) < 256)))
18431 otherops[0] = operands[1];
18432 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18433 if (emit)
18434 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18435 return "";
18437 /* Fall through */
18439 default:
18440 otherops[0] = adjust_address (operands[0], SImode, 4);
18441 otherops[1] = operands[1];
18442 if (emit)
18444 output_asm_insn ("str%?\t%1, %0", operands);
18445 output_asm_insn ("str%?\t%H1, %0", otherops);
18447 if (count)
18448 *count = 2;
18452 return "";
18455 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18456 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18458 const char *
18459 output_move_quad (rtx *operands)
18461 if (REG_P (operands[0]))
18463 /* Load, or reg->reg move. */
18465 if (MEM_P (operands[1]))
18467 switch (GET_CODE (XEXP (operands[1], 0)))
18469 case REG:
18470 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18471 break;
18473 case LABEL_REF:
18474 case CONST:
18475 output_asm_insn ("adr%?\t%0, %1", operands);
18476 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18477 break;
18479 default:
18480 gcc_unreachable ();
18483 else
18485 rtx ops[2];
18486 int dest, src, i;
18488 gcc_assert (REG_P (operands[1]));
18490 dest = REGNO (operands[0]);
18491 src = REGNO (operands[1]);
18493 /* This seems pretty dumb, but hopefully GCC won't try to do it
18494 very often. */
18495 if (dest < src)
18496 for (i = 0; i < 4; i++)
18498 ops[0] = gen_rtx_REG (SImode, dest + i);
18499 ops[1] = gen_rtx_REG (SImode, src + i);
18500 output_asm_insn ("mov%?\t%0, %1", ops);
18502 else
18503 for (i = 3; i >= 0; i--)
18505 ops[0] = gen_rtx_REG (SImode, dest + i);
18506 ops[1] = gen_rtx_REG (SImode, src + i);
18507 output_asm_insn ("mov%?\t%0, %1", ops);
18511 else
18513 gcc_assert (MEM_P (operands[0]));
18514 gcc_assert (REG_P (operands[1]));
18515 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18517 switch (GET_CODE (XEXP (operands[0], 0)))
18519 case REG:
18520 output_asm_insn ("stm%?\t%m0, %M1", operands);
18521 break;
18523 default:
18524 gcc_unreachable ();
18528 return "";
18531 /* Output a VFP load or store instruction. */
18533 const char *
18534 output_move_vfp (rtx *operands)
18536 rtx reg, mem, addr, ops[2];
18537 int load = REG_P (operands[0]);
18538 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18539 int sp = (!TARGET_VFP_FP16INST
18540 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18541 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18542 const char *templ;
18543 char buff[50];
18544 machine_mode mode;
18546 reg = operands[!load];
18547 mem = operands[load];
18549 mode = GET_MODE (reg);
18551 gcc_assert (REG_P (reg));
18552 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18553 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18554 || mode == SFmode
18555 || mode == DFmode
18556 || mode == HImode
18557 || mode == SImode
18558 || mode == DImode
18559 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18560 gcc_assert (MEM_P (mem));
18562 addr = XEXP (mem, 0);
18564 switch (GET_CODE (addr))
18566 case PRE_DEC:
18567 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18568 ops[0] = XEXP (addr, 0);
18569 ops[1] = reg;
18570 break;
18572 case POST_INC:
18573 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18574 ops[0] = XEXP (addr, 0);
18575 ops[1] = reg;
18576 break;
18578 default:
18579 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18580 ops[0] = reg;
18581 ops[1] = mem;
18582 break;
18585 sprintf (buff, templ,
18586 load ? "ld" : "st",
18587 dp ? "64" : sp ? "32" : "16",
18588 dp ? "P" : "",
18589 integer_p ? "\t%@ int" : "");
18590 output_asm_insn (buff, ops);
18592 return "";
18595 /* Output a Neon double-word or quad-word load or store, or a load
18596 or store for larger structure modes.
18598 WARNING: The ordering of elements is weird in big-endian mode,
18599 because the EABI requires that vectors stored in memory appear
18600 as though they were stored by a VSTM instruction.
18601 GCC RTL defines element ordering based on in-memory order.
18602 This can be different from the architectural ordering of elements
18603 within a NEON register. The intrinsics defined in arm_neon.h use the
18604 NEON register element ordering, not the GCC RTL element ordering.
18606 For example, the in-memory ordering of a big-endian quadword
18607 vector with 16-bit elements when stored from register pair {d0,d1}
18608 will be (lowest address first, d0[N] is NEON register element N):
18610 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18612 When necessary, quadword registers (dN, dN+1) are moved to ARM
18613 registers from rN in the order:
18615 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18617 So that STM/LDM can be used on vectors in ARM registers, and the
18618 same memory layout will result as if VSTM/VLDM were used.
18620 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18621 possible, which allows use of appropriate alignment tags.
18622 Note that the choice of "64" is independent of the actual vector
18623 element size; this size simply ensures that the behavior is
18624 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18626 Due to limitations of those instructions, use of VST1.64/VLD1.64
18627 is not possible if:
18628 - the address contains PRE_DEC, or
18629 - the mode refers to more than 4 double-word registers
18631 In those cases, it would be possible to replace VSTM/VLDM by a
18632 sequence of instructions; this is not currently implemented since
18633 this is not certain to actually improve performance. */
18635 const char *
18636 output_move_neon (rtx *operands)
18638 rtx reg, mem, addr, ops[2];
18639 int regno, nregs, load = REG_P (operands[0]);
18640 const char *templ;
18641 char buff[50];
18642 machine_mode mode;
18644 reg = operands[!load];
18645 mem = operands[load];
18647 mode = GET_MODE (reg);
18649 gcc_assert (REG_P (reg));
18650 regno = REGNO (reg);
18651 nregs = REG_NREGS (reg) / 2;
18652 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18653 || NEON_REGNO_OK_FOR_QUAD (regno));
18654 gcc_assert (VALID_NEON_DREG_MODE (mode)
18655 || VALID_NEON_QREG_MODE (mode)
18656 || VALID_NEON_STRUCT_MODE (mode));
18657 gcc_assert (MEM_P (mem));
18659 addr = XEXP (mem, 0);
18661 /* Strip off const from addresses like (const (plus (...))). */
18662 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18663 addr = XEXP (addr, 0);
18665 switch (GET_CODE (addr))
18667 case POST_INC:
18668 /* We have to use vldm / vstm for too-large modes. */
18669 if (nregs > 4)
18671 templ = "v%smia%%?\t%%0!, %%h1";
18672 ops[0] = XEXP (addr, 0);
18674 else
18676 templ = "v%s1.64\t%%h1, %%A0";
18677 ops[0] = mem;
18679 ops[1] = reg;
18680 break;
18682 case PRE_DEC:
18683 /* We have to use vldm / vstm in this case, since there is no
18684 pre-decrement form of the vld1 / vst1 instructions. */
18685 templ = "v%smdb%%?\t%%0!, %%h1";
18686 ops[0] = XEXP (addr, 0);
18687 ops[1] = reg;
18688 break;
18690 case POST_MODIFY:
18691 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18692 gcc_unreachable ();
18694 case REG:
18695 /* We have to use vldm / vstm for too-large modes. */
18696 if (nregs > 1)
18698 if (nregs > 4)
18699 templ = "v%smia%%?\t%%m0, %%h1";
18700 else
18701 templ = "v%s1.64\t%%h1, %%A0";
18703 ops[0] = mem;
18704 ops[1] = reg;
18705 break;
18707 /* Fall through. */
18708 case LABEL_REF:
18709 case PLUS:
18711 int i;
18712 int overlap = -1;
18713 for (i = 0; i < nregs; i++)
18715 /* We're only using DImode here because it's a convenient size. */
18716 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18717 ops[1] = adjust_address (mem, DImode, 8 * i);
18718 if (reg_overlap_mentioned_p (ops[0], mem))
18720 gcc_assert (overlap == -1);
18721 overlap = i;
18723 else
18725 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18726 output_asm_insn (buff, ops);
18729 if (overlap != -1)
18731 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18732 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18733 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18734 output_asm_insn (buff, ops);
18737 return "";
18740 default:
18741 gcc_unreachable ();
18744 sprintf (buff, templ, load ? "ld" : "st");
18745 output_asm_insn (buff, ops);
18747 return "";
18750 /* Compute and return the length of neon_mov<mode>, where <mode> is
18751 one of VSTRUCT modes: EI, OI, CI or XI. */
18752 int
18753 arm_attr_length_move_neon (rtx_insn *insn)
18755 rtx reg, mem, addr;
18756 int load;
18757 machine_mode mode;
18759 extract_insn_cached (insn);
18761 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18763 mode = GET_MODE (recog_data.operand[0]);
18764 switch (mode)
18766 case E_EImode:
18767 case E_OImode:
18768 return 8;
18769 case E_CImode:
18770 return 12;
18771 case E_XImode:
18772 return 16;
18773 default:
18774 gcc_unreachable ();
18778 load = REG_P (recog_data.operand[0]);
18779 reg = recog_data.operand[!load];
18780 mem = recog_data.operand[load];
18782 gcc_assert (MEM_P (mem));
18784 addr = XEXP (mem, 0);
18786 /* Strip off const from addresses like (const (plus (...))). */
18787 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18788 addr = XEXP (addr, 0);
18790 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18792 int insns = REG_NREGS (reg) / 2;
18793 return insns * 4;
18795 else
18796 return 4;
18799 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18800 return zero. */
18802 int
18803 arm_address_offset_is_imm (rtx_insn *insn)
18805 rtx mem, addr;
18807 extract_insn_cached (insn);
18809 if (REG_P (recog_data.operand[0]))
18810 return 0;
18812 mem = recog_data.operand[0];
18814 gcc_assert (MEM_P (mem));
18816 addr = XEXP (mem, 0);
18818 if (REG_P (addr)
18819 || (GET_CODE (addr) == PLUS
18820 && REG_P (XEXP (addr, 0))
18821 && CONST_INT_P (XEXP (addr, 1))))
18822 return 1;
18823 else
18824 return 0;
18827 /* Output an ADD r, s, #n where n may be too big for one instruction.
18828 If adding zero to one register, output nothing. */
18829 const char *
18830 output_add_immediate (rtx *operands)
18832 HOST_WIDE_INT n = INTVAL (operands[2]);
18834 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18836 if (n < 0)
18837 output_multi_immediate (operands,
18838 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18839 -n);
18840 else
18841 output_multi_immediate (operands,
18842 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18846 return "";
18849 /* Output a multiple immediate operation.
18850 OPERANDS is the vector of operands referred to in the output patterns.
18851 INSTR1 is the output pattern to use for the first constant.
18852 INSTR2 is the output pattern to use for subsequent constants.
18853 IMMED_OP is the index of the constant slot in OPERANDS.
18854 N is the constant value. */
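/* Worked example (illustrative): splitting n == 0x101 for an "add" emits
   "add %0, %1, #1" followed by "add %0, %0, #256", since each ARM
   data-processing immediate must be an 8-bit value rotated by an even
   amount.  */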
18855 static const char *
18856 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18857 int immed_op, HOST_WIDE_INT n)
18859 #if HOST_BITS_PER_WIDE_INT > 32
18860 n &= 0xffffffff;
18861 #endif
18863 if (n == 0)
18865 /* Quick and easy output. */
18866 operands[immed_op] = const0_rtx;
18867 output_asm_insn (instr1, operands);
18869 else
18871 int i;
18872 const char * instr = instr1;
18874 /* Note that n is never zero here (which would give no output). */
18875 for (i = 0; i < 32; i += 2)
18877 if (n & (3 << i))
18879 operands[immed_op] = GEN_INT (n & (255 << i));
18880 output_asm_insn (instr, operands);
18881 instr = instr2;
18882 i += 6;
18887 return "";
18890 /* Return the name of a shifter operation. */
18891 static const char *
18892 arm_shift_nmem(enum rtx_code code)
18894 switch (code)
18896 case ASHIFT:
18897 return ARM_LSL_NAME;
18899 case ASHIFTRT:
18900 return "asr";
18902 case LSHIFTRT:
18903 return "lsr";
18905 case ROTATERT:
18906 return "ror";
18908 default:
18909 abort();
18913 /* Return the appropriate ARM instruction for the operation code.
18914 The returned result should not be overwritten. OP is the rtx of the
18915 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18916 was shifted. */
18917 const char *
18918 arithmetic_instr (rtx op, int shift_first_arg)
18920 switch (GET_CODE (op))
18922 case PLUS:
18923 return "add";
18925 case MINUS:
18926 return shift_first_arg ? "rsb" : "sub";
18928 case IOR:
18929 return "orr";
18931 case XOR:
18932 return "eor";
18934 case AND:
18935 return "and";
18937 case ASHIFT:
18938 case ASHIFTRT:
18939 case LSHIFTRT:
18940 case ROTATERT:
18941 return arm_shift_nmem(GET_CODE(op));
18943 default:
18944 gcc_unreachable ();
18948 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18949 for the operation code. The returned result should not be overwritten.
18950 OP is the rtx code of the shift.
18951 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18952 will hold the constant shift amount. */
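/* For example, a (mult x 8) operand is printed as an "lsl" with *AMOUNTP
   set to 3, since multiplication by a power of two is emitted as a left
   shift.  */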
18953 static const char *
18954 shift_op (rtx op, HOST_WIDE_INT *amountp)
18956 const char * mnem;
18957 enum rtx_code code = GET_CODE (op);
18959 switch (code)
18961 case ROTATE:
18962 if (!CONST_INT_P (XEXP (op, 1)))
18964 output_operand_lossage ("invalid shift operand");
18965 return NULL;
18968 code = ROTATERT;
18969 *amountp = 32 - INTVAL (XEXP (op, 1));
18970 mnem = "ror";
18971 break;
18973 case ASHIFT:
18974 case ASHIFTRT:
18975 case LSHIFTRT:
18976 case ROTATERT:
18977 mnem = arm_shift_nmem(code);
18978 if (CONST_INT_P (XEXP (op, 1)))
18980 *amountp = INTVAL (XEXP (op, 1));
18982 else if (REG_P (XEXP (op, 1)))
18984 *amountp = -1;
18985 return mnem;
18987 else
18989 output_operand_lossage ("invalid shift operand");
18990 return NULL;
18992 break;
18994 case MULT:
18995 /* We never have to worry about the amount being other than a
18996 power of 2, since this case can never be reloaded from a reg. */
18997 if (!CONST_INT_P (XEXP (op, 1)))
18999 output_operand_lossage ("invalid shift operand");
19000 return NULL;
19003 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19005 /* Amount must be a power of two. */
19006 if (*amountp & (*amountp - 1))
19008 output_operand_lossage ("invalid shift operand");
19009 return NULL;
19012 *amountp = exact_log2 (*amountp);
19013 gcc_assert (IN_RANGE (*amountp, 0, 31));
19014 return ARM_LSL_NAME;
19016 default:
19017 output_operand_lossage ("invalid shift operand");
19018 return NULL;
19021 /* This is not 100% correct, but follows from the desire to merge
19022 multiplication by a power of 2 with the recognizer for a
19023 shift. >=32 is not a valid shift for "lsl", so we must try and
19024 output a shift that produces the correct arithmetical result.
19025 Using lsr #32 is identical except for the fact that the carry bit
19026 is not set correctly if we set the flags; but we never use the
19027 carry bit from such an operation, so we can ignore that. */
19028 if (code == ROTATERT)
19029 /* Rotate is just modulo 32. */
19030 *amountp &= 31;
19031 else if (*amountp != (*amountp & 31))
19033 if (code == ASHIFT)
19034 mnem = "lsr";
19035 *amountp = 32;
19038 /* Shifts of 0 are no-ops. */
19039 if (*amountp == 0)
19040 return NULL;
19042 return mnem;
19045 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19046 because /bin/as is horribly restrictive. The judgement about
19047 whether or not each character is 'printable' (and can be output as
19048 is) or not (and must be printed with an octal escape) must be made
19049 with reference to the *host* character set -- the situation is
19050 similar to that discussed in the comments above pp_c_char in
19051 c-pretty-print.c. */
19053 #define MAX_ASCII_LEN 51
19055 void
19056 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19058 int i;
19059 int len_so_far = 0;
19061 fputs ("\t.ascii\t\"", stream);
19063 for (i = 0; i < len; i++)
19065 int c = p[i];
19067 if (len_so_far >= MAX_ASCII_LEN)
19069 fputs ("\"\n\t.ascii\t\"", stream);
19070 len_so_far = 0;
19073 if (ISPRINT (c))
19075 if (c == '\\' || c == '\"')
19077 putc ('\\', stream);
19078 len_so_far++;
19080 putc (c, stream);
19081 len_so_far++;
19083 else
19085 fprintf (stream, "\\%03o", c);
19086 len_so_far += 4;
19090 fputs ("\"\n", stream);
19093 /* Whether a register is callee saved or not. This is necessary because high
19094 registers are marked as caller saved when optimizing for size on Thumb-1
19095 targets despite being callee saved in order to avoid using them. */
19096 #define callee_saved_reg_p(reg) \
19097 (!call_used_regs[reg] \
19098 || (TARGET_THUMB1 && optimize_size \
19099 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
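/* For example, on a Thumb-1 target compiled with -Os a high register such
as r8 is marked call-used (to discourage the register allocator from
picking it) yet is still reported as callee-saved here, so it will be
saved and restored if it is ever live in the function.  */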
19101 /* Compute the register save mask for registers 0 through 12
19102 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19104 static unsigned long
19105 arm_compute_save_reg0_reg12_mask (void)
19107 unsigned long func_type = arm_current_func_type ();
19108 unsigned long save_reg_mask = 0;
19109 unsigned int reg;
19111 if (IS_INTERRUPT (func_type))
19113 unsigned int max_reg;
19114 /* Interrupt functions must not corrupt any registers,
19115 even call clobbered ones. If this is a leaf function
19116 we can just examine the registers used by the RTL, but
19117 otherwise we have to assume that whatever function is
19118 called might clobber anything, and so we have to save
19119 all the call-clobbered registers as well. */
19120 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19121 /* FIQ handlers have registers r8 - r12 banked, so
19122 we only need to check r0 - r7.  Normal ISRs only
19123 bank r14 and r15, so we must check up to r12.
19124 r13 is the stack pointer which is always preserved,
19125 so we do not need to consider it here. */
19126 max_reg = 7;
19127 else
19128 max_reg = 12;
19130 for (reg = 0; reg <= max_reg; reg++)
19131 if (df_regs_ever_live_p (reg)
19132 || (! crtl->is_leaf && call_used_regs[reg]))
19133 save_reg_mask |= (1 << reg);
19135 /* Also save the pic base register if necessary. */
19136 if (flag_pic
19137 && !TARGET_SINGLE_PIC_BASE
19138 && arm_pic_register != INVALID_REGNUM
19139 && crtl->uses_pic_offset_table)
19140 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19142 else if (IS_VOLATILE (func_type))
19144 /* For noreturn functions we historically omitted register saves
19145 altogether. However this really messes up debugging. As a
19146 compromise save just the frame pointers. Combined with the link
19147 register saved elsewhere this should be sufficient to get
19148 a backtrace. */
19149 if (frame_pointer_needed)
19150 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19151 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19152 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19153 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19154 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19156 else
19158 /* In the normal case we only need to save those registers
19159 which are call saved and which are used by this function. */
19160 for (reg = 0; reg <= 11; reg++)
19161 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19162 save_reg_mask |= (1 << reg);
19164 /* Handle the frame pointer as a special case. */
19165 if (frame_pointer_needed)
19166 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19168 /* If we aren't loading the PIC register,
19169 don't stack it even though it may be live. */
19170 if (flag_pic
19171 && !TARGET_SINGLE_PIC_BASE
19172 && arm_pic_register != INVALID_REGNUM
19173 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19174 || crtl->uses_pic_offset_table))
19175 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19177 /* The prologue will copy SP into R0, so save it. */
19178 if (IS_STACKALIGN (func_type))
19179 save_reg_mask |= 1;
19182 /* Save registers so the exception handler can modify them. */
19183 if (crtl->calls_eh_return)
19185 unsigned int i;
19187 for (i = 0; ; i++)
19189 reg = EH_RETURN_DATA_REGNO (i);
19190 if (reg == INVALID_REGNUM)
19191 break;
19192 save_reg_mask |= 1 << reg;
19196 return save_reg_mask;
19199 /* Return true if r3 is live at the start of the function. */
19201 static bool
19202 arm_r3_live_at_start_p (void)
19204 /* Just look at cfg info, which is still close enough to correct at this
19205 point. This gives false positives for broken functions that might use
19206 uninitialized data that happens to be allocated in r3, but who cares? */
19207 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19210 /* Compute the number of bytes used to store the static chain register on the
19211 stack, above the stack frame. We need to know this accurately to get the
19212 alignment of the rest of the stack frame correct. */
19214 static int
19215 arm_compute_static_chain_stack_bytes (void)
19217 /* See the defining assertion in arm_expand_prologue. */
19218 if (IS_NESTED (arm_current_func_type ())
19219 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19220 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19221 || flag_stack_clash_protection)
19222 && !df_regs_ever_live_p (LR_REGNUM)))
19223 && arm_r3_live_at_start_p ()
19224 && crtl->args.pretend_args_size == 0)
19225 return 4;
19227 return 0;
19230 /* Compute a bit mask of which core registers need to be
19231 saved on the stack for the current function.
19232 This is used by arm_compute_frame_layout, which may add extra registers. */
19234 static unsigned long
19235 arm_compute_save_core_reg_mask (void)
19237 unsigned int save_reg_mask = 0;
19238 unsigned long func_type = arm_current_func_type ();
19239 unsigned int reg;
19241 if (IS_NAKED (func_type))
19242 /* This should never really happen. */
19243 return 0;
19245 /* If we are creating a stack frame, then we must save the frame pointer,
19246 IP (which will hold the old stack pointer), LR and the PC. */
19247 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19248 save_reg_mask |=
19249 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19250 | (1 << IP_REGNUM)
19251 | (1 << LR_REGNUM)
19252 | (1 << PC_REGNUM);
19254 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19256 /* Decide if we need to save the link register.
19257 Interrupt routines have their own banked link register,
19258 so they never need to save it.
19259 Otherwise if we do not use the link register we do not need to save
19260 it. If we are pushing other registers onto the stack however, we
19261 can save an instruction in the epilogue by pushing the link register
19262 now and then popping it back into the PC. This incurs extra memory
19263 accesses though, so we only do it when optimizing for size, and only
19264 if we know that we will not need a fancy return sequence. */
19265 if (df_regs_ever_live_p (LR_REGNUM)
19266 || (save_reg_mask
19267 && optimize_size
19268 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19269 && !crtl->tail_call_emit
19270 && !crtl->calls_eh_return))
19271 save_reg_mask |= 1 << LR_REGNUM;
19273 if (cfun->machine->lr_save_eliminated)
19274 save_reg_mask &= ~ (1 << LR_REGNUM);
19276 if (TARGET_REALLY_IWMMXT
19277 && ((bit_count (save_reg_mask)
19278 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19279 arm_compute_static_chain_stack_bytes())
19280 ) % 2) != 0)
19282 /* The total number of registers that are going to be pushed
19283 onto the stack is odd. We need to ensure that the stack
19284 is 64-bit aligned before we start to save iWMMXt registers,
19285 and also before we start to create locals. (A local variable
19286 might be a double or long long which we will load/store using
19287 an iWMMXt instruction). Therefore we need to push another
19288 ARM register, so that the stack will be 64-bit aligned. We
19289 try to avoid using the arg registers (r0 - r3) as they might be
19290 used to pass values in a tail call. */
19291 for (reg = 4; reg <= 12; reg++)
19292 if ((save_reg_mask & (1 << reg)) == 0)
19293 break;
19295 if (reg <= 12)
19296 save_reg_mask |= (1 << reg);
19297 else
19299 cfun->machine->sibcall_blocked = 1;
19300 save_reg_mask |= (1 << 3);
19304 /* We may need to push an additional register for use initializing the
19305 PIC base register. */
19306 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19307 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19309 reg = thumb_find_work_register (1 << 4);
19310 if (!call_used_regs[reg])
19311 save_reg_mask |= (1 << reg);
19314 return save_reg_mask;
19317 /* Compute a bit mask of which core registers need to be
19318 saved on the stack for the current function. */
19319 static unsigned long
19320 thumb1_compute_save_core_reg_mask (void)
19322 unsigned long mask;
19323 unsigned reg;
19325 mask = 0;
19326 for (reg = 0; reg < 12; reg ++)
19327 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19328 mask |= 1 << reg;
19330 /* Handle the frame pointer as a special case. */
19331 if (frame_pointer_needed)
19332 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19334 if (flag_pic
19335 && !TARGET_SINGLE_PIC_BASE
19336 && arm_pic_register != INVALID_REGNUM
19337 && crtl->uses_pic_offset_table)
19338 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19340 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19341 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19342 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19344 /* LR will also be pushed if any lo regs are pushed. */
19345 if (mask & 0xff || thumb_force_lr_save ())
19346 mask |= (1 << LR_REGNUM);
19348 /* Make sure we have a low work register if we need one.
19349 We will need one if we are going to push a high register,
19350 but we are not currently intending to push a low register. */
19351 if ((mask & 0xff) == 0
19352 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19354 /* Use thumb_find_work_register to choose which register
19355 we will use. If the register is live then we will
19356 have to push it. Use LAST_LO_REGNUM as our fallback
19357 choice for the register to select. */
19358 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19359 /* Make sure the register returned by thumb_find_work_register is
19360 not part of the return value. */
19361 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19362 reg = LAST_LO_REGNUM;
19364 if (callee_saved_reg_p (reg))
19365 mask |= 1 << reg;
19368 /* The 504 below is 8 bytes less than 512 because there are two possible
19369 alignment words. We can't tell here if they will be present or not so we
19370 have to play it safe and assume that they are. */
19371 if ((CALLER_INTERWORKING_SLOT_SIZE +
19372 ROUND_UP_WORD (get_frame_size ()) +
19373 crtl->outgoing_args_size) >= 504)
19375 /* This is the same as the code in thumb1_expand_prologue() which
19376 determines which register to use for stack decrement. */
19377 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19378 if (mask & (1 << reg))
19379 break;
19381 if (reg > LAST_LO_REGNUM)
19383 /* Make sure we have a register available for stack decrement. */
19384 mask |= 1 << LAST_LO_REGNUM;
19388 return mask;
19392 /* Return the number of bytes required to save VFP registers. */
19393 static int
19394 arm_get_vfp_saved_size (void)
19396 unsigned int regno;
19397 int count;
19398 int saved;
19400 saved = 0;
19401 /* Space for saved VFP registers. */
19402 if (TARGET_HARD_FLOAT)
19404 count = 0;
19405 for (regno = FIRST_VFP_REGNUM;
19406 regno < LAST_VFP_REGNUM;
19407 regno += 2)
19409 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19410 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19412 if (count > 0)
19414 /* Workaround ARM10 VFPr1 bug. */
19415 if (count == 2 && !arm_arch6)
19416 count++;
19417 saved += count * 8;
19419 count = 0;
19421 else
19422 count++;
19424 if (count > 0)
19426 if (count == 2 && !arm_arch6)
19427 count++;
19428 saved += count * 8;
19431 return saved;
19435 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19436 everything bar the final return instruction. If simple_return is true,
19437 then do not output epilogue, because it has already been emitted in RTL.
19439 Note: do not forget to update length attribute of corresponding insn pattern
19440 when changing assembly output (eg. length attribute of
19441 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19442 register clearing sequences). */
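/* As a rough illustration (a sketch for a normal ARM-mode function whose
only saved core registers are r4, r5 and lr, with no interworking, CMSE
or interrupt requirements), the code below prints the single instruction

	pop	{r4, r5, pc}

and returns without emitting a separate return instruction.  */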
19443 const char *
19444 output_return_instruction (rtx operand, bool really_return, bool reverse,
19445 bool simple_return)
19447 char conditional[10];
19448 char instr[100];
19449 unsigned reg;
19450 unsigned long live_regs_mask;
19451 unsigned long func_type;
19452 arm_stack_offsets *offsets;
19454 func_type = arm_current_func_type ();
19456 if (IS_NAKED (func_type))
19457 return "";
19459 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19461 /* If this function was declared non-returning, and we have
19462 found a tail call, then we have to trust that the called
19463 function won't return. */
19464 if (really_return)
19466 rtx ops[2];
19468 /* Otherwise, trap an attempted return by aborting. */
19469 ops[0] = operand;
19470 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19471 : "abort");
19472 assemble_external_libcall (ops[1]);
19473 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19476 return "";
19479 gcc_assert (!cfun->calls_alloca || really_return);
19481 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19483 cfun->machine->return_used_this_function = 1;
19485 offsets = arm_get_frame_offsets ();
19486 live_regs_mask = offsets->saved_regs_mask;
19488 if (!simple_return && live_regs_mask)
19490 const char * return_reg;
19492 /* If we do not have any special requirements for function exit
19493 (e.g. interworking) then we can load the return address
19494 directly into the PC. Otherwise we must load it into LR. */
19495 if (really_return
19496 && !IS_CMSE_ENTRY (func_type)
19497 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19498 return_reg = reg_names[PC_REGNUM];
19499 else
19500 return_reg = reg_names[LR_REGNUM];
19502 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19504 /* There are three possible reasons for the IP register
19505 being saved. 1) a stack frame was created, in which case
19506 IP contains the old stack pointer, or 2) an ISR routine
19507 corrupted it, or 3) it was saved to align the stack on
19508 iWMMXt. In case 1, restore IP into SP, otherwise just
19509 restore IP. */
19510 if (frame_pointer_needed)
19512 live_regs_mask &= ~ (1 << IP_REGNUM);
19513 live_regs_mask |= (1 << SP_REGNUM);
19515 else
19516 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19519 /* On some ARM architectures it is faster to use LDR rather than
19520 LDM to load a single register. On other architectures, the
19521 cost is the same. In 26 bit mode, or for exception handlers,
19522 we have to use LDM to load the PC so that the CPSR is also
19523 restored. */
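/* As an illustration: if LR is the only register in live_regs_mask for a
normal ARM-mode function without interworking, the single instruction
printed here is "ldr pc, [sp], #4" (plus any condition).  */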
19524 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19525 if (live_regs_mask == (1U << reg))
19526 break;
19528 if (reg <= LAST_ARM_REGNUM
19529 && (reg != LR_REGNUM
19530 || ! really_return
19531 || ! IS_INTERRUPT (func_type)))
19533 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19534 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19536 else
19538 char *p;
19539 int first = 1;
19541 /* Generate the load multiple instruction to restore the
19542 registers. Note we can get here, even if
19543 frame_pointer_needed is true, but only if sp already
19544 points to the base of the saved core registers. */
19545 if (live_regs_mask & (1 << SP_REGNUM))
19547 unsigned HOST_WIDE_INT stack_adjust;
19549 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19550 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19552 if (stack_adjust && arm_arch5 && TARGET_ARM)
19553 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19554 else
19556 /* If we can't use ldmib (SA110 bug),
19557 then try to pop r3 instead. */
19558 if (stack_adjust)
19559 live_regs_mask |= 1 << 3;
19561 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19564 /* For interrupt returns we have to use an LDM rather than
19565 a POP so that we can use the exception return variant. */
19566 else if (IS_INTERRUPT (func_type))
19567 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19568 else
19569 sprintf (instr, "pop%s\t{", conditional);
19571 p = instr + strlen (instr);
19573 for (reg = 0; reg <= SP_REGNUM; reg++)
19574 if (live_regs_mask & (1 << reg))
19576 int l = strlen (reg_names[reg]);
19578 if (first)
19579 first = 0;
19580 else
19582 memcpy (p, ", ", 2);
19583 p += 2;
19586 memcpy (p, "%|", 2);
19587 memcpy (p + 2, reg_names[reg], l);
19588 p += l + 2;
19591 if (live_regs_mask & (1 << LR_REGNUM))
19593 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19594 /* If returning from an interrupt, restore the CPSR. */
19595 if (IS_INTERRUPT (func_type))
19596 strcat (p, "^");
19598 else
19599 strcpy (p, "}");
19602 output_asm_insn (instr, & operand);
19604 /* See if we need to generate an extra instruction to
19605 perform the actual function return. */
19606 if (really_return
19607 && func_type != ARM_FT_INTERWORKED
19608 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19610 /* The return has already been handled
19611 by loading the LR into the PC. */
19612 return "";
19616 if (really_return)
19618 switch ((int) ARM_FUNC_TYPE (func_type))
19620 case ARM_FT_ISR:
19621 case ARM_FT_FIQ:
19622 /* ??? This is wrong for unified assembly syntax. */
19623 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19624 break;
19626 case ARM_FT_INTERWORKED:
19627 gcc_assert (arm_arch5 || arm_arch4t);
19628 sprintf (instr, "bx%s\t%%|lr", conditional);
19629 break;
19631 case ARM_FT_EXCEPTION:
19632 /* ??? This is wrong for unified assembly syntax. */
19633 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19634 break;
19636 default:
19637 if (IS_CMSE_ENTRY (func_type))
19639 /* Check if we have to clear the 'GE bits' which is only used if
19640 parallel add and subtraction instructions are available. */
19641 if (TARGET_INT_SIMD)
19642 snprintf (instr, sizeof (instr),
19643 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19644 else
19645 snprintf (instr, sizeof (instr),
19646 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19648 output_asm_insn (instr, & operand);
19649 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19651 /* Clear the cumulative exception-status bits (0-4,7) and the
19652 condition code bits (28-31) of the FPSCR. We need to
19653 remember to clear the first scratch register used (IP) and
19654 save and restore the second (r4). */
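/* The movw/movt pair below builds the constant 0x0FFFFF60 in r4
(65376 == 0xFF60, 4095 << 16 == 0x0FFF0000); ANDing the FPSCR copy
in IP with it clears exactly the bits listed above.  */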
19655 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19656 output_asm_insn (instr, & operand);
19657 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19658 output_asm_insn (instr, & operand);
19659 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19660 output_asm_insn (instr, & operand);
19661 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19662 output_asm_insn (instr, & operand);
19663 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19664 output_asm_insn (instr, & operand);
19665 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19666 output_asm_insn (instr, & operand);
19667 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19668 output_asm_insn (instr, & operand);
19669 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19670 output_asm_insn (instr, & operand);
19672 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19674 /* Use bx if it's available. */
19675 else if (arm_arch5 || arm_arch4t)
19676 sprintf (instr, "bx%s\t%%|lr", conditional);
19677 else
19678 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19679 break;
19682 output_asm_insn (instr, & operand);
19685 return "";
19688 /* Output in FILE asm statements needed to declare the NAME of the function
19689 defined by its DECL node. */
19691 void
19692 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19694 size_t cmse_name_len;
19695 char *cmse_name = 0;
19696 char cmse_prefix[] = "__acle_se_";
19698 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19699 extra function label for each function with the 'cmse_nonsecure_entry'
19700 attribute. This extra function label should be prepended with
19701 '__acle_se_', telling the linker that it needs to create secure gateway
19702 veneers for this function. */
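/* As an illustration, for a hypothetical function "foo" carrying the
attribute, both "foo" and "__acle_se_foo" end up as labels on the same
address below, with "__acle_se_foo" additionally made global so the
linker can build the gateway veneer.  */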
19703 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19704 DECL_ATTRIBUTES (decl)))
19706 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19707 cmse_name = XALLOCAVEC (char, cmse_name_len);
19708 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19709 targetm.asm_out.globalize_label (file, cmse_name);
19711 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19712 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19715 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19716 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19717 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19718 ASM_OUTPUT_LABEL (file, name);
19720 if (cmse_name)
19721 ASM_OUTPUT_LABEL (file, cmse_name);
19723 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19726 /* Write the function name into the code section, directly preceding
19727 the function prologue.
19729 Code will be output similar to this:
19731 .ascii "arm_poke_function_name", 0
19732 .align
19734 .word 0xff000000 + (t1 - t0)
19735 arm_poke_function_name
19736 mov ip, sp
19737 stmfd sp!, {fp, ip, lr, pc}
19738 sub fp, ip, #4
19740 When performing a stack backtrace, code can inspect the value
19741 of 'pc' stored at 'fp' + 0. If the trace function then looks
19742 at location pc - 12 and the top 8 bits are set, then we know
19743 that there is a function name embedded immediately preceding this
19744 location, and that it has length ((pc[-3]) & ~0xff000000).
19746 We assume that pc is declared as a pointer to an unsigned long.
19748 It is of no benefit to output the function name if we are assembling
19749 a leaf function. These function types will not contain a stack
19750 backtrace structure, therefore it is not possible to determine the
19751 function name. */
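/* A short worked example: for NAME == "foo", LENGTH is 4 (strlen plus the
trailing NUL), ALIGNLENGTH rounds up to 4 (assuming ROUND_UP_WORD rounds
to a multiple of UNITS_PER_WORD), and the marker word emitted after the
string is 0xff000000 + 4 == 0xff000004.  */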
19752 void
19753 arm_poke_function_name (FILE *stream, const char *name)
19755 unsigned long alignlength;
19756 unsigned long length;
19757 rtx x;
19759 length = strlen (name) + 1;
19760 alignlength = ROUND_UP_WORD (length);
19762 ASM_OUTPUT_ASCII (stream, name, length);
19763 ASM_OUTPUT_ALIGN (stream, 2);
19764 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19765 assemble_aligned_integer (UNITS_PER_WORD, x);
19768 /* Place some comments into the assembler stream
19769 describing the current function. */
19770 static void
19771 arm_output_function_prologue (FILE *f)
19773 unsigned long func_type;
19775 /* Sanity check. */
19776 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19778 func_type = arm_current_func_type ();
19780 switch ((int) ARM_FUNC_TYPE (func_type))
19782 default:
19783 case ARM_FT_NORMAL:
19784 break;
19785 case ARM_FT_INTERWORKED:
19786 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19787 break;
19788 case ARM_FT_ISR:
19789 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19790 break;
19791 case ARM_FT_FIQ:
19792 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19793 break;
19794 case ARM_FT_EXCEPTION:
19795 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19796 break;
19799 if (IS_NAKED (func_type))
19800 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19802 if (IS_VOLATILE (func_type))
19803 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19805 if (IS_NESTED (func_type))
19806 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19807 if (IS_STACKALIGN (func_type))
19808 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19809 if (IS_CMSE_ENTRY (func_type))
19810 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19812 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19813 crtl->args.size,
19814 crtl->args.pretend_args_size,
19815 (HOST_WIDE_INT) get_frame_size ());
19817 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19818 frame_pointer_needed,
19819 cfun->machine->uses_anonymous_args);
19821 if (cfun->machine->lr_save_eliminated)
19822 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19824 if (crtl->calls_eh_return)
19825 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19829 static void
19830 arm_output_function_epilogue (FILE *)
19832 arm_stack_offsets *offsets;
19834 if (TARGET_THUMB1)
19836 int regno;
19838 /* Emit any call-via-reg trampolines that are needed for v4t support
19839 of call_reg and call_value_reg type insns. */
19840 for (regno = 0; regno < LR_REGNUM; regno++)
19842 rtx label = cfun->machine->call_via[regno];
19844 if (label != NULL)
19846 switch_to_section (function_section (current_function_decl));
19847 targetm.asm_out.internal_label (asm_out_file, "L",
19848 CODE_LABEL_NUMBER (label));
19849 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19853 /* ??? Probably not safe to set this here, since it assumes that a
19854 function will be emitted as assembly immediately after we generate
19855 RTL for it. This does not happen for inline functions. */
19856 cfun->machine->return_used_this_function = 0;
19858 else /* TARGET_32BIT */
19860 /* We need to take into account any stack-frame rounding. */
19861 offsets = arm_get_frame_offsets ();
19863 gcc_assert (!use_return_insn (FALSE, NULL)
19864 || (cfun->machine->return_used_this_function != 0)
19865 || offsets->saved_regs == offsets->outgoing_args
19866 || frame_pointer_needed);
19870 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19871 STR and STRD.  If an even number of registers is being pushed, an
19872 STRD pattern is created for each register pair.  If an odd number of
19873 registers is pushed, an initial STR is emitted, followed by
19874 as many STRD instructions as are needed. This works best when the
19875 stack is initially 64-bit aligned (the normal case), since it
19876 ensures that each STRD is also 64-bit aligned. */
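/* For example (a sketch for a SAVED_REGS_MASK of {r4, r5, r6}), the
emitted sequence is roughly:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

i.e. the single odd register is stored first, with writeback allocating
space for all three saves.  */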
19877 static void
19878 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19880 int num_regs = 0;
19881 int i;
19882 int regno;
19883 rtx par = NULL_RTX;
19884 rtx dwarf = NULL_RTX;
19885 rtx tmp;
19886 bool first = true;
19888 num_regs = bit_count (saved_regs_mask);
19890 /* Must be at least one register to save, and can't save SP or PC. */
19891 gcc_assert (num_regs > 0 && num_regs <= 14);
19892 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19893 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19895 /* Create sequence for DWARF info. All the frame-related data for
19896 debugging is held in this wrapper. */
19897 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19899 /* Describe the stack adjustment. */
19900 tmp = gen_rtx_SET (stack_pointer_rtx,
19901 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19902 RTX_FRAME_RELATED_P (tmp) = 1;
19903 XVECEXP (dwarf, 0, 0) = tmp;
19905 /* Find the first register. */
19906 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19909 i = 0;
19911 /* If there's an odd number of registers to push, start off by
19912 pushing a single register. This ensures that subsequent strd
19913 operations are dword aligned (assuming that SP was originally
19914 64-bit aligned). */
19915 if ((num_regs & 1) != 0)
19917 rtx reg, mem, insn;
19919 reg = gen_rtx_REG (SImode, regno);
19920 if (num_regs == 1)
19921 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19922 stack_pointer_rtx));
19923 else
19924 mem = gen_frame_mem (Pmode,
19925 gen_rtx_PRE_MODIFY
19926 (Pmode, stack_pointer_rtx,
19927 plus_constant (Pmode, stack_pointer_rtx,
19928 -4 * num_regs)));
19930 tmp = gen_rtx_SET (mem, reg);
19931 RTX_FRAME_RELATED_P (tmp) = 1;
19932 insn = emit_insn (tmp);
19933 RTX_FRAME_RELATED_P (insn) = 1;
19934 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19935 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19936 RTX_FRAME_RELATED_P (tmp) = 1;
19937 i++;
19938 regno++;
19939 XVECEXP (dwarf, 0, i) = tmp;
19940 first = false;
19943 while (i < num_regs)
19944 if (saved_regs_mask & (1 << regno))
19946 rtx reg1, reg2, mem1, mem2;
19947 rtx tmp0, tmp1, tmp2;
19948 int regno2;
19950 /* Find the register to pair with this one. */
19951 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19952 regno2++)
19955 reg1 = gen_rtx_REG (SImode, regno);
19956 reg2 = gen_rtx_REG (SImode, regno2);
19958 if (first)
19960 rtx insn;
19962 first = false;
19963 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 -4 * num_regs));
19966 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19967 stack_pointer_rtx,
19968 -4 * (num_regs - 1)));
19969 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19970 plus_constant (Pmode, stack_pointer_rtx,
19971 -4 * (num_regs)));
19972 tmp1 = gen_rtx_SET (mem1, reg1);
19973 tmp2 = gen_rtx_SET (mem2, reg2);
19974 RTX_FRAME_RELATED_P (tmp0) = 1;
19975 RTX_FRAME_RELATED_P (tmp1) = 1;
19976 RTX_FRAME_RELATED_P (tmp2) = 1;
19977 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19978 XVECEXP (par, 0, 0) = tmp0;
19979 XVECEXP (par, 0, 1) = tmp1;
19980 XVECEXP (par, 0, 2) = tmp2;
19981 insn = emit_insn (par);
19982 RTX_FRAME_RELATED_P (insn) = 1;
19983 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19985 else
19987 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19988 stack_pointer_rtx,
19989 4 * i));
19990 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19991 stack_pointer_rtx,
19992 4 * (i + 1)));
19993 tmp1 = gen_rtx_SET (mem1, reg1);
19994 tmp2 = gen_rtx_SET (mem2, reg2);
19995 RTX_FRAME_RELATED_P (tmp1) = 1;
19996 RTX_FRAME_RELATED_P (tmp2) = 1;
19997 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19998 XVECEXP (par, 0, 0) = tmp1;
19999 XVECEXP (par, 0, 1) = tmp2;
20000 emit_insn (par);
20003 /* Create unwind information. This is an approximation. */
20004 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20005 plus_constant (Pmode,
20006 stack_pointer_rtx,
20007 4 * i)),
20008 reg1);
20009 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20010 plus_constant (Pmode,
20011 stack_pointer_rtx,
20012 4 * (i + 1))),
20013 reg2);
20015 RTX_FRAME_RELATED_P (tmp1) = 1;
20016 RTX_FRAME_RELATED_P (tmp2) = 1;
20017 XVECEXP (dwarf, 0, i + 1) = tmp1;
20018 XVECEXP (dwarf, 0, i + 2) = tmp2;
20019 i += 2;
20020 regno = regno2 + 1;
20022 else
20023 regno++;
20025 return;
20028 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20029 whenever possible, otherwise it emits single-word stores. The first store
20030 also allocates stack space for all saved registers, using writeback with
20031 post-addressing mode. All other stores use offset addressing. If no STRD
20032 can be emitted, this function emits a sequence of single-word stores,
20033 and not an STM as before, because single-word stores provide more
20034 scheduling freedom and can be turned into an STM by peephole optimizations. */
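/* For example (a sketch for a SAVED_REGS_MASK of {r4, r5, r7}), the
emitted sequence is roughly:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

i.e. the first store both saves a register pair and allocates the whole
save area.  */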
20035 static void
20036 arm_emit_strd_push (unsigned long saved_regs_mask)
20038 int num_regs = 0;
20039 int i, j, dwarf_index = 0;
20040 int offset = 0;
20041 rtx dwarf = NULL_RTX;
20042 rtx insn = NULL_RTX;
20043 rtx tmp, mem;
20045 /* TODO: More efficient code can be emitted by changing the
20046 layout, e.g., first push all pairs that can use STRD to keep the
20047 stack aligned, and then push all other registers. */
20048 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20049 if (saved_regs_mask & (1 << i))
20050 num_regs++;
20052 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20053 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20054 gcc_assert (num_regs > 0);
20056 /* Create sequence for DWARF info. */
20057 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20059 /* For dwarf info, we generate explicit stack update. */
20060 tmp = gen_rtx_SET (stack_pointer_rtx,
20061 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20062 RTX_FRAME_RELATED_P (tmp) = 1;
20063 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20065 /* Save registers. */
20066 offset = - 4 * num_regs;
20067 j = 0;
20068 while (j <= LAST_ARM_REGNUM)
20069 if (saved_regs_mask & (1 << j))
20071 if ((j % 2 == 0)
20072 && (saved_regs_mask & (1 << (j + 1))))
20074 /* Current register and next register form a register pair for
20075 which STRD can be generated. */
20076 if (offset < 0)
20078 /* Allocate stack space for all saved registers. */
20079 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20080 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20081 mem = gen_frame_mem (DImode, tmp);
20082 offset = 0;
20084 else if (offset > 0)
20085 mem = gen_frame_mem (DImode,
20086 plus_constant (Pmode,
20087 stack_pointer_rtx,
20088 offset));
20089 else
20090 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20092 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20093 RTX_FRAME_RELATED_P (tmp) = 1;
20094 tmp = emit_insn (tmp);
20096 /* Record the first store insn. */
20097 if (dwarf_index == 1)
20098 insn = tmp;
20100 /* Generate dwarf info. */
20101 mem = gen_frame_mem (SImode,
20102 plus_constant (Pmode,
20103 stack_pointer_rtx,
20104 offset));
20105 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20106 RTX_FRAME_RELATED_P (tmp) = 1;
20107 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20109 mem = gen_frame_mem (SImode,
20110 plus_constant (Pmode,
20111 stack_pointer_rtx,
20112 offset + 4));
20113 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20114 RTX_FRAME_RELATED_P (tmp) = 1;
20115 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20117 offset += 8;
20118 j += 2;
20120 else
20122 /* Emit a single word store. */
20123 if (offset < 0)
20125 /* Allocate stack space for all saved registers. */
20126 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20127 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20128 mem = gen_frame_mem (SImode, tmp);
20129 offset = 0;
20131 else if (offset > 0)
20132 mem = gen_frame_mem (SImode,
20133 plus_constant (Pmode,
20134 stack_pointer_rtx,
20135 offset));
20136 else
20137 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20139 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20140 RTX_FRAME_RELATED_P (tmp) = 1;
20141 tmp = emit_insn (tmp);
20143 /* Record the first store insn. */
20144 if (dwarf_index == 1)
20145 insn = tmp;
20147 /* Generate dwarf info. */
20148 mem = gen_frame_mem (SImode,
20149 plus_constant (Pmode,
20150 stack_pointer_rtx,
20151 offset));
20152 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20153 RTX_FRAME_RELATED_P (tmp) = 1;
20154 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20156 offset += 4;
20157 j += 1;
20160 else
20161 j++;
20163 /* Attach dwarf info to the first insn we generate. */
20164 gcc_assert (insn != NULL_RTX);
20165 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20166 RTX_FRAME_RELATED_P (insn) = 1;
20169 /* Generate and emit an insn that we will recognize as a push_multi.
20170 Unfortunately, since this insn does not reflect very well the actual
20171 semantics of the operation, we need to annotate the insn for the benefit
20172 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20173 MASK for registers that should be annotated for DWARF2 frame unwind
20174 information. */
20175 static rtx
20176 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20178 int num_regs = 0;
20179 int num_dwarf_regs = 0;
20180 int i, j;
20181 rtx par;
20182 rtx dwarf;
20183 int dwarf_par_index;
20184 rtx tmp, reg;
20186 /* We don't record the PC in the dwarf frame information. */
20187 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20189 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20191 if (mask & (1 << i))
20192 num_regs++;
20193 if (dwarf_regs_mask & (1 << i))
20194 num_dwarf_regs++;
20197 gcc_assert (num_regs && num_regs <= 16);
20198 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20200 /* For the body of the insn we are going to generate an UNSPEC in
20201 parallel with several USEs. This allows the insn to be recognized
20202 by the push_multi pattern in the arm.md file.
20204 The body of the insn looks something like this:
20206 (parallel [
20207 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20208 (const_int:SI <num>)))
20209 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20210 (use (reg:SI XX))
20211 (use (reg:SI YY))
20215 For the frame note however, we try to be more explicit and actually
20216 show each register being stored into the stack frame, plus a (single)
20217 decrement of the stack pointer. We do it this way in order to be
20218 friendly to the stack unwinding code, which only wants to see a single
20219 stack decrement per instruction. The RTL we generate for the note looks
20220 something like this:
20222 (sequence [
20223 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20224 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20225 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20226 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20230 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20231 instead we'd have a parallel expression detailing all
20232 the stores to the various memory addresses so that debug
20233 information is more up-to-date. Remember however while writing
20234 this to take care of the constraints with the push instruction.
20236 Note also that this has to be taken care of for the VFP registers.
20238 For more see PR43399. */
20240 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20241 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20242 dwarf_par_index = 1;
20244 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20246 if (mask & (1 << i))
20248 reg = gen_rtx_REG (SImode, i);
20250 XVECEXP (par, 0, 0)
20251 = gen_rtx_SET (gen_frame_mem
20252 (BLKmode,
20253 gen_rtx_PRE_MODIFY (Pmode,
20254 stack_pointer_rtx,
20255 plus_constant
20256 (Pmode, stack_pointer_rtx,
20257 -4 * num_regs))
20259 gen_rtx_UNSPEC (BLKmode,
20260 gen_rtvec (1, reg),
20261 UNSPEC_PUSH_MULT));
20263 if (dwarf_regs_mask & (1 << i))
20265 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20266 reg);
20267 RTX_FRAME_RELATED_P (tmp) = 1;
20268 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20271 break;
20275 for (j = 1, i++; j < num_regs; i++)
20277 if (mask & (1 << i))
20279 reg = gen_rtx_REG (SImode, i);
20281 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20283 if (dwarf_regs_mask & (1 << i))
20286 tmp = gen_rtx_SET (gen_frame_mem
20287 (SImode,
20288 plus_constant (Pmode, stack_pointer_rtx,
20289 4 * j)),
20290 reg);
20291 RTX_FRAME_RELATED_P (tmp) = 1;
20292 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20295 j++;
20299 par = emit_insn (par);
20301 tmp = gen_rtx_SET (stack_pointer_rtx,
20302 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (dwarf, 0, 0) = tmp;
20306 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20308 return par;
20311 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20312 SIZE is the offset to be adjusted.
20313 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20314 static void
20315 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20317 rtx dwarf;
20319 RTX_FRAME_RELATED_P (insn) = 1;
20320 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20321 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20324 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20325 SAVED_REGS_MASK shows which registers need to be restored.
20327 Unfortunately, since this insn does not reflect very well the actual
20328 semantics of the operation, we need to annotate the insn for the benefit
20329 of DWARF2 frame unwind information. */
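/* As a rough illustration (not the only possible output), a
SAVED_REGS_MASK of {r4, r5, pc} produces a parallel that the pop_multi
pattern prints as something like "pop {r4, r5, pc}".  */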
20330 static void
20331 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20333 int num_regs = 0;
20334 int i, j;
20335 rtx par;
20336 rtx dwarf = NULL_RTX;
20337 rtx tmp, reg;
20338 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20339 int offset_adj;
20340 int emit_update;
20342 offset_adj = return_in_pc ? 1 : 0;
20343 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20344 if (saved_regs_mask & (1 << i))
20345 num_regs++;
20347 gcc_assert (num_regs && num_regs <= 16);
20349 /* If SP is in reglist, then we don't emit SP update insn. */
20350 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20352 /* The parallel needs to hold num_regs SETs
20353 and one SET for the stack update. */
20354 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20356 if (return_in_pc)
20357 XVECEXP (par, 0, 0) = ret_rtx;
20359 if (emit_update)
20361 /* Increment the stack pointer, based on there being
20362 num_regs 4-byte registers to restore. */
20363 tmp = gen_rtx_SET (stack_pointer_rtx,
20364 plus_constant (Pmode,
20365 stack_pointer_rtx,
20366 4 * num_regs));
20367 RTX_FRAME_RELATED_P (tmp) = 1;
20368 XVECEXP (par, 0, offset_adj) = tmp;
20371 /* Now restore every reg, which may include PC. */
20372 for (j = 0, i = 0; j < num_regs; i++)
20373 if (saved_regs_mask & (1 << i))
20375 reg = gen_rtx_REG (SImode, i);
20376 if ((num_regs == 1) && emit_update && !return_in_pc)
20378 /* Emit single load with writeback. */
20379 tmp = gen_frame_mem (SImode,
20380 gen_rtx_POST_INC (Pmode,
20381 stack_pointer_rtx));
20382 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20383 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20384 return;
20387 tmp = gen_rtx_SET (reg,
20388 gen_frame_mem
20389 (SImode,
20390 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20391 RTX_FRAME_RELATED_P (tmp) = 1;
20392 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20394 /* We need to maintain a sequence for DWARF info too. As dwarf info
20395 should not have PC, skip PC. */
20396 if (i != PC_REGNUM)
20397 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20399 j++;
20402 if (return_in_pc)
20403 par = emit_jump_insn (par);
20404 else
20405 par = emit_insn (par);
20407 REG_NOTES (par) = dwarf;
20408 if (!return_in_pc)
20409 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20410 stack_pointer_rtx, stack_pointer_rtx);
20413 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20414 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20416 Unfortunately, since this insn does not reflect very well the actual
20417 semantics of the operation, we need to annotate the insn for the benefit
20418 of DWARF2 frame unwind information. */
20419 static void
20420 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20422 int i, j;
20423 rtx par;
20424 rtx dwarf = NULL_RTX;
20425 rtx tmp, reg;
20427 gcc_assert (num_regs && num_regs <= 32);
20429 /* Workaround ARM10 VFPr1 bug. */
20430 if (num_regs == 2 && !arm_arch6)
20432 if (first_reg == 15)
20433 first_reg--;
20435 num_regs++;
20438 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20439 there could be up to 32 D-registers to restore.
20440 If there are more than 16 D-registers, make two recursive calls,
20441 each of which emits one pop_multi instruction. */
20442 if (num_regs > 16)
20444 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20445 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20446 return;
20449 /* The parallel needs to hold num_regs SETs
20450 and one SET for the stack update. */
20451 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20453 /* Increment the stack pointer, based on there being
20454 num_regs 8-byte registers to restore. */
20455 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20456 RTX_FRAME_RELATED_P (tmp) = 1;
20457 XVECEXP (par, 0, 0) = tmp;
20459 /* Now show every reg that will be restored, using a SET for each. */
20460 for (j = 0, i = first_reg; j < num_regs; i += 2)
20462 reg = gen_rtx_REG (DFmode, i);
20464 tmp = gen_rtx_SET (reg,
20465 gen_frame_mem
20466 (DFmode,
20467 plus_constant (Pmode, base_reg, 8 * j)));
20468 RTX_FRAME_RELATED_P (tmp) = 1;
20469 XVECEXP (par, 0, j + 1) = tmp;
20471 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20473 j++;
20476 par = emit_insn (par);
20477 REG_NOTES (par) = dwarf;
20479 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP. */
20480 if (REGNO (base_reg) == IP_REGNUM)
20482 RTX_FRAME_RELATED_P (par) = 1;
20483 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20485 else
20486 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20487 base_reg, base_reg);
20490 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
20491 an even number of registers is being popped, an LDRD pattern is created for
20492 each register pair.  If an odd number of registers is popped, the last
20493 register is loaded using an LDR pattern. */
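/* As a rough illustration (a sketch for a SAVED_REGS_MASK of
{r4, r5, r6, pc}), the resulting epilogue is approximately:

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

where the final pop is emitted by arm_emit_multi_reg_pop.  */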
20494 static void
20495 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20497 int num_regs = 0;
20498 int i, j;
20499 rtx par = NULL_RTX;
20500 rtx dwarf = NULL_RTX;
20501 rtx tmp, reg, tmp1;
20502 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20504 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20505 if (saved_regs_mask & (1 << i))
20506 num_regs++;
20508 gcc_assert (num_regs && num_regs <= 16);
20510 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20511 to be popped. So, if num_regs is even, now it will become odd,
20512 and we can generate pop with PC. If num_regs is odd, it will be
20513 even now, and ldr with return can be generated for PC. */
20514 if (return_in_pc)
20515 num_regs--;
20517 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20519 /* Var j iterates over all the registers to gather all the registers in
20520 saved_regs_mask. Var i gives index of saved registers in stack frame.
20521 A PARALLEL RTX of register-pair is created here, so that pattern for
20522 LDRD can be matched. As PC is always last register to be popped, and
20523 we have already decremented num_regs if PC, we don't have to worry
20524 about PC in this loop. */
20525 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20526 if (saved_regs_mask & (1 << j))
20528 /* Create RTX for memory load. */
20529 reg = gen_rtx_REG (SImode, j);
20530 tmp = gen_rtx_SET (reg,
20531 gen_frame_mem (SImode,
20532 plus_constant (Pmode,
20533 stack_pointer_rtx, 4 * i)));
20534 RTX_FRAME_RELATED_P (tmp) = 1;
20536 if (i % 2 == 0)
20538 /* When saved-register index (i) is even, the RTX to be emitted is
20539 yet to be created. Hence create it first. The LDRD pattern we
20540 are generating is :
20541 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20542 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20543 where target registers need not be consecutive. */
20544 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20545 dwarf = NULL_RTX;
20548 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20549 added as 0th element and if i is odd, reg_i is added as 1st element
20550 of LDRD pattern shown above. */
20551 XVECEXP (par, 0, (i % 2)) = tmp;
20552 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20554 if ((i % 2) == 1)
20556 /* When saved-register index (i) is odd, RTXs for both the registers
20557 to be loaded are generated in above given LDRD pattern, and the
20558 pattern can be emitted now. */
20559 par = emit_insn (par);
20560 REG_NOTES (par) = dwarf;
20561 RTX_FRAME_RELATED_P (par) = 1;
20564 i++;
20567 /* If the number of registers popped is odd and return_in_pc is false, or the
20568 number of registers is even and return_in_pc is true, the last register is
20569 popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
20570 then use LDR with post increment.
20572 /* Increment the stack pointer, based on there being
20573 num_regs 4-byte registers to restore. */
20574 tmp = gen_rtx_SET (stack_pointer_rtx,
20575 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20576 RTX_FRAME_RELATED_P (tmp) = 1;
20577 tmp = emit_insn (tmp);
20578 if (!return_in_pc)
20580 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20581 stack_pointer_rtx, stack_pointer_rtx);
20584 dwarf = NULL_RTX;
20586 if (((num_regs % 2) == 1 && !return_in_pc)
20587 || ((num_regs % 2) == 0 && return_in_pc))
20589 /* Scan for the single register to be popped. Skip until the saved
20590 register is found. */
20591 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20593 /* Gen LDR with post increment here. */
20594 tmp1 = gen_rtx_MEM (SImode,
20595 gen_rtx_POST_INC (SImode,
20596 stack_pointer_rtx));
20597 set_mem_alias_set (tmp1, get_frame_alias_set ());
20599 reg = gen_rtx_REG (SImode, j);
20600 tmp = gen_rtx_SET (reg, tmp1);
20601 RTX_FRAME_RELATED_P (tmp) = 1;
20602 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20604 if (return_in_pc)
20606 /* If return_in_pc, j must be PC_REGNUM. */
20607 gcc_assert (j == PC_REGNUM);
20608 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20609 XVECEXP (par, 0, 0) = ret_rtx;
20610 XVECEXP (par, 0, 1) = tmp;
20611 par = emit_jump_insn (par);
20613 else
20615 par = emit_insn (tmp);
20616 REG_NOTES (par) = dwarf;
20617 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20618 stack_pointer_rtx, stack_pointer_rtx);
20622 else if ((num_regs % 2) == 1 && return_in_pc)
20624 /* There are 2 registers to be popped. So, generate the pattern
20625 pop_multiple_with_stack_update_and_return to pop in PC. */
20626 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20629 return;
20632 /* LDRD in ARM mode needs consecutive registers as operands. This function
20633 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20634 offset addressing and then generates one separate stack update.  This provides
20635 more scheduling freedom, compared to writeback on every load. However,
20636 if the function returns using load into PC directly
20637 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20638 before the last load. TODO: Add a peephole optimization to recognize
20639 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20640 peephole optimization to merge the load at stack-offset zero
20641 with the stack update instruction using load with writeback
20642 in post-index addressing mode. */
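/* As a rough illustration (a sketch for a SAVED_REGS_MASK of
{r4, r5, r6, pc}), the resulting epilogue is approximately:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

with the final load also performing the return.  */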
20643 static void
20644 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20646 int j = 0;
20647 int offset = 0;
20648 rtx par = NULL_RTX;
20649 rtx dwarf = NULL_RTX;
20650 rtx tmp, mem;
20652 /* Restore saved registers. */
20653 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20654 j = 0;
20655 while (j <= LAST_ARM_REGNUM)
20656 if (saved_regs_mask & (1 << j))
20658 if ((j % 2) == 0
20659 && (saved_regs_mask & (1 << (j + 1)))
20660 && (j + 1) != PC_REGNUM)
20662 /* Current register and next register form register pair for which
20663 LDRD can be generated. PC is always the last register popped, and
20664 we handle it separately. */
20665 if (offset > 0)
20666 mem = gen_frame_mem (DImode,
20667 plus_constant (Pmode,
20668 stack_pointer_rtx,
20669 offset));
20670 else
20671 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20673 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20674 tmp = emit_insn (tmp);
20675 RTX_FRAME_RELATED_P (tmp) = 1;
20677 /* Generate dwarf info. */
20679 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20680 gen_rtx_REG (SImode, j),
20681 NULL_RTX);
20682 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20683 gen_rtx_REG (SImode, j + 1),
20684 dwarf);
20686 REG_NOTES (tmp) = dwarf;
20688 offset += 8;
20689 j += 2;
20691 else if (j != PC_REGNUM)
20693 /* Emit a single word load. */
20694 if (offset > 0)
20695 mem = gen_frame_mem (SImode,
20696 plus_constant (Pmode,
20697 stack_pointer_rtx,
20698 offset));
20699 else
20700 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20702 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20703 tmp = emit_insn (tmp);
20704 RTX_FRAME_RELATED_P (tmp) = 1;
20706 /* Generate dwarf info. */
20707 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20708 gen_rtx_REG (SImode, j),
20709 NULL_RTX);
20711 offset += 4;
20712 j += 1;
20714 else /* j == PC_REGNUM */
20715 j++;
20717 else
20718 j++;
20720 /* Update the stack. */
20721 if (offset > 0)
20723 tmp = gen_rtx_SET (stack_pointer_rtx,
20724 plus_constant (Pmode,
20725 stack_pointer_rtx,
20726 offset));
20727 tmp = emit_insn (tmp);
20728 arm_add_cfa_adjust_cfa_note (tmp, offset,
20729 stack_pointer_rtx, stack_pointer_rtx);
20730 offset = 0;
20733 if (saved_regs_mask & (1 << PC_REGNUM))
20735 /* Only PC is to be popped. */
20736 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20737 XVECEXP (par, 0, 0) = ret_rtx;
20738 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20739 gen_frame_mem (SImode,
20740 gen_rtx_POST_INC (SImode,
20741 stack_pointer_rtx)));
20742 RTX_FRAME_RELATED_P (tmp) = 1;
20743 XVECEXP (par, 0, 1) = tmp;
20744 par = emit_jump_insn (par);
20746 /* Generate dwarf info. */
20747 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20748 gen_rtx_REG (SImode, PC_REGNUM),
20749 NULL_RTX);
20750 REG_NOTES (par) = dwarf;
20751 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20752 stack_pointer_rtx, stack_pointer_rtx);
20756 /* Calculate the size of the return value that is passed in registers. */
20757 static unsigned
20758 arm_size_return_regs (void)
20760 machine_mode mode;
20762 if (crtl->return_rtx != 0)
20763 mode = GET_MODE (crtl->return_rtx);
20764 else
20765 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20767 return GET_MODE_SIZE (mode);
20770 /* Return true if the current function needs to save/restore LR. */
20771 static bool
20772 thumb_force_lr_save (void)
20774 return !cfun->machine->lr_save_eliminated
20775 && (!crtl->is_leaf
20776 || thumb_far_jump_used_p ()
20777 || df_regs_ever_live_p (LR_REGNUM));
20780 /* Return true if CALL is an indirect tail call.  In that
20781 particular case we do not know whether r3 will be
20782 available. */
20783 static bool
20784 is_indirect_tailcall_p (rtx call)
20786 rtx pat = PATTERN (call);
20788 /* Indirect tail call. */
20789 pat = XVECEXP (pat, 0, 0);
20790 if (GET_CODE (pat) == SET)
20791 pat = SET_SRC (pat);
20793 pat = XEXP (XEXP (pat, 0), 0);
20794 return REG_P (pat);
20797 /* Return true if r3 is used by any of the tail call insns in the
20798 current function. */
20799 static bool
20800 any_sibcall_could_use_r3 (void)
20802 edge_iterator ei;
20803 edge e;
20805 if (!crtl->tail_call_emit)
20806 return false;
20807 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20808 if (e->flags & EDGE_SIBCALL)
20810 rtx_insn *call = BB_END (e->src);
20811 if (!CALL_P (call))
20812 call = prev_nonnote_nondebug_insn (call);
20813 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20814 if (find_regno_fusage (call, USE, 3)
20815 || is_indirect_tailcall_p (call))
20816 return true;
20818 return false;
20822 /* Compute the distance from register FROM to register TO.
20823 These can be the arg pointer (26), the soft frame pointer (25),
20824 the stack pointer (13) or the hard frame pointer (11).
20825 In thumb mode r7 is used as the soft frame pointer, if needed.
20826 Typical stack layout looks like this:
20828 old stack pointer -> | |
20829 ----
20830 | | \
20831 | | saved arguments for
20832 | | vararg functions
20833 | | /
20835 hard FP & arg pointer -> | | \
20836 | | stack
20837 | | frame
20838 | | /
20840 | | \
20841 | | call saved
20842 | | registers
20843 soft frame pointer -> | | /
20845 | | \
20846 | | local
20847 | | variables
20848 locals base pointer -> | | /
20850 | | \
20851 | | outgoing
20852 | | arguments
20853 current stack pointer -> | | /
20856 For a given function some or all of these stack components
20857 may not be needed, giving rise to the possibility of
20858 eliminating some of the registers.
20860 The values returned by this function must reflect the behavior
20861 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20863 The sign of the number returned reflects the direction of stack
20864 growth, so the values are positive for all eliminations except
20865 from the soft frame pointer to the hard frame pointer.
20867 SFP may point just inside the local variables block to ensure correct
20868 alignment. */
20871 /* Return cached stack offsets. */
20873 static arm_stack_offsets *
20874 arm_get_frame_offsets (void)
20876 struct arm_stack_offsets *offsets;
20878 offsets = &cfun->machine->stack_offsets;
20880 return offsets;
20884 /* Calculate stack offsets. These are used to calculate register elimination
20885 offsets and in prologue/epilogue code. Also calculates which registers
20886 should be saved. */
20888 static void
20889 arm_compute_frame_layout (void)
20891 struct arm_stack_offsets *offsets;
20892 unsigned long func_type;
20893 int saved;
20894 int core_saved;
20895 HOST_WIDE_INT frame_size;
20896 int i;
20898 offsets = &cfun->machine->stack_offsets;
20900 /* Initially this is the size of the local variables.  It will be translated
20901 into an offset once we have determined the size of preceding data. */
20902 frame_size = ROUND_UP_WORD (get_frame_size ());
20904 /* Space for variadic functions. */
20905 offsets->saved_args = crtl->args.pretend_args_size;
20907 /* In Thumb mode this is incorrect, but never used. */
20908 offsets->frame
20909 = (offsets->saved_args
20910 + arm_compute_static_chain_stack_bytes ()
20911 + (frame_pointer_needed ? 4 : 0));
20913 if (TARGET_32BIT)
20915 unsigned int regno;
20917 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20918 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20919 saved = core_saved;
20921 /* We know that SP will be doubleword aligned on entry, and we must
20922 preserve that condition at any subroutine call. We also require the
20923 soft frame pointer to be doubleword aligned. */
20925 if (TARGET_REALLY_IWMMXT)
20927 /* Check for the call-saved iWMMXt registers. */
20928 for (regno = FIRST_IWMMXT_REGNUM;
20929 regno <= LAST_IWMMXT_REGNUM;
20930 regno++)
20931 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20932 saved += 8;
20935 func_type = arm_current_func_type ();
20936 /* Space for saved VFP registers. */
20937 if (! IS_VOLATILE (func_type)
20938 && TARGET_HARD_FLOAT)
20939 saved += arm_get_vfp_saved_size ();
20941 else /* TARGET_THUMB1 */
20943 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20944 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20945 saved = core_saved;
20946 if (TARGET_BACKTRACE)
20947 saved += 16;
20950 /* Saved registers include the stack frame. */
20951 offsets->saved_regs
20952 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20953 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20955 /* A leaf function does not need any stack alignment if it has nothing
20956 on the stack. */
20957 if (crtl->is_leaf && frame_size == 0
20958 /* However if it calls alloca(), we have a dynamically allocated
20959 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20960 && ! cfun->calls_alloca)
20962 offsets->outgoing_args = offsets->soft_frame;
20963 offsets->locals_base = offsets->soft_frame;
20964 return;
20967 /* Ensure SFP has the correct alignment. */
20968 if (ARM_DOUBLEWORD_ALIGN
20969 && (offsets->soft_frame & 7))
20971 offsets->soft_frame += 4;
20972 /* Try to align stack by pushing an extra reg. Don't bother doing this
20973 when there is a stack frame as the alignment will be rolled into
20974 the normal stack adjustment. */
20975 if (frame_size + crtl->outgoing_args_size == 0)
20977 int reg = -1;
20979 /* Register r3 is caller-saved. Normally it does not need to be
20980 saved on entry by the prologue. However if we choose to save
20981 it for padding then we may confuse the compiler into thinking
20982 a prologue sequence is required when in fact it is not. This
20983 will occur when shrink-wrapping if r3 is used as a scratch
20984 register and there are no other callee-saved writes.
20986 This situation can be avoided when other callee-saved registers
20987 are available and r3 is not mandatory if we choose a callee-saved
20988 register for padding. */
20989 bool prefer_callee_reg_p = false;
20991 /* If it is safe to use r3, then do so. This sometimes
20992 generates better code on Thumb-2 by avoiding the need to
20993 use 32-bit push/pop instructions. */
20994 if (! any_sibcall_could_use_r3 ()
20995 && arm_size_return_regs () <= 12
20996 && (offsets->saved_regs_mask & (1 << 3)) == 0
20997 && (TARGET_THUMB2
20998 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21000 reg = 3;
21001 if (!TARGET_THUMB2)
21002 prefer_callee_reg_p = true;
21004 if (reg == -1
21005 || prefer_callee_reg_p)
21007 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21009 /* Avoid fixed registers; they may be changed at
21010 arbitrary times so it's unsafe to restore them
21011 during the epilogue. */
21012 if (!fixed_regs[i]
21013 && (offsets->saved_regs_mask & (1 << i)) == 0)
21015 reg = i;
21016 break;
21021 if (reg != -1)
21023 offsets->saved_regs += 4;
21024 offsets->saved_regs_mask |= (1 << reg);
21029 offsets->locals_base = offsets->soft_frame + frame_size;
21030 offsets->outgoing_args = (offsets->locals_base
21031 + crtl->outgoing_args_size);
21033 if (ARM_DOUBLEWORD_ALIGN)
21035 /* Ensure SP remains doubleword aligned. */
21036 if (offsets->outgoing_args & 7)
21037 offsets->outgoing_args += 4;
21038 gcc_assert (!(offsets->outgoing_args & 7));
21043 /* Calculate the relative offsets for the different stack pointers. Positive
21044 offsets are in the direction of stack growth. */
21046 HOST_WIDE_INT
21047 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21049 arm_stack_offsets *offsets;
21051 offsets = arm_get_frame_offsets ();
21053 /* OK, now we have enough information to compute the distances.
21054 There must be an entry in these switch tables for each pair
21055 of registers in ELIMINABLE_REGS, even if some of the entries
21056 seem to be redundant or useless. */
21057 switch (from)
21059 case ARG_POINTER_REGNUM:
21060 switch (to)
21062 case THUMB_HARD_FRAME_POINTER_REGNUM:
21063 return 0;
21065 case FRAME_POINTER_REGNUM:
21066 /* This is the reverse of the soft frame pointer
21067 to hard frame pointer elimination below. */
21068 return offsets->soft_frame - offsets->saved_args;
21070 case ARM_HARD_FRAME_POINTER_REGNUM:
21071 /* This is only non-zero in the case where the static chain register
21072 is stored above the frame. */
21073 return offsets->frame - offsets->saved_args - 4;
21075 case STACK_POINTER_REGNUM:
21076 /* If nothing has been pushed on the stack at all
21077 then this will return -4. This *is* correct! */
21078 return offsets->outgoing_args - (offsets->saved_args + 4);
21080 default:
21081 gcc_unreachable ();
21083 gcc_unreachable ();
21085 case FRAME_POINTER_REGNUM:
21086 switch (to)
21088 case THUMB_HARD_FRAME_POINTER_REGNUM:
21089 return 0;
21091 case ARM_HARD_FRAME_POINTER_REGNUM:
21092 /* The hard frame pointer points to the top entry in the
21093 stack frame. The soft frame pointer to the bottom entry
21094 in the stack frame. If there is no stack frame at all,
21095 then they are identical. */
21097 return offsets->frame - offsets->soft_frame;
21099 case STACK_POINTER_REGNUM:
21100 return offsets->outgoing_args - offsets->soft_frame;
21102 default:
21103 gcc_unreachable ();
21105 gcc_unreachable ();
21107 default:
21108 /* You cannot eliminate from the stack pointer.
21109 In theory you could eliminate from the hard frame
21110 pointer to the stack pointer, but this will never
21111 happen, since if a stack frame is not needed the
21112 hard frame pointer will never be used. */
21113 gcc_unreachable ();
21117 /* Given FROM and TO register numbers, say whether this elimination is
21118 allowed. Frame pointer elimination is automatically handled.
21120 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21121 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21122 pointer, we must eliminate FRAME_POINTER_REGNUM into
21123 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21124 ARG_POINTER_REGNUM. */
21126 bool
21127 arm_can_eliminate (const int from, const int to)
21129 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21130 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21131 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21132 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21133 true);
21136 /* Emit RTL to save coprocessor registers on function entry. Returns the
21137 number of bytes pushed. */
21139 static int
21140 arm_save_coproc_regs(void)
21142 int saved_size = 0;
21143 unsigned reg;
21144 unsigned start_reg;
21145 rtx insn;
21147 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21148 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21150 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21151 insn = gen_rtx_MEM (V2SImode, insn);
21152 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21153 RTX_FRAME_RELATED_P (insn) = 1;
21154 saved_size += 8;
21157 if (TARGET_HARD_FLOAT)
21159 start_reg = FIRST_VFP_REGNUM;
21161 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21163 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21164 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21166 if (start_reg != reg)
21167 saved_size += vfp_emit_fstmd (start_reg,
21168 (reg - start_reg) / 2);
21169 start_reg = reg + 2;
21172 if (start_reg != reg)
21173 saved_size += vfp_emit_fstmd (start_reg,
21174 (reg - start_reg) / 2);
21176 return saved_size;
21180 /* Set the Thumb frame pointer from the stack pointer. */
21182 static void
21183 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21185 HOST_WIDE_INT amount;
21186 rtx insn, dwarf;
21188 amount = offsets->outgoing_args - offsets->locals_base;
21189 if (amount < 1024)
21190 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21191 stack_pointer_rtx, GEN_INT (amount)));
21192 else
21194 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21195 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21196 expects the first two operands to be the same. */
21197 if (TARGET_THUMB2)
21199 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21200 stack_pointer_rtx,
21201 hard_frame_pointer_rtx));
21203 else
21205 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21206 hard_frame_pointer_rtx,
21207 stack_pointer_rtx));
21209 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21210 plus_constant (Pmode, stack_pointer_rtx, amount));
21211 RTX_FRAME_RELATED_P (dwarf) = 1;
21212 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21215 RTX_FRAME_RELATED_P (insn) = 1;
21218 struct scratch_reg {
21219 rtx reg;
21220 bool saved;
21223 /* Return a short-lived scratch register for use as a 2nd scratch register on
21224 function entry after the registers are saved in the prologue. This register
21225 must be released by means of release_scratch_register_on_entry. IP is not
21226 considered since it is always used as the 1st scratch register if available.
21228 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21229 mask of live registers. */
21231 static void
21232 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21233 unsigned long live_regs)
21235 int regno = -1;
21237 sr->saved = false;
21239 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21240 regno = LR_REGNUM;
21241 else
21243 unsigned int i;
21245 for (i = 4; i < 11; i++)
21246 if (regno1 != i && (live_regs & (1 << i)) != 0)
21248 regno = i;
21249 break;
21252 if (regno < 0)
21254 /* If IP is used as the 1st scratch register for a nested function,
21255 then either r3 wasn't available or is used to preserve IP. */
21256 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21257 regno1 = 3;
21258 regno = (regno1 == 3 ? 2 : 3);
21259 sr->saved
21260 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21261 regno);
21265 sr->reg = gen_rtx_REG (SImode, regno);
21266 if (sr->saved)
21268 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21269 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21270 rtx x = gen_rtx_SET (stack_pointer_rtx,
21271 plus_constant (Pmode, stack_pointer_rtx, -4));
21272 RTX_FRAME_RELATED_P (insn) = 1;
21273 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21277 /* Release a scratch register obtained from the preceding function. */
21279 static void
21280 release_scratch_register_on_entry (struct scratch_reg *sr)
21282 if (sr->saved)
21284 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21285 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21286 rtx x = gen_rtx_SET (stack_pointer_rtx,
21287 plus_constant (Pmode, stack_pointer_rtx, 4));
21288 RTX_FRAME_RELATED_P (insn) = 1;
21289 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21293 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21295 #if PROBE_INTERVAL > 4096
21296 #error Cannot use indexed addressing mode for stack probing
21297 #endif
21299 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21300 inclusive. These are offsets from the current stack pointer. REGNO1
21301 is the index number of the 1st scratch register and LIVE_REGS is the
21302 mask of live registers. */
21304 static void
21305 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21306 unsigned int regno1, unsigned long live_regs)
21308 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21310 /* See if we have a constant small number of probes to generate. If so,
21311 that's the easy case. */
21312 if (size <= PROBE_INTERVAL)
21314 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21315 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21316 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21319 /* The run-time loop is made up of 10 insns in the generic case while the
21320 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21321 else if (size <= 5 * PROBE_INTERVAL)
21323 HOST_WIDE_INT i, rem;
21325 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21326 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21327 emit_stack_probe (reg1);
21329 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21330 it exceeds SIZE. If only two probes are needed, this will not
21331 generate any code. Then probe at FIRST + SIZE. */
21332 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21334 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21335 emit_stack_probe (reg1);
21338 rem = size - (i - PROBE_INTERVAL);
21339 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21341 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21342 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21344 else
21345 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21348 /* Otherwise, do the same as above, but in a loop. Note that we must be
21349 extra careful with variables wrapping around because we might be at
21350 the very top (or the very bottom) of the address space and we have
21351 to be able to handle this case properly; in particular, we use an
21352 equality test for the loop condition. */
21353 else
21355 HOST_WIDE_INT rounded_size;
21356 struct scratch_reg sr;
21358 get_scratch_register_on_entry (&sr, regno1, live_regs);
21360 emit_move_insn (reg1, GEN_INT (first));
21363 /* Step 1: round SIZE to the previous multiple of the interval. */
21365 rounded_size = size & -PROBE_INTERVAL;
21366 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21369 /* Step 2: compute initial and final value of the loop counter. */
21371 /* TEST_ADDR = SP + FIRST. */
21372 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21374 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21375 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21378 /* Step 3: the loop
21382 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21383 probe at TEST_ADDR
21385 while (TEST_ADDR != LAST_ADDR)
21387 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21388 until it is equal to ROUNDED_SIZE. */
21390 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21393 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21394 that SIZE is equal to ROUNDED_SIZE. */
21396 if (size != rounded_size)
21398 HOST_WIDE_INT rem = size - rounded_size;
21400 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21402 emit_set_insn (sr.reg,
21403 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21404 emit_stack_probe (plus_constant (Pmode, sr.reg,
21405 PROBE_INTERVAL - rem));
21407 else
21408 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21411 release_scratch_register_on_entry (&sr);
21414 /* Make sure nothing is scheduled before we are done. */
21415 emit_insn (gen_blockage ());
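/* Worked example (numbers assumed for illustration): with PROBE_INTERVAL of
   4096, FIRST of 4096 and SIZE of 10000, the second case above applies and
   probes are emitted at FIRST + 4096, FIRST + 8192 and finally FIRST + SIZE
   below the incoming stack pointer, i.e. at SP - 8192, SP - 12288 and
   SP - 14096.  */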
21418 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21419 absolute addresses. */
21421 const char *
21422 output_probe_stack_range (rtx reg1, rtx reg2)
21424 static int labelno = 0;
21425 char loop_lab[32];
21426 rtx xops[2];
21428 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21430 /* Loop. */
21431 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21433 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21434 xops[0] = reg1;
21435 xops[1] = GEN_INT (PROBE_INTERVAL);
21436 output_asm_insn ("sub\t%0, %0, %1", xops);
21438 /* Probe at TEST_ADDR. */
21439 output_asm_insn ("str\tr0, [%0, #0]", xops);
21441 /* Test if TEST_ADDR == LAST_ADDR. */
21442 xops[1] = reg2;
21443 output_asm_insn ("cmp\t%0, %1", xops);
21445 /* Branch. */
21446 fputs ("\tbne\t", asm_out_file);
21447 assemble_name_raw (asm_out_file, loop_lab);
21448 fputc ('\n', asm_out_file);
21450 return "";
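/* For illustration, assuming PROBE_INTERVAL is 4096 and using placeholder
   registers rX (TEST_ADDR) and rY (LAST_ADDR), the loop emitted above is:

	.LPSRL0:
		sub	rX, rX, #4096
		str	r0, [rX, #0]
		cmp	rX, rY
		bne	.LPSRL0
   */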
21453 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21454 function. */
21455 void
21456 arm_expand_prologue (void)
21458 rtx amount;
21459 rtx insn;
21460 rtx ip_rtx;
21461 unsigned long live_regs_mask;
21462 unsigned long func_type;
21463 int fp_offset = 0;
21464 int saved_pretend_args = 0;
21465 int saved_regs = 0;
21466 unsigned HOST_WIDE_INT args_to_push;
21467 HOST_WIDE_INT size;
21468 arm_stack_offsets *offsets;
21469 bool clobber_ip;
21471 func_type = arm_current_func_type ();
21473 /* Naked functions don't have prologues. */
21474 if (IS_NAKED (func_type))
21476 if (flag_stack_usage_info)
21477 current_function_static_stack_size = 0;
21478 return;
21481 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21482 args_to_push = crtl->args.pretend_args_size;
21484 /* Compute which register we will have to save onto the stack. */
21485 offsets = arm_get_frame_offsets ();
21486 live_regs_mask = offsets->saved_regs_mask;
21488 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21490 if (IS_STACKALIGN (func_type))
21492 rtx r0, r1;
21494 /* Handle a word-aligned stack pointer. We generate the following:
21496 mov r0, sp
21497 bic r1, r0, #7
21498 mov sp, r1
21499 <save and restore r0 in normal prologue/epilogue>
21500 mov sp, r0
21501 bx lr
21503 The unwinder doesn't need to know about the stack realignment.
21504 Just tell it we saved SP in r0. */
21505 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21507 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21508 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21510 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21511 RTX_FRAME_RELATED_P (insn) = 1;
21512 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21514 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21516 /* ??? The CFA changes here, which may cause GDB to conclude that it
21517 has entered a different function. That said, the unwind info is
21518 correct, individually, before and after this instruction because
21519 we've described the save of SP, which will override the default
21520 handling of SP as restoring from the CFA. */
21521 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21524 /* The static chain register is the same as the IP register. If it is
21525 clobbered when creating the frame, we need to save and restore it. */
21526 clobber_ip = IS_NESTED (func_type)
21527 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21528 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21529 || flag_stack_clash_protection)
21530 && !df_regs_ever_live_p (LR_REGNUM)
21531 && arm_r3_live_at_start_p ()));
21533 /* Find somewhere to store IP whilst the frame is being created.
21534 We try the following places in order:
21536 1. The last argument register r3 if it is available.
21537 2. A slot on the stack above the frame if there are no
21538 arguments to push onto the stack.
21539 3. Register r3 again, after pushing the argument registers
21540 onto the stack, if this is a varargs function.
21541 4. The last slot on the stack created for the arguments to
21542 push, if this isn't a varargs function.
21544 Note - we only need to tell the dwarf2 backend about the SP
21545 adjustment in the second variant; the static chain register
21546 doesn't need to be unwound, as it doesn't contain a value
21547 inherited from the caller. */
21548 if (clobber_ip)
21550 if (!arm_r3_live_at_start_p ())
21551 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21552 else if (args_to_push == 0)
21554 rtx addr, dwarf;
21556 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21557 saved_regs += 4;
21559 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21560 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21561 fp_offset = 4;
21563 /* Just tell the dwarf backend that we adjusted SP. */
21564 dwarf = gen_rtx_SET (stack_pointer_rtx,
21565 plus_constant (Pmode, stack_pointer_rtx,
21566 -fp_offset));
21567 RTX_FRAME_RELATED_P (insn) = 1;
21568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21570 else
21572 /* Store the args on the stack. */
21573 if (cfun->machine->uses_anonymous_args)
21575 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21576 (0xf0 >> (args_to_push / 4)) & 0xf);
21577 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21578 saved_pretend_args = 1;
21580 else
21582 rtx addr, dwarf;
21584 if (args_to_push == 4)
21585 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21586 else
21587 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21588 plus_constant (Pmode,
21589 stack_pointer_rtx,
21590 -args_to_push));
21592 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21594 /* Just tell the dwarf backend that we adjusted SP. */
21595 dwarf = gen_rtx_SET (stack_pointer_rtx,
21596 plus_constant (Pmode, stack_pointer_rtx,
21597 -args_to_push));
21598 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21601 RTX_FRAME_RELATED_P (insn) = 1;
21602 fp_offset = args_to_push;
21603 args_to_push = 0;
21607 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21609 if (IS_INTERRUPT (func_type))
21611 /* Interrupt functions must not corrupt any registers.
21612 Creating a frame pointer, however, corrupts the IP
21613 register, so we must push it first. */
21614 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21616 /* Do not set RTX_FRAME_RELATED_P on this insn.
21617 The dwarf stack unwinding code only wants to see one
21618 stack decrement per function, and this is not it. If
21619 this instruction is labeled as being part of the frame
21620 creation sequence then dwarf2out_frame_debug_expr will
21621 die when it encounters the assignment of IP to FP
21622 later on, since the use of SP here establishes SP as
21623 the CFA register and not IP.
21625 Anyway this instruction is not really part of the stack
21626 frame creation although it is part of the prologue. */
21629 insn = emit_set_insn (ip_rtx,
21630 plus_constant (Pmode, stack_pointer_rtx,
21631 fp_offset));
21632 RTX_FRAME_RELATED_P (insn) = 1;
21635 if (args_to_push)
21637 /* Push the argument registers, or reserve space for them. */
21638 if (cfun->machine->uses_anonymous_args)
21639 insn = emit_multi_reg_push
21640 ((0xf0 >> (args_to_push / 4)) & 0xf,
21641 (0xf0 >> (args_to_push / 4)) & 0xf);
21642 else
21643 insn = emit_insn
21644 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21645 GEN_INT (- args_to_push)));
21646 RTX_FRAME_RELATED_P (insn) = 1;
21649 /* If this is an interrupt service routine, and the link register
21650 is going to be pushed, and we're not generating the extra
21651 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21652 subtracting four from LR now will mean that the function return
21653 can be done with a single instruction. */
21654 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21655 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21656 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21657 && TARGET_ARM)
21659 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21661 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21664 if (live_regs_mask)
21666 unsigned long dwarf_regs_mask = live_regs_mask;
21668 saved_regs += bit_count (live_regs_mask) * 4;
21669 if (optimize_size && !frame_pointer_needed
21670 && saved_regs == offsets->saved_regs - offsets->saved_args)
21672 /* If no coprocessor registers are being pushed and we don't have
21673 to worry about a frame pointer then push extra registers to
21674 create the stack frame. This is done in a way that does not
21675 alter the frame layout, so is independent of the epilogue. */
21676 int n;
21677 int frame;
21678 n = 0;
21679 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21680 n++;
21681 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21682 if (frame && n * 4 >= frame)
21684 n = frame / 4;
21685 live_regs_mask |= (1 << n) - 1;
21686 saved_regs += frame;
21690 if (TARGET_LDRD
21691 && current_tune->prefer_ldrd_strd
21692 && !optimize_function_for_size_p (cfun))
21694 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21695 if (TARGET_THUMB2)
21696 thumb2_emit_strd_push (live_regs_mask);
21697 else if (TARGET_ARM
21698 && !TARGET_APCS_FRAME
21699 && !IS_INTERRUPT (func_type))
21700 arm_emit_strd_push (live_regs_mask);
21701 else
21703 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21704 RTX_FRAME_RELATED_P (insn) = 1;
21707 else
21709 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21710 RTX_FRAME_RELATED_P (insn) = 1;
21714 if (! IS_VOLATILE (func_type))
21715 saved_regs += arm_save_coproc_regs ();
21717 if (frame_pointer_needed && TARGET_ARM)
21719 /* Create the new frame pointer. */
21720 if (TARGET_APCS_FRAME)
21722 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21723 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21724 RTX_FRAME_RELATED_P (insn) = 1;
21726 else
21728 insn = GEN_INT (saved_regs - (4 + fp_offset));
21729 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21730 stack_pointer_rtx, insn));
21731 RTX_FRAME_RELATED_P (insn) = 1;
21735 size = offsets->outgoing_args - offsets->saved_args;
21736 if (flag_stack_usage_info)
21737 current_function_static_stack_size = size;
21739 /* If this isn't an interrupt service routine and we have a frame, then do
21740 stack checking. We use IP as the first scratch register, except for the
21741 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21742 if (!IS_INTERRUPT (func_type)
21743 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21744 || flag_stack_clash_protection))
21746 unsigned int regno;
21748 if (!IS_NESTED (func_type) || clobber_ip)
21749 regno = IP_REGNUM;
21750 else if (df_regs_ever_live_p (LR_REGNUM))
21751 regno = LR_REGNUM;
21752 else
21753 regno = 3;
21755 if (crtl->is_leaf && !cfun->calls_alloca)
21757 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21758 arm_emit_probe_stack_range (get_stack_check_protect (),
21759 size - get_stack_check_protect (),
21760 regno, live_regs_mask);
21762 else if (size > 0)
21763 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21764 regno, live_regs_mask);
21767 /* Recover the static chain register. */
21768 if (clobber_ip)
21770 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21771 insn = gen_rtx_REG (SImode, 3);
21772 else
21774 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21775 insn = gen_frame_mem (SImode, insn);
21777 emit_set_insn (ip_rtx, insn);
21778 emit_insn (gen_force_register_use (ip_rtx));
21781 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21783 /* This add can produce multiple insns for a large constant, so we
21784 need to get tricky. */
21785 rtx_insn *last = get_last_insn ();
21787 amount = GEN_INT (offsets->saved_args + saved_regs
21788 - offsets->outgoing_args);
21790 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21791 amount));
21794 last = last ? NEXT_INSN (last) : get_insns ();
21795 RTX_FRAME_RELATED_P (last) = 1;
21797 while (last != insn);
21799 /* If the frame pointer is needed, emit a special barrier that
21800 will prevent the scheduler from moving stores to the frame
21801 before the stack adjustment. */
21802 if (frame_pointer_needed)
21803 emit_insn (gen_stack_tie (stack_pointer_rtx,
21804 hard_frame_pointer_rtx));
21808 if (frame_pointer_needed && TARGET_THUMB2)
21809 thumb_set_frame_pointer (offsets);
21811 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21813 unsigned long mask;
21815 mask = live_regs_mask;
21816 mask &= THUMB2_WORK_REGS;
21817 if (!IS_NESTED (func_type))
21818 mask |= (1 << IP_REGNUM);
21819 arm_load_pic_register (mask);
21822 /* If we are profiling, make sure no instructions are scheduled before
21823 the call to mcount. Similarly if the user has requested no
21824 scheduling in the prolog. Similarly if we want non-call exceptions
21825 using the EABI unwinder, to prevent faulting instructions from being
21826 swapped with a stack adjustment. */
21827 if (crtl->profile || !TARGET_SCHED_PROLOG
21828 || (arm_except_unwind_info (&global_options) == UI_TARGET
21829 && cfun->can_throw_non_call_exceptions))
21830 emit_insn (gen_blockage ());
21832 /* If the link register is being kept alive, with the return address in it,
21833 then make sure that it does not get reused by the ce2 pass. */
21834 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21835 cfun->machine->lr_save_eliminated = 1;
21838 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21839 static void
21840 arm_print_condition (FILE *stream)
21842 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21844 /* Branch conversion is not implemented for Thumb-2. */
21845 if (TARGET_THUMB)
21847 output_operand_lossage ("predicated Thumb instruction");
21848 return;
21850 if (current_insn_predicate != NULL)
21852 output_operand_lossage
21853 ("predicated instruction in conditional sequence");
21854 return;
21857 fputs (arm_condition_codes[arm_current_cc], stream);
21859 else if (current_insn_predicate)
21861 enum arm_cond_code code;
21863 if (TARGET_THUMB1)
21865 output_operand_lossage ("predicated Thumb instruction");
21866 return;
21869 code = get_arm_condition_code (current_insn_predicate);
21870 fputs (arm_condition_codes[code], stream);
21875 /* Globally reserved letters: acln
21876 Punctuation letters currently used: @_|?().!#
21877 Lower case letters currently used: bcdefhimpqtvwxyz
21878 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21879 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21881 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21883 If CODE is 'd', then X is a condition operand and the instruction
21884 should only be executed if the condition is true.
21885 If CODE is 'D', then X is a condition operand and the instruction
21886 should only be executed if the condition is false: however, if the mode
21887 of the comparison is CCFPEmode, then always execute the instruction -- we
21888 do this because in these circumstances !GE does not necessarily imply LT;
21889 in these cases the instruction pattern will take care to make sure that
21890 an instruction containing %d will follow, thereby undoing the effects of
21891 doing this instruction unconditionally.
21892 If CODE is 'N' then X is a floating point operand that must be negated
21893 before output.
21894 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21895 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
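/* Two illustrative examples (operands invented for exposition): with a DImode
   value living in r4, "%M" prints "{r4-r5}", since DImode needs two core
   registers; with the CONST_INT 5, "%B" prints the sign-extended bitwise
   inverse, i.e. -6.  */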
21896 static void
21897 arm_print_operand (FILE *stream, rtx x, int code)
21899 switch (code)
21901 case '@':
21902 fputs (ASM_COMMENT_START, stream);
21903 return;
21905 case '_':
21906 fputs (user_label_prefix, stream);
21907 return;
21909 case '|':
21910 fputs (REGISTER_PREFIX, stream);
21911 return;
21913 case '?':
21914 arm_print_condition (stream);
21915 return;
21917 case '.':
21918 /* The current condition code for a condition code setting instruction.
21919 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21920 fputc('s', stream);
21921 arm_print_condition (stream);
21922 return;
21924 case '!':
21925 /* If the instruction is conditionally executed then print
21926 the current condition code, otherwise print 's'. */
21927 gcc_assert (TARGET_THUMB2);
21928 if (current_insn_predicate)
21929 arm_print_condition (stream);
21930 else
21931 fputc('s', stream);
21932 break;
21934 /* %# is a "break" sequence. It doesn't output anything, but is used to
21935 separate e.g. operand numbers from following text, if that text consists
21936 of further digits which we don't want to be part of the operand
21937 number. */
21938 case '#':
21939 return;
21941 case 'N':
21943 REAL_VALUE_TYPE r;
21944 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21945 fprintf (stream, "%s", fp_const_from_val (&r));
21947 return;
21949 /* An integer or symbol address without a preceding # sign. */
21950 case 'c':
21951 switch (GET_CODE (x))
21953 case CONST_INT:
21954 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21955 break;
21957 case SYMBOL_REF:
21958 output_addr_const (stream, x);
21959 break;
21961 case CONST:
21962 if (GET_CODE (XEXP (x, 0)) == PLUS
21963 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21965 output_addr_const (stream, x);
21966 break;
21968 /* Fall through. */
21970 default:
21971 output_operand_lossage ("Unsupported operand for code '%c'", code);
21973 return;
21975 /* An integer that we want to print in HEX. */
21976 case 'x':
21977 switch (GET_CODE (x))
21979 case CONST_INT:
21980 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21981 break;
21983 default:
21984 output_operand_lossage ("Unsupported operand for code '%c'", code);
21986 return;
21988 case 'B':
21989 if (CONST_INT_P (x))
21991 HOST_WIDE_INT val;
21992 val = ARM_SIGN_EXTEND (~INTVAL (x));
21993 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21995 else
21997 putc ('~', stream);
21998 output_addr_const (stream, x);
22000 return;
22002 case 'b':
22003 /* Print the log2 of a CONST_INT. */
22005 HOST_WIDE_INT val;
22007 if (!CONST_INT_P (x)
22008 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22009 output_operand_lossage ("Unsupported operand for code '%c'", code);
22010 else
22011 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22013 return;
22015 case 'L':
22016 /* The low 16 bits of an immediate constant. */
22017 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22018 return;
22020 case 'i':
22021 fprintf (stream, "%s", arithmetic_instr (x, 1));
22022 return;
22024 case 'I':
22025 fprintf (stream, "%s", arithmetic_instr (x, 0));
22026 return;
22028 case 'S':
22030 HOST_WIDE_INT val;
22031 const char *shift;
22033 shift = shift_op (x, &val);
22035 if (shift)
22037 fprintf (stream, ", %s ", shift);
22038 if (val == -1)
22039 arm_print_operand (stream, XEXP (x, 1), 0);
22040 else
22041 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22044 return;
22046 /* An explanation of the 'Q', 'R' and 'H' register operands:
22048 In a pair of registers containing a DI or DF value the 'Q'
22049 operand returns the register number of the register containing
22050 the least significant part of the value. The 'R' operand returns
22051 the register number of the register containing the most
22052 significant part of the value.
22054 The 'H' operand returns the higher of the two register numbers.
22055 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22056 same as the 'Q' operand, since the most significant part of the
22057 value is held in the lower number register. The reverse is true
22058 on systems where WORDS_BIG_ENDIAN is false.
22060 The purpose of these operands is to distinguish between cases
22061 where the endian-ness of the values is important (for example
22062 when they are added together), and cases where the endian-ness
22063 is irrelevant, but the order of register operations is important.
22064 For example when loading a value from memory into a register
22065 pair, the endian-ness does not matter. Provided that the value
22066 from the lower memory address is put into the lower numbered
22067 register, and the value from the higher address is put into the
22068 higher numbered register, the load will work regardless of whether
22069 the value being loaded is big-wordian or little-wordian. The
22070 order of the two register loads can matter however, if the address
22071 of the memory location is actually held in one of the registers
22072 being overwritten by the load.
22074 The 'Q' and 'R' constraints are also available for 64-bit
22075 constants. */
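/* For example (purely illustrative): on a little-endian target a DImode
   value held in r0/r1 prints as r0 for 'Q' (least significant word),
   r1 for 'R' (most significant word) and r1 for 'H' (higher register
   number); when WORDS_BIG_ENDIAN is true the 'Q' and 'R' outputs are
   swapped while 'H' is unchanged.  */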
22076 case 'Q':
22077 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22079 rtx part = gen_lowpart (SImode, x);
22080 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22081 return;
22084 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22086 output_operand_lossage ("invalid operand for code '%c'", code);
22087 return;
22090 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22091 return;
22093 case 'R':
22094 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22096 machine_mode mode = GET_MODE (x);
22097 rtx part;
22099 if (mode == VOIDmode)
22100 mode = DImode;
22101 part = gen_highpart_mode (SImode, mode, x);
22102 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22103 return;
22106 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22108 output_operand_lossage ("invalid operand for code '%c'", code);
22109 return;
22112 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22113 return;
22115 case 'H':
22116 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22118 output_operand_lossage ("invalid operand for code '%c'", code);
22119 return;
22122 asm_fprintf (stream, "%r", REGNO (x) + 1);
22123 return;
22125 case 'J':
22126 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22128 output_operand_lossage ("invalid operand for code '%c'", code);
22129 return;
22132 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22133 return;
22135 case 'K':
22136 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22138 output_operand_lossage ("invalid operand for code '%c'", code);
22139 return;
22142 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22143 return;
22145 case 'm':
22146 asm_fprintf (stream, "%r",
22147 REG_P (XEXP (x, 0))
22148 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22149 return;
22151 case 'M':
22152 asm_fprintf (stream, "{%r-%r}",
22153 REGNO (x),
22154 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22155 return;
22157 /* Like 'M', but writing doubleword vector registers, for use by Neon
22158 insns. */
22159 case 'h':
22161 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22162 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22163 if (numregs == 1)
22164 asm_fprintf (stream, "{d%d}", regno);
22165 else
22166 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22168 return;
22170 case 'd':
22171 /* CONST_TRUE_RTX means always -- that's the default. */
22172 if (x == const_true_rtx)
22173 return;
22175 if (!COMPARISON_P (x))
22177 output_operand_lossage ("invalid operand for code '%c'", code);
22178 return;
22181 fputs (arm_condition_codes[get_arm_condition_code (x)],
22182 stream);
22183 return;
22185 case 'D':
22186 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22187 want to do that. */
22188 if (x == const_true_rtx)
22190 output_operand_lossage ("instruction never executed");
22191 return;
22193 if (!COMPARISON_P (x))
22195 output_operand_lossage ("invalid operand for code '%c'", code);
22196 return;
22199 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22200 (get_arm_condition_code (x))],
22201 stream);
22202 return;
22204 case 's':
22205 case 'V':
22206 case 'W':
22207 case 'X':
22208 case 'Y':
22209 case 'Z':
22210 /* Former Maverick support, removed after GCC-4.7. */
22211 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22212 return;
22214 case 'U':
22215 if (!REG_P (x)
22216 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22217 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22218 /* Bad value for wCG register number. */
22220 output_operand_lossage ("invalid operand for code '%c'", code);
22221 return;
22224 else
22225 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22226 return;
22228 /* Print an iWMMXt control register name. */
22229 case 'w':
22230 if (!CONST_INT_P (x)
22231 || INTVAL (x) < 0
22232 || INTVAL (x) >= 16)
22233 /* Bad value for wC register number. */
22235 output_operand_lossage ("invalid operand for code '%c'", code);
22236 return;
22239 else
22241 static const char * wc_reg_names [16] =
22243 "wCID", "wCon", "wCSSF", "wCASF",
22244 "wC4", "wC5", "wC6", "wC7",
22245 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22246 "wC12", "wC13", "wC14", "wC15"
22249 fputs (wc_reg_names [INTVAL (x)], stream);
22251 return;
22253 /* Print the high single-precision register of a VFP double-precision
22254 register. */
22255 case 'p':
22257 machine_mode mode = GET_MODE (x);
22258 int regno;
22260 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22262 output_operand_lossage ("invalid operand for code '%c'", code);
22263 return;
22266 regno = REGNO (x);
22267 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22269 output_operand_lossage ("invalid operand for code '%c'", code);
22270 return;
22273 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22275 return;
22277 /* Print a VFP/Neon double precision or quad precision register name. */
22278 case 'P':
22279 case 'q':
22281 machine_mode mode = GET_MODE (x);
22282 int is_quad = (code == 'q');
22283 int regno;
22285 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22287 output_operand_lossage ("invalid operand for code '%c'", code);
22288 return;
22291 if (!REG_P (x)
22292 || !IS_VFP_REGNUM (REGNO (x)))
22294 output_operand_lossage ("invalid operand for code '%c'", code);
22295 return;
22298 regno = REGNO (x);
22299 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22300 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22302 output_operand_lossage ("invalid operand for code '%c'", code);
22303 return;
22306 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22307 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22309 return;
22311 /* These two codes print the low/high doubleword register of a Neon quad
22312 register, respectively. For pair-structure types, can also print
22313 low/high quadword registers. */
22314 case 'e':
22315 case 'f':
22317 machine_mode mode = GET_MODE (x);
22318 int regno;
22320 if ((GET_MODE_SIZE (mode) != 16
22321 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22323 output_operand_lossage ("invalid operand for code '%c'", code);
22324 return;
22327 regno = REGNO (x);
22328 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22330 output_operand_lossage ("invalid operand for code '%c'", code);
22331 return;
22334 if (GET_MODE_SIZE (mode) == 16)
22335 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22336 + (code == 'f' ? 1 : 0));
22337 else
22338 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22339 + (code == 'f' ? 1 : 0));
22341 return;
22343 /* Print a VFPv3 floating-point constant, represented as an integer
22344 index. */
22345 case 'G':
22347 int index = vfp3_const_double_index (x);
22348 gcc_assert (index != -1);
22349 fprintf (stream, "%d", index);
22351 return;
22353 /* Print bits representing opcode features for Neon.
22355 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22356 and polynomials as unsigned.
22358 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22360 Bit 2 is 1 for rounding functions, 0 otherwise. */
22362 /* Identify the type as 's', 'u', 'p' or 'f'. */
22363 case 'T':
22365 HOST_WIDE_INT bits = INTVAL (x);
22366 fputc ("uspf"[bits & 3], stream);
22368 return;
22370 /* Likewise, but signed and unsigned integers are both 'i'. */
22371 case 'F':
22373 HOST_WIDE_INT bits = INTVAL (x);
22374 fputc ("iipf"[bits & 3], stream);
22376 return;
22378 /* As for 'T', but emit 'u' instead of 'p'. */
22379 case 't':
22381 HOST_WIDE_INT bits = INTVAL (x);
22382 fputc ("usuf"[bits & 3], stream);
22384 return;
22386 /* Bit 2: rounding (vs none). */
22387 case 'O':
22389 HOST_WIDE_INT bits = INTVAL (x);
22390 fputs ((bits & 4) != 0 ? "r" : "", stream);
22392 return;
22394 /* Memory operand for vld1/vst1 instruction. */
22395 case 'A':
22397 rtx addr;
22398 bool postinc = FALSE;
22399 rtx postinc_reg = NULL;
22400 unsigned align, memsize, align_bits;
22402 gcc_assert (MEM_P (x));
22403 addr = XEXP (x, 0);
22404 if (GET_CODE (addr) == POST_INC)
22406 postinc = 1;
22407 addr = XEXP (addr, 0);
22409 if (GET_CODE (addr) == POST_MODIFY)
22411 postinc_reg = XEXP( XEXP (addr, 1), 1);
22412 addr = XEXP (addr, 0);
22414 asm_fprintf (stream, "[%r", REGNO (addr));
22416 /* We know the alignment of this access, so we can emit a hint in the
22417 instruction (for some alignments) as an aid to the memory subsystem
22418 of the target. */
22419 align = MEM_ALIGN (x) >> 3;
22420 memsize = MEM_SIZE (x);
22422 /* Only certain alignment specifiers are supported by the hardware. */
22423 if (memsize == 32 && (align % 32) == 0)
22424 align_bits = 256;
22425 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22426 align_bits = 128;
22427 else if (memsize >= 8 && (align % 8) == 0)
22428 align_bits = 64;
22429 else
22430 align_bits = 0;
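/* For example (register chosen for illustration), a 16-byte vld1/vst1
   access known to be 16-byte aligned gets a ":128" hint and is printed
   as "[r0:128]".  */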
22432 if (align_bits != 0)
22433 asm_fprintf (stream, ":%d", align_bits);
22435 asm_fprintf (stream, "]");
22437 if (postinc)
22438 fputs("!", stream);
22439 if (postinc_reg)
22440 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22442 return;
22444 case 'C':
22446 rtx addr;
22448 gcc_assert (MEM_P (x));
22449 addr = XEXP (x, 0);
22450 gcc_assert (REG_P (addr));
22451 asm_fprintf (stream, "[%r]", REGNO (addr));
22453 return;
22455 /* Translate an S register number into a D register number and element index. */
22456 case 'y':
22458 machine_mode mode = GET_MODE (x);
22459 int regno;
22461 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22463 output_operand_lossage ("invalid operand for code '%c'", code);
22464 return;
22467 regno = REGNO (x);
22468 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22470 output_operand_lossage ("invalid operand for code '%c'", code);
22471 return;
22474 regno = regno - FIRST_VFP_REGNUM;
22475 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22477 return;
22479 case 'v':
22480 gcc_assert (CONST_DOUBLE_P (x));
22481 int result;
22482 result = vfp3_const_double_for_fract_bits (x);
22483 if (result == 0)
22484 result = vfp3_const_double_for_bits (x);
22485 fprintf (stream, "#%d", result);
22486 return;
22488 /* Register specifier for vld1.16/vst1.16. Translate the S register
22489 number into a D register number and element index. */
22490 case 'z':
22492 machine_mode mode = GET_MODE (x);
22493 int regno;
22495 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22497 output_operand_lossage ("invalid operand for code '%c'", code);
22498 return;
22501 regno = REGNO (x);
22502 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22504 output_operand_lossage ("invalid operand for code '%c'", code);
22505 return;
22508 regno = regno - FIRST_VFP_REGNUM;
22509 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22511 return;
22513 default:
22514 if (x == 0)
22516 output_operand_lossage ("missing operand");
22517 return;
22520 switch (GET_CODE (x))
22522 case REG:
22523 asm_fprintf (stream, "%r", REGNO (x));
22524 break;
22526 case MEM:
22527 output_address (GET_MODE (x), XEXP (x, 0));
22528 break;
22530 case CONST_DOUBLE:
22532 char fpstr[20];
22533 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22534 sizeof (fpstr), 0, 1);
22535 fprintf (stream, "#%s", fpstr);
22537 break;
22539 default:
22540 gcc_assert (GET_CODE (x) != NEG);
22541 fputc ('#', stream);
22542 if (GET_CODE (x) == HIGH)
22544 fputs (":lower16:", stream);
22545 x = XEXP (x, 0);
22548 output_addr_const (stream, x);
22549 break;
22554 /* Target hook for printing a memory address. */
22555 static void
22556 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22558 if (TARGET_32BIT)
22560 int is_minus = GET_CODE (x) == MINUS;
22562 if (REG_P (x))
22563 asm_fprintf (stream, "[%r]", REGNO (x));
22564 else if (GET_CODE (x) == PLUS || is_minus)
22566 rtx base = XEXP (x, 0);
22567 rtx index = XEXP (x, 1);
22568 HOST_WIDE_INT offset = 0;
22569 if (!REG_P (base)
22570 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22572 /* Ensure that BASE is a register. */
22573 /* (one of them must be). */
22574 /* Also ensure the SP is not used as an index register. */
22575 std::swap (base, index);
22577 switch (GET_CODE (index))
22579 case CONST_INT:
22580 offset = INTVAL (index);
22581 if (is_minus)
22582 offset = -offset;
22583 asm_fprintf (stream, "[%r, #%wd]",
22584 REGNO (base), offset);
22585 break;
22587 case REG:
22588 asm_fprintf (stream, "[%r, %s%r]",
22589 REGNO (base), is_minus ? "-" : "",
22590 REGNO (index));
22591 break;
22593 case MULT:
22594 case ASHIFTRT:
22595 case LSHIFTRT:
22596 case ASHIFT:
22597 case ROTATERT:
22599 asm_fprintf (stream, "[%r, %s%r",
22600 REGNO (base), is_minus ? "-" : "",
22601 REGNO (XEXP (index, 0)));
22602 arm_print_operand (stream, index, 'S');
22603 fputs ("]", stream);
22604 break;
22607 default:
22608 gcc_unreachable ();
22611 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22612 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22614 gcc_assert (REG_P (XEXP (x, 0)));
22616 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22617 asm_fprintf (stream, "[%r, #%s%d]!",
22618 REGNO (XEXP (x, 0)),
22619 GET_CODE (x) == PRE_DEC ? "-" : "",
22620 GET_MODE_SIZE (mode));
22621 else
22622 asm_fprintf (stream, "[%r], #%s%d",
22623 REGNO (XEXP (x, 0)),
22624 GET_CODE (x) == POST_DEC ? "-" : "",
22625 GET_MODE_SIZE (mode));
22627 else if (GET_CODE (x) == PRE_MODIFY)
22629 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22630 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22631 asm_fprintf (stream, "#%wd]!",
22632 INTVAL (XEXP (XEXP (x, 1), 1)));
22633 else
22634 asm_fprintf (stream, "%r]!",
22635 REGNO (XEXP (XEXP (x, 1), 1)));
22637 else if (GET_CODE (x) == POST_MODIFY)
22639 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22640 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22641 asm_fprintf (stream, "#%wd",
22642 INTVAL (XEXP (XEXP (x, 1), 1)));
22643 else
22644 asm_fprintf (stream, "%r",
22645 REGNO (XEXP (XEXP (x, 1), 1)));
22647 else output_addr_const (stream, x);
22649 else
22651 if (REG_P (x))
22652 asm_fprintf (stream, "[%r]", REGNO (x));
22653 else if (GET_CODE (x) == POST_INC)
22654 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22655 else if (GET_CODE (x) == PLUS)
22657 gcc_assert (REG_P (XEXP (x, 0)));
22658 if (CONST_INT_P (XEXP (x, 1)))
22659 asm_fprintf (stream, "[%r, #%wd]",
22660 REGNO (XEXP (x, 0)),
22661 INTVAL (XEXP (x, 1)));
22662 else
22663 asm_fprintf (stream, "[%r, %r]",
22664 REGNO (XEXP (x, 0)),
22665 REGNO (XEXP (x, 1)));
22667 else
22668 output_addr_const (stream, x);
22672 /* Target hook for indicating whether a punctuation character for
22673 TARGET_PRINT_OPERAND is valid. */
22674 static bool
22675 arm_print_operand_punct_valid_p (unsigned char code)
22677 return (code == '@' || code == '|' || code == '.'
22678 || code == '(' || code == ')' || code == '#'
22679 || (TARGET_32BIT && (code == '?'))
22680 || (TARGET_THUMB2 && (code == '!'))
22681 || (TARGET_THUMB && (code == '_')));
22684 /* Target hook for assembling integer objects. The ARM version needs to
22685 handle word-sized values specially. */
22686 static bool
22687 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22689 machine_mode mode;
22691 if (size == UNITS_PER_WORD && aligned_p)
22693 fputs ("\t.word\t", asm_out_file);
22694 output_addr_const (asm_out_file, x);
22696 /* Mark symbols as position independent. We only do this in the
22697 .text segment, not in the .data segment. */
22698 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22699 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22701 /* See legitimize_pic_address for an explanation of the
22702 TARGET_VXWORKS_RTP check. */
22703 /* References to weak symbols cannot be resolved locally:
22704 they may be overridden by a non-weak definition at link
22705 time. */
22706 if (!arm_pic_data_is_text_relative
22707 || (GET_CODE (x) == SYMBOL_REF
22708 && (!SYMBOL_REF_LOCAL_P (x)
22709 || (SYMBOL_REF_DECL (x)
22710 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22711 fputs ("(GOT)", asm_out_file);
22712 else
22713 fputs ("(GOTOFF)", asm_out_file);
22715 fputc ('\n', asm_out_file);
22716 return true;
22719 mode = GET_MODE (x);
22721 if (arm_vector_mode_supported_p (mode))
22723 int i, units;
22725 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22727 units = CONST_VECTOR_NUNITS (x);
22728 size = GET_MODE_UNIT_SIZE (mode);
22730 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22731 for (i = 0; i < units; i++)
22733 rtx elt = CONST_VECTOR_ELT (x, i);
22734 assemble_integer
22735 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22737 else
22738 for (i = 0; i < units; i++)
22740 rtx elt = CONST_VECTOR_ELT (x, i);
22741 assemble_real
22742 (*CONST_DOUBLE_REAL_VALUE (elt),
22743 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22744 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22747 return true;
22750 return default_assemble_integer (x, size, aligned_p);
22753 static void
22754 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22756 section *s;
22758 if (!TARGET_AAPCS_BASED)
22760 (is_ctor ?
22761 default_named_section_asm_out_constructor
22762 : default_named_section_asm_out_destructor) (symbol, priority);
22763 return;
22766 /* Put these in the .init_array section, using a special relocation. */
22767 if (priority != DEFAULT_INIT_PRIORITY)
22769 char buf[18];
22770 sprintf (buf, "%s.%.5u",
22771 is_ctor ? ".init_array" : ".fini_array",
22772 priority);
22773 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22775 else if (is_ctor)
22776 s = ctors_section;
22777 else
22778 s = dtors_section;
22780 switch_to_section (s);
22781 assemble_align (POINTER_SIZE);
22782 fputs ("\t.word\t", asm_out_file);
22783 output_addr_const (asm_out_file, symbol);
22784 fputs ("(target1)\n", asm_out_file);
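/* For illustration: on an AAPCS-based target a constructor "foo" (the name is
   hypothetical) registered with priority 65 ends up in the ".init_array.00065"
   section as

	.word	foo(target1)

   whereas the default priority falls back to the regular ctors_section or
   dtors_section.  */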
22787 /* Add a function to the list of static constructors. */
22789 static void
22790 arm_elf_asm_constructor (rtx symbol, int priority)
22792 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22795 /* Add a function to the list of static destructors. */
22797 static void
22798 arm_elf_asm_destructor (rtx symbol, int priority)
22800 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22803 /* A finite state machine takes care of noticing whether or not instructions
22804 can be conditionally executed, thus decreasing execution time and code
22805 size by deleting branch instructions. The fsm is controlled by
22806 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22808 /* The states of the fsm controlling condition codes are:
22809 0: normal, do nothing special
22810 1: make ASM_OUTPUT_OPCODE not output this instruction
22811 2: make ASM_OUTPUT_OPCODE not output this instruction
22812 3: make instructions conditional
22813 4: make instructions conditional
22815 State transitions (state->state by whom under condition):
22816 0 -> 1 final_prescan_insn if the `target' is a label
22817 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22818 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22819 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22820 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22821 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22822 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22823 (the target insn is arm_target_insn).
22825 If the jump clobbers the conditions then we use states 2 and 4.
22827 A similar thing can be done with conditional return insns.
22829 XXX In case the `target' is an unconditional branch, this conditionalising
22830 of the instructions always reduces code size, but not always execution
22831 time. But then, I want to reduce the code size to somewhere near what
22832 /bin/cc produces. */
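/* A sketch of the effect (source sequence invented for illustration): a short
   forward branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery into

	cmp	r0, #0
	addne	r1, r1, #1

   with the branch deleted.  */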
22834 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22835 instructions. When a COND_EXEC instruction is seen the subsequent
22836 instructions are scanned so that multiple conditional instructions can be
22837 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22838 specify the length and true/false mask for the IT block. These will be
22839 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22841 /* Returns the index of the ARM condition code string in
22842 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22843 COMPARISON should be an rtx like `(eq (...) (...))'. */
22845 enum arm_cond_code
22846 maybe_get_arm_condition_code (rtx comparison)
22848 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22849 enum arm_cond_code code;
22850 enum rtx_code comp_code = GET_CODE (comparison);
22852 if (GET_MODE_CLASS (mode) != MODE_CC)
22853 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22854 XEXP (comparison, 1));
22856 switch (mode)
22858 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22859 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22860 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22861 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22862 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22863 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22864 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22865 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22866 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22867 case E_CC_DLTUmode: code = ARM_CC;
22869 dominance:
22870 if (comp_code == EQ)
22871 return ARM_INVERSE_CONDITION_CODE (code);
22872 if (comp_code == NE)
22873 return code;
22874 return ARM_NV;
22876 case E_CC_NOOVmode:
22877 switch (comp_code)
22879 case NE: return ARM_NE;
22880 case EQ: return ARM_EQ;
22881 case GE: return ARM_PL;
22882 case LT: return ARM_MI;
22883 default: return ARM_NV;
22886 case E_CC_Zmode:
22887 switch (comp_code)
22889 case NE: return ARM_NE;
22890 case EQ: return ARM_EQ;
22891 default: return ARM_NV;
22894 case E_CC_Nmode:
22895 switch (comp_code)
22897 case NE: return ARM_MI;
22898 case EQ: return ARM_PL;
22899 default: return ARM_NV;
22902 case E_CCFPEmode:
22903 case E_CCFPmode:
22904 /* We can handle all cases except UNEQ and LTGT. */
22905 switch (comp_code)
22907 case GE: return ARM_GE;
22908 case GT: return ARM_GT;
22909 case LE: return ARM_LS;
22910 case LT: return ARM_MI;
22911 case NE: return ARM_NE;
22912 case EQ: return ARM_EQ;
22913 case ORDERED: return ARM_VC;
22914 case UNORDERED: return ARM_VS;
22915 case UNLT: return ARM_LT;
22916 case UNLE: return ARM_LE;
22917 case UNGT: return ARM_HI;
22918 case UNGE: return ARM_PL;
22919 /* UNEQ and LTGT do not have a representation. */
22920 case UNEQ: /* Fall through. */
22921 case LTGT: /* Fall through. */
22922 default: return ARM_NV;
22925 case E_CC_SWPmode:
22926 switch (comp_code)
22928 case NE: return ARM_NE;
22929 case EQ: return ARM_EQ;
22930 case GE: return ARM_LE;
22931 case GT: return ARM_LT;
22932 case LE: return ARM_GE;
22933 case LT: return ARM_GT;
22934 case GEU: return ARM_LS;
22935 case GTU: return ARM_CC;
22936 case LEU: return ARM_CS;
22937 case LTU: return ARM_HI;
22938 default: return ARM_NV;
22941 case E_CC_Cmode:
22942 switch (comp_code)
22944 case LTU: return ARM_CS;
22945 case GEU: return ARM_CC;
22946 case NE: return ARM_CS;
22947 case EQ: return ARM_CC;
22948 default: return ARM_NV;
22951 case E_CC_CZmode:
22952 switch (comp_code)
22954 case NE: return ARM_NE;
22955 case EQ: return ARM_EQ;
22956 case GEU: return ARM_CS;
22957 case GTU: return ARM_HI;
22958 case LEU: return ARM_LS;
22959 case LTU: return ARM_CC;
22960 default: return ARM_NV;
22963 case E_CC_NCVmode:
22964 switch (comp_code)
22966 case GE: return ARM_GE;
22967 case LT: return ARM_LT;
22968 case GEU: return ARM_CS;
22969 case LTU: return ARM_CC;
22970 default: return ARM_NV;
22973 case E_CC_Vmode:
22974 switch (comp_code)
22976 case NE: return ARM_VS;
22977 case EQ: return ARM_VC;
22978 default: return ARM_NV;
22981 case E_CCmode:
22982 switch (comp_code)
22984 case NE: return ARM_NE;
22985 case EQ: return ARM_EQ;
22986 case GE: return ARM_GE;
22987 case GT: return ARM_GT;
22988 case LE: return ARM_LE;
22989 case LT: return ARM_LT;
22990 case GEU: return ARM_CS;
22991 case GTU: return ARM_HI;
22992 case LEU: return ARM_LS;
22993 case LTU: return ARM_CC;
22994 default: return ARM_NV;
22997 default: gcc_unreachable ();
23001 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23002 static enum arm_cond_code
23003 get_arm_condition_code (rtx comparison)
23005 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23006 gcc_assert (code != ARM_NV);
23007 return code;
23010 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23011 code registers when not targeting Thumb1. The VFP condition register
23012 only exists when generating hard-float code. */
23013 static bool
23014 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23016 if (!TARGET_32BIT)
23017 return false;
23019 *p1 = CC_REGNUM;
23020 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23021 return true;
23024 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23025 instructions. */
23026 void
23027 thumb2_final_prescan_insn (rtx_insn *insn)
23029 rtx_insn *first_insn = insn;
23030 rtx body = PATTERN (insn);
23031 rtx predicate;
23032 enum arm_cond_code code;
23033 int n;
23034 int mask;
23035 int max;
23037 /* max_insns_skipped in the tune was already taken into account in the
23038 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
23039 just emit the IT blocks as best we can. It does not make sense to split
23040 the IT blocks. */
23041 max = MAX_INSN_PER_IT_BLOCK;
23043 /* Remove the previous insn from the count of insns to be output. */
23044 if (arm_condexec_count)
23045 arm_condexec_count--;
23047 /* Nothing to do if we are already inside a conditional block. */
23048 if (arm_condexec_count)
23049 return;
23051 if (GET_CODE (body) != COND_EXEC)
23052 return;
23054 /* Conditional jumps are implemented directly. */
23055 if (JUMP_P (insn))
23056 return;
23058 predicate = COND_EXEC_TEST (body);
23059 arm_current_cc = get_arm_condition_code (predicate);
23061 n = get_attr_ce_count (insn);
23062 arm_condexec_count = 1;
23063 arm_condexec_mask = (1 << n) - 1;
23064 arm_condexec_masklen = n;
23065 /* See if subsequent instructions can be combined into the same block. */
23066 for (;;)
23068 insn = next_nonnote_insn (insn);
23070 /* Jumping into the middle of an IT block is illegal, so a label or
23071 barrier terminates the block. */
23072 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23073 break;
23075 body = PATTERN (insn);
23076 /* USE and CLOBBER aren't really insns, so just skip them. */
23077 if (GET_CODE (body) == USE
23078 || GET_CODE (body) == CLOBBER)
23079 continue;
23081 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23082 if (GET_CODE (body) != COND_EXEC)
23083 break;
23084 /* Maximum number of conditionally executed instructions in a block. */
23085 n = get_attr_ce_count (insn);
23086 if (arm_condexec_masklen + n > max)
23087 break;
23089 predicate = COND_EXEC_TEST (body);
23090 code = get_arm_condition_code (predicate);
23091 mask = (1 << n) - 1;
23092 if (arm_current_cc == code)
23093 arm_condexec_mask |= (mask << arm_condexec_masklen);
23094 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23095 break;
23097 arm_condexec_count++;
23098 arm_condexec_masklen += n;
23100 /* A jump must be the last instruction in a conditional block. */
23101 if (JUMP_P (insn))
23102 break;
23104 /* Restore recog_data (getting the attributes of other insns can
23105 destroy this array, but final.c assumes that it remains intact
23106 across this call). */
23107 extract_constrain_insn_cached (first_insn);
23110 void
23111 arm_final_prescan_insn (rtx_insn *insn)
23113 /* BODY will hold the body of INSN. */
23114 rtx body = PATTERN (insn);
23116 /* This will be 1 if trying to repeat the trick, and things need to be
23117 reversed if it appears to fail. */
23118 int reverse = 0;
23120 /* If we start with a return insn, we only succeed if we find another one. */
23121 int seeking_return = 0;
23122 enum rtx_code return_code = UNKNOWN;
23124 /* START_INSN will hold the insn from where we start looking. This is the
23125 first insn after the following code_label if REVERSE is true. */
23126 rtx_insn *start_insn = insn;
23128 /* If in state 4, check if the target branch is reached, in order to
23129 change back to state 0. */
23130 if (arm_ccfsm_state == 4)
23132 if (insn == arm_target_insn)
23134 arm_target_insn = NULL;
23135 arm_ccfsm_state = 0;
23137 return;
23140 /* If in state 3, it is possible to repeat the trick, if this insn is an
23141 unconditional branch to a label, and immediately following this branch
23142 is the previous target label which is only used once, and the label this
23143 branch jumps to is not too far off. */
23144 if (arm_ccfsm_state == 3)
23146 if (simplejump_p (insn))
23148 start_insn = next_nonnote_insn (start_insn);
23149 if (BARRIER_P (start_insn))
23151 /* XXX Isn't this always a barrier? */
23152 start_insn = next_nonnote_insn (start_insn);
23154 if (LABEL_P (start_insn)
23155 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23156 && LABEL_NUSES (start_insn) == 1)
23157 reverse = TRUE;
23158 else
23159 return;
23161 else if (ANY_RETURN_P (body))
23163 start_insn = next_nonnote_insn (start_insn);
23164 if (BARRIER_P (start_insn))
23165 start_insn = next_nonnote_insn (start_insn);
23166 if (LABEL_P (start_insn)
23167 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23168 && LABEL_NUSES (start_insn) == 1)
23170 reverse = TRUE;
23171 seeking_return = 1;
23172 return_code = GET_CODE (body);
23174 else
23175 return;
23177 else
23178 return;
23181 gcc_assert (!arm_ccfsm_state || reverse);
23182 if (!JUMP_P (insn))
23183 return;
23185 /* This jump might be paralleled with a clobber of the condition codes;
23186 the jump should always come first. */
23187 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23188 body = XVECEXP (body, 0, 0);
23190 if (reverse
23191 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23192 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23194 int insns_skipped;
23195 int fail = FALSE, succeed = FALSE;
23196 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23197 int then_not_else = TRUE;
23198 rtx_insn *this_insn = start_insn;
23199 rtx label = 0;
23201 /* Register the insn jumped to. */
23202 if (reverse)
23204 if (!seeking_return)
23205 label = XEXP (SET_SRC (body), 0);
23207 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23208 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23209 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23211 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23212 then_not_else = FALSE;
23214 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23216 seeking_return = 1;
23217 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23219 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23221 seeking_return = 1;
23222 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23223 then_not_else = FALSE;
23225 else
23226 gcc_unreachable ();
23228 /* See how many insns this branch skips, and what kind of insns. If all
23229 insns are okay, and the label or unconditional branch to the same
23230 label is not too far away, succeed. */
23231 for (insns_skipped = 0;
23232 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23234 rtx scanbody;
23236 this_insn = next_nonnote_insn (this_insn);
23237 if (!this_insn)
23238 break;
23240 switch (GET_CODE (this_insn))
23242 case CODE_LABEL:
23243 /* Succeed if it is the target label, otherwise fail since
23244 control falls in from somewhere else. */
23245 if (this_insn == label)
23247 arm_ccfsm_state = 1;
23248 succeed = TRUE;
23250 else
23251 fail = TRUE;
23252 break;
23254 case BARRIER:
23255 /* Succeed if the following insn is the target label.
23256 Otherwise fail.
23257 If return insns are used then the last insn in a function
23258 will be a barrier. */
23259 this_insn = next_nonnote_insn (this_insn);
23260 if (this_insn && this_insn == label)
23262 arm_ccfsm_state = 1;
23263 succeed = TRUE;
23265 else
23266 fail = TRUE;
23267 break;
23269 case CALL_INSN:
23270 /* The AAPCS says that conditional calls should not be
23271 used since they make interworking inefficient (the
23272 linker can't transform BL<cond> into BLX). That's
23273 only a problem if the machine has BLX. */
23274 if (arm_arch5)
23276 fail = TRUE;
23277 break;
23280 /* Succeed if the following insn is the target label, or
23281 if the following two insns are a barrier and the
23282 target label. */
23283 this_insn = next_nonnote_insn (this_insn);
23284 if (this_insn && BARRIER_P (this_insn))
23285 this_insn = next_nonnote_insn (this_insn);
23287 if (this_insn && this_insn == label
23288 && insns_skipped < max_insns_skipped)
23290 arm_ccfsm_state = 1;
23291 succeed = TRUE;
23293 else
23294 fail = TRUE;
23295 break;
23297 case JUMP_INSN:
23298 /* If this is an unconditional branch to the same label, succeed.
23299 If it is to another label, do nothing. If it is conditional,
23300 fail. */
23301 /* XXX Probably, the tests for SET and the PC are
23302 unnecessary. */
23304 scanbody = PATTERN (this_insn);
23305 if (GET_CODE (scanbody) == SET
23306 && GET_CODE (SET_DEST (scanbody)) == PC)
23308 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23309 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23311 arm_ccfsm_state = 2;
23312 succeed = TRUE;
23314 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23315 fail = TRUE;
23317 /* Fail if a conditional return is undesirable (e.g. on a
23318 StrongARM), but still allow this if optimizing for size. */
23319 else if (GET_CODE (scanbody) == return_code
23320 && !use_return_insn (TRUE, NULL)
23321 && !optimize_size)
23322 fail = TRUE;
23323 else if (GET_CODE (scanbody) == return_code)
23325 arm_ccfsm_state = 2;
23326 succeed = TRUE;
23328 else if (GET_CODE (scanbody) == PARALLEL)
23330 switch (get_attr_conds (this_insn))
23332 case CONDS_NOCOND:
23333 break;
23334 default:
23335 fail = TRUE;
23336 break;
23339 else
23340 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23342 break;
23344 case INSN:
23345 /* Instructions using or affecting the condition codes make it
23346 fail. */
23347 scanbody = PATTERN (this_insn);
23348 if (!(GET_CODE (scanbody) == SET
23349 || GET_CODE (scanbody) == PARALLEL)
23350 || get_attr_conds (this_insn) != CONDS_NOCOND)
23351 fail = TRUE;
23352 break;
23354 default:
23355 break;
23358 if (succeed)
23360 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23361 arm_target_label = CODE_LABEL_NUMBER (label);
23362 else
23364 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23366 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23368 this_insn = next_nonnote_insn (this_insn);
23369 gcc_assert (!this_insn
23370 || (!BARRIER_P (this_insn)
23371 && !LABEL_P (this_insn)));
23373 if (!this_insn)
23375 /* Oh dear! We ran off the end; give up. */
23376 extract_constrain_insn_cached (insn);
23377 arm_ccfsm_state = 0;
23378 arm_target_insn = NULL;
23379 return;
23381 arm_target_insn = this_insn;
23384 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23385 what it was. */
23386 if (!reverse)
23387 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23389 if (reverse || then_not_else)
23390 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23393 /* Restore recog_data (getting the attributes of other insns can
23394 destroy this array, but final.c assumes that it remains intact
23395 across this call). */
23396 extract_constrain_insn_cached (insn);
23400 /* Output IT instructions. */
23401 void
23402 thumb2_asm_output_opcode (FILE * stream)
23404 char buff[5];
23405 int n;
23407 if (arm_condexec_mask)
23409 for (n = 0; n < arm_condexec_masklen; n++)
23410 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23411 buff[n] = 0;
23412 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23413 arm_condition_codes[arm_current_cc]);
23414 arm_condexec_mask = 0;
23418 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23419 UNITS_PER_WORD bytes wide. */
23420 static unsigned int
23421 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23423 if (TARGET_32BIT
23424 && regno > PC_REGNUM
23425 && regno != FRAME_POINTER_REGNUM
23426 && regno != ARG_POINTER_REGNUM
23427 && !IS_VFP_REGNUM (regno))
23428 return 1;
23430 return ARM_NUM_REGS (mode);
23433 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23434 static bool
23435 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23437 if (GET_MODE_CLASS (mode) == MODE_CC)
23438 return (regno == CC_REGNUM
23439 || (TARGET_HARD_FLOAT
23440 && regno == VFPCC_REGNUM));
23442 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23443 return false;
23445 if (TARGET_THUMB1)
23446 /* For the Thumb we only allow values bigger than SImode in
23447 registers 0 - 6, so that there is always a second low
23448 register available to hold the upper part of the value.
23449 We probably ought to ensure that the register is the
23450 start of an even numbered register pair. */
23451 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23453 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23455 if (mode == SFmode || mode == SImode)
23456 return VFP_REGNO_OK_FOR_SINGLE (regno);
23458 if (mode == DFmode)
23459 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23461 if (mode == HFmode)
23462 return VFP_REGNO_OK_FOR_SINGLE (regno);
23464 /* VFP registers can hold HImode values. */
23465 if (mode == HImode)
23466 return VFP_REGNO_OK_FOR_SINGLE (regno);
23468 if (TARGET_NEON)
23469 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23470 || (VALID_NEON_QREG_MODE (mode)
23471 && NEON_REGNO_OK_FOR_QUAD (regno))
23472 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23473 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23474 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23475 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23476 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23478 return false;
23481 if (TARGET_REALLY_IWMMXT)
23483 if (IS_IWMMXT_GR_REGNUM (regno))
23484 return mode == SImode;
23486 if (IS_IWMMXT_REGNUM (regno))
23487 return VALID_IWMMXT_REG_MODE (mode);
23490 /* We allow almost any value to be stored in the general registers.
23491 Restrict doubleword quantities to even register pairs in ARM state
23492 so that we can use ldrd. Do not allow very large Neon structure
23493 opaque modes in general registers; they would use too many. */
23494 if (regno <= LAST_ARM_REGNUM)
23496 if (ARM_NUM_REGS (mode) > 4)
23497 return false;
23499 if (TARGET_THUMB2)
23500 return true;
23502 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23505 if (regno == FRAME_POINTER_REGNUM
23506 || regno == ARG_POINTER_REGNUM)
23507 /* We only allow integers in the fake hard registers. */
23508 return GET_MODE_CLASS (mode) == MODE_INT;
23510 return false;
23513 /* Implement TARGET_MODES_TIEABLE_P. */
23515 static bool
23516 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23518 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23519 return true;
23521 /* We specifically want to allow elements of "structure" modes to
23522 be tieable to the structure. This more general condition allows
23523 other rarer situations too. */
23524 if (TARGET_NEON
23525 && (VALID_NEON_DREG_MODE (mode1)
23526 || VALID_NEON_QREG_MODE (mode1)
23527 || VALID_NEON_STRUCT_MODE (mode1))
23528 && (VALID_NEON_DREG_MODE (mode2)
23529 || VALID_NEON_QREG_MODE (mode2)
23530 || VALID_NEON_STRUCT_MODE (mode2)))
23531 return true;
23533 return false;
23536 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23537 not used in ARM mode. */
23539 enum reg_class
23540 arm_regno_class (int regno)
23542 if (regno == PC_REGNUM)
23543 return NO_REGS;
23545 if (TARGET_THUMB1)
23547 if (regno == STACK_POINTER_REGNUM)
23548 return STACK_REG;
23549 if (regno == CC_REGNUM)
23550 return CC_REG;
23551 if (regno < 8)
23552 return LO_REGS;
23553 return HI_REGS;
23556 if (TARGET_THUMB2 && regno < 8)
23557 return LO_REGS;
23559 if ( regno <= LAST_ARM_REGNUM
23560 || regno == FRAME_POINTER_REGNUM
23561 || regno == ARG_POINTER_REGNUM)
23562 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23564 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23565 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23567 if (IS_VFP_REGNUM (regno))
23569 if (regno <= D7_VFP_REGNUM)
23570 return VFP_D0_D7_REGS;
23571 else if (regno <= LAST_LO_VFP_REGNUM)
23572 return VFP_LO_REGS;
23573 else
23574 return VFP_HI_REGS;
23577 if (IS_IWMMXT_REGNUM (regno))
23578 return IWMMXT_REGS;
23580 if (IS_IWMMXT_GR_REGNUM (regno))
23581 return IWMMXT_GR_REGS;
23583 return NO_REGS;
23586 /* Handle a special case when computing the offset
23587 of an argument from the frame pointer. */
23589 arm_debugger_arg_offset (int value, rtx addr)
23591 rtx_insn *insn;
23593 /* We are only interested if dbxout_parms() failed to compute the offset. */
23594 if (value != 0)
23595 return 0;
23597 /* We can only cope with the case where the address is held in a register. */
23598 if (!REG_P (addr))
23599 return 0;
23601 /* If we are using the frame pointer to point at the argument, then
23602 an offset of 0 is correct. */
23603 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23604 return 0;
23606 /* If we are using the stack pointer to point at the
23607 argument, then an offset of 0 is correct. */
23608 /* ??? Check this is consistent with thumb2 frame layout. */
23609 if ((TARGET_THUMB || !frame_pointer_needed)
23610 && REGNO (addr) == SP_REGNUM)
23611 return 0;
23613 /* Oh dear. The argument is pointed to by a register rather
23614 than being held in a register, or being stored at a known
23615 offset from the frame pointer. Since GDB only understands
23616 those two kinds of argument we must translate the address
23617 held in the register into an offset from the frame pointer.
23618 We do this by searching through the insns for the function
23619 looking to see where this register gets its value. If the
23620 register is initialized from the frame pointer plus an offset
23621 then we are in luck and we can continue, otherwise we give up.
23623 This code is exercised by producing debugging information
23624 for a function with arguments like this:
23626 double func (double a, double b, int c, double d) {return d;}
23628 Without this code the stab for parameter 'd' will be set to
23629 an offset of 0 from the frame pointer, rather than 8. */
23631 /* The if() statement says:
23633 If the insn is a normal instruction
23634 and if the insn is setting the value in a register
23635 and if the register being set is the register holding the address of the argument
23636 and if the address is computed by an addition
23637 that involves adding to a register
23638 which is the frame pointer
23639 a constant integer
23641 then... */
23643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23645 if ( NONJUMP_INSN_P (insn)
23646 && GET_CODE (PATTERN (insn)) == SET
23647 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23648 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23649 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23650 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23651 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23654 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23656 break;
23660 if (value == 0)
23662 debug_rtx (addr);
23663 warning (0, "unable to compute real location of stacked parameter");
23664 value = 8; /* XXX magic hack */
23667 return value;
23670 /* Implement TARGET_PROMOTED_TYPE. */
23672 static tree
23673 arm_promoted_type (const_tree t)
23675 if (SCALAR_FLOAT_TYPE_P (t)
23676 && TYPE_PRECISION (t) == 16
23677 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23678 return float_type_node;
23679 return NULL_TREE;
23682 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23683 This simply adds HFmode as a supported mode; even though we don't
23684 implement arithmetic on this type directly, it's supported by
23685 optabs conversions, much the way the double-word arithmetic is
23686 special-cased in the default hook. */
23688 static bool
23689 arm_scalar_mode_supported_p (scalar_mode mode)
23691 if (mode == HFmode)
23692 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23693 else if (ALL_FIXED_POINT_MODE_P (mode))
23694 return true;
23695 else
23696 return default_scalar_mode_supported_p (mode);
23699 /* Set the value of FLT_EVAL_METHOD.
23700 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23702 0: evaluate all operations and constants, whose semantic type has at
23703 most the range and precision of type float, to the range and
23704 precision of float; evaluate all other operations and constants to
23705 the range and precision of the semantic type;
23707 N, where _FloatN is a supported interchange floating type
23708 evaluate all operations and constants, whose semantic type has at
23709 most the range and precision of _FloatN type, to the range and
23710 precision of the _FloatN type; evaluate all other operations and
23711 constants to the range and precision of the semantic type;
23713 If we have the ARMv8.2-A extensions then we support _Float16 in native
23714 precision, so we should set this to 16. Otherwise, we support the type,
23715 but want to evaluate expressions in float precision, so set this to
23716 0. */
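/* For instance (illustrative): with the ARMv8.2-A FP16 instructions
   available (TARGET_VFP_FP16INST) FLT_EVAL_METHOD is 16, so adding two
   _Float16 values may be done directly in half precision; otherwise it
   is 0 and the operands are promoted to float for the arithmetic.  */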
23718 static enum flt_eval_method
23719 arm_excess_precision (enum excess_precision_type type)
23721 switch (type)
23723 case EXCESS_PRECISION_TYPE_FAST:
23724 case EXCESS_PRECISION_TYPE_STANDARD:
23725 /* We can calculate either in 16-bit range and precision or
23726 32-bit range and precision. Make that decision based on whether
23727 we have native support for the ARMv8.2-A 16-bit floating-point
23728 instructions or not. */
23729 return (TARGET_VFP_FP16INST
23730 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23731 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23732 case EXCESS_PRECISION_TYPE_IMPLICIT:
23733 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23734 default:
23735 gcc_unreachable ();
23737 return FLT_EVAL_METHOD_UNPREDICTABLE;
23741 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23742 _Float16 if we are using anything other than ieee format for 16-bit
23743 floating point. Otherwise, punt to the default implementation. */
23744 static opt_scalar_float_mode
23745 arm_floatn_mode (int n, bool extended)
23747 if (!extended && n == 16)
23749 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23750 return HFmode;
23751 return opt_scalar_float_mode ();
23754 return default_floatn_mode (n, extended);
23758 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23759 not to early-clobber SRC registers in the process.
23761 We assume that the operands described by SRC and DEST represent a
23762 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23763 number of components into which the copy has been decomposed. */
23764 void
23765 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23767 unsigned int i;
23769 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23770 || REGNO (operands[0]) < REGNO (operands[1]))
23772 for (i = 0; i < count; i++)
23774 operands[2 * i] = dest[i];
23775 operands[2 * i + 1] = src[i];
23778 else
23780 for (i = 0; i < count; i++)
23782 operands[2 * i] = dest[count - i - 1];
23783 operands[2 * i + 1] = src[count - i - 1];
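/* Example (editor's sketch, hypothetical registers): if the copy is
   decomposed into two components with OPERANDS[1] in d1-d2 and
   OPERANDS[0] in d2-d3, the destination register number is the higher
   of the two, so the components are ordered high part first; copying
   the low part first would overwrite d2 before it had been read.  */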
23788 /* Split operands into moves from op[1] + op[2] into op[0]. */
23790 void
23791 neon_split_vcombine (rtx operands[3])
23793 unsigned int dest = REGNO (operands[0]);
23794 unsigned int src1 = REGNO (operands[1]);
23795 unsigned int src2 = REGNO (operands[2]);
23796 machine_mode halfmode = GET_MODE (operands[1]);
23797 unsigned int halfregs = REG_NREGS (operands[1]);
23798 rtx destlo, desthi;
23800 if (src1 == dest && src2 == dest + halfregs)
23802 /* No-op move. Can't split to nothing; emit something. */
23803 emit_note (NOTE_INSN_DELETED);
23804 return;
23807 /* Preserve register attributes for variable tracking. */
23808 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23809 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23810 GET_MODE_SIZE (halfmode));
23812 /* Special case of reversed high/low parts. Use VSWP. */
23813 if (src2 == dest && src1 == dest + halfregs)
23815 rtx x = gen_rtx_SET (destlo, operands[1]);
23816 rtx y = gen_rtx_SET (desthi, operands[2]);
23817 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23818 return;
23821 if (!reg_overlap_mentioned_p (operands[2], destlo))
23823 /* Try to avoid unnecessary moves if part of the result
23824 is in the right place already. */
23825 if (src1 != dest)
23826 emit_move_insn (destlo, operands[1]);
23827 if (src2 != dest + halfregs)
23828 emit_move_insn (desthi, operands[2]);
23830 else
23832 if (src2 != dest + halfregs)
23833 emit_move_insn (desthi, operands[2]);
23834 if (src1 != dest)
23835 emit_move_insn (destlo, operands[1]);
23839 /* Return the number (counting from 0) of
23840 the least significant set bit in MASK. */
23842 inline static int
23843 number_of_first_bit_set (unsigned mask)
23845 return ctz_hwi (mask);
23848 /* Like emit_multi_reg_push, but allowing for a different set of
23849 registers to be described as saved. MASK is the set of registers
23850 to be saved; REAL_REGS is the set of registers to be described as
23851 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23853 static rtx_insn *
23854 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23856 unsigned long regno;
23857 rtx par[10], tmp, reg;
23858 rtx_insn *insn;
23859 int i, j;
23861 /* Build the parallel of the registers actually being stored. */
23862 for (i = 0; mask; ++i, mask &= mask - 1)
23864 regno = ctz_hwi (mask);
23865 reg = gen_rtx_REG (SImode, regno);
23867 if (i == 0)
23868 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23869 else
23870 tmp = gen_rtx_USE (VOIDmode, reg);
23872 par[i] = tmp;
23875 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23876 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23877 tmp = gen_frame_mem (BLKmode, tmp);
23878 tmp = gen_rtx_SET (tmp, par[0]);
23879 par[0] = tmp;
23881 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23882 insn = emit_insn (tmp);
23884 /* Always build the stack adjustment note for unwind info. */
23885 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23886 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23887 par[0] = tmp;
23889 /* Build the parallel of the registers recorded as saved for unwind. */
23890 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23892 regno = ctz_hwi (real_regs);
23893 reg = gen_rtx_REG (SImode, regno);
23895 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23896 tmp = gen_frame_mem (SImode, tmp);
23897 tmp = gen_rtx_SET (tmp, reg);
23898 RTX_FRAME_RELATED_P (tmp) = 1;
23899 par[j + 1] = tmp;
23902 if (j == 0)
23903 tmp = par[0];
23904 else
23906 RTX_FRAME_RELATED_P (par[0]) = 1;
23907 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23910 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23912 return insn;
23915 /* Emit code to push or pop registers to or from the stack. F is the
23916 assembly file. MASK is the registers to pop. */
23917 static void
23918 thumb_pop (FILE *f, unsigned long mask)
23920 int regno;
23921 int lo_mask = mask & 0xFF;
23923 gcc_assert (mask);
23925 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23927 /* Special case. Do not generate a POP PC statement here, do it in
23928 thumb_exit() */
23929 thumb_exit (f, -1);
23930 return;
23933 fprintf (f, "\tpop\t{");
23935 /* Look at the low registers first. */
23936 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23938 if (lo_mask & 1)
23940 asm_fprintf (f, "%r", regno);
23942 if ((lo_mask & ~1) != 0)
23943 fprintf (f, ", ");
23947 if (mask & (1 << PC_REGNUM))
23949 /* Catch popping the PC. */
23950 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23951 || IS_CMSE_ENTRY (arm_current_func_type ()))
23953 /* The PC is never popped directly; instead
23954 it is popped into r3 and then BX is used. */
23955 fprintf (f, "}\n");
23957 thumb_exit (f, -1);
23959 return;
23961 else
23963 if (mask & 0xFF)
23964 fprintf (f, ", ");
23966 asm_fprintf (f, "%r", PC_REGNUM);
23970 fprintf (f, "}\n");
23973 /* Generate code to return from a thumb function.
23974 If 'reg_containing_return_addr' is -1, then the return address is
23975 actually on the stack, at the stack pointer.
23977 Note: do not forget to update length attribute of corresponding insn pattern
23978 when changing assembly output (eg. length attribute of epilogue_insns when
23979 updating Armv8-M Baseline Security Extensions register clearing
23980 sequences). */
23981 static void
23982 thumb_exit (FILE *f, int reg_containing_return_addr)
23984 unsigned regs_available_for_popping;
23985 unsigned regs_to_pop;
23986 int pops_needed;
23987 unsigned available;
23988 unsigned required;
23989 machine_mode mode;
23990 int size;
23991 int restore_a4 = FALSE;
23993 /* Compute the registers we need to pop. */
23994 regs_to_pop = 0;
23995 pops_needed = 0;
23997 if (reg_containing_return_addr == -1)
23999 regs_to_pop |= 1 << LR_REGNUM;
24000 ++pops_needed;
24003 if (TARGET_BACKTRACE)
24005 /* Restore the (ARM) frame pointer and stack pointer. */
24006 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24007 pops_needed += 2;
24010 /* If there is nothing to pop then just emit the BX instruction and
24011 return. */
24012 if (pops_needed == 0)
24014 if (crtl->calls_eh_return)
24015 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24017 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24019 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24020 reg_containing_return_addr);
24021 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24023 else
24024 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24025 return;
24027 /* Otherwise if we are not supporting interworking and we have not created
24028 a backtrace structure and the function was not entered in ARM mode then
24029 just pop the return address straight into the PC. */
24030 else if (!TARGET_INTERWORK
24031 && !TARGET_BACKTRACE
24032 && !is_called_in_ARM_mode (current_function_decl)
24033 && !crtl->calls_eh_return
24034 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24036 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24037 return;
24040 /* Find out how many of the (return) argument registers we can corrupt. */
24041 regs_available_for_popping = 0;
24043 /* If returning via __builtin_eh_return, the bottom three registers
24044 all contain information needed for the return. */
24045 if (crtl->calls_eh_return)
24046 size = 12;
24047 else
24049 /* Try to deduce the registers used from the function's
24050 return value. This is more reliable than examining
24051 df_regs_ever_live_p () because that will be set if the register is
24052 ever used in the function, not just if the register is used
24053 to hold a return value. */
24055 if (crtl->return_rtx != 0)
24056 mode = GET_MODE (crtl->return_rtx);
24057 else
24058 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24060 size = GET_MODE_SIZE (mode);
24062 if (size == 0)
24064 /* In a void function we can use any argument register.
24065 In a function that returns a structure on the stack
24066 we can use the second and third argument registers. */
24067 if (mode == VOIDmode)
24068 regs_available_for_popping =
24069 (1 << ARG_REGISTER (1))
24070 | (1 << ARG_REGISTER (2))
24071 | (1 << ARG_REGISTER (3));
24072 else
24073 regs_available_for_popping =
24074 (1 << ARG_REGISTER (2))
24075 | (1 << ARG_REGISTER (3));
24077 else if (size <= 4)
24078 regs_available_for_popping =
24079 (1 << ARG_REGISTER (2))
24080 | (1 << ARG_REGISTER (3));
24081 else if (size <= 8)
24082 regs_available_for_popping =
24083 (1 << ARG_REGISTER (3));
24086 /* Match registers to be popped with registers into which we pop them. */
24087 for (available = regs_available_for_popping,
24088 required = regs_to_pop;
24089 required != 0 && available != 0;
24090 available &= ~(available & - available),
24091 required &= ~(required & - required))
24092 -- pops_needed;
24094 /* If we have any popping registers left over, remove them. */
24095 if (available > 0)
24096 regs_available_for_popping &= ~available;
24098 /* Otherwise if we need another popping register we can use
24099 the fourth argument register. */
24100 else if (pops_needed)
24102 /* If we have not found any free argument registers and
24103 reg a4 contains the return address, we must move it. */
24104 if (regs_available_for_popping == 0
24105 && reg_containing_return_addr == LAST_ARG_REGNUM)
24107 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24108 reg_containing_return_addr = LR_REGNUM;
24110 else if (size > 12)
24112 /* Register a4 is being used to hold part of the return value,
24113 but we have dire need of a free, low register. */
24114 restore_a4 = TRUE;
24116 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24119 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24121 /* The fourth argument register is available. */
24122 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24124 --pops_needed;
24128 /* Pop as many registers as we can. */
24129 thumb_pop (f, regs_available_for_popping);
24131 /* Process the registers we popped. */
24132 if (reg_containing_return_addr == -1)
24134 /* The return address was popped into the lowest numbered register. */
24135 regs_to_pop &= ~(1 << LR_REGNUM);
24137 reg_containing_return_addr =
24138 number_of_first_bit_set (regs_available_for_popping);
24140 /* Remove this register from the mask of available registers, so that
24141 the return address will not be corrupted by further pops. */
24142 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24145 /* If we popped other registers then handle them here. */
24146 if (regs_available_for_popping)
24148 int frame_pointer;
24150 /* Work out which register currently contains the frame pointer. */
24151 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24153 /* Move it into the correct place. */
24154 asm_fprintf (f, "\tmov\t%r, %r\n",
24155 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24157 /* (Temporarily) remove it from the mask of popped registers. */
24158 regs_available_for_popping &= ~(1 << frame_pointer);
24159 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24161 if (regs_available_for_popping)
24163 int stack_pointer;
24165 /* We popped the stack pointer as well,
24166 find the register that contains it. */
24167 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24169 /* Move it into the stack register. */
24170 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24172 /* At this point we have popped all necessary registers, so
24173 do not worry about restoring regs_available_for_popping
24174 to its correct value:
24176 assert (pops_needed == 0)
24177 assert (regs_available_for_popping == (1 << frame_pointer))
24178 assert (regs_to_pop == (1 << STACK_POINTER)) */
24180 else
24182 /* Since we have just moved the popped value into the frame
24183 pointer, the popping register is available for reuse, and
24184 we know that we still have the stack pointer left to pop. */
24185 regs_available_for_popping |= (1 << frame_pointer);
24189 /* If we still have registers left on the stack, but we no longer have
24190 any registers into which we can pop them, then we must move the return
24191 address into the link register and make available the register that
24192 contained it. */
24193 if (regs_available_for_popping == 0 && pops_needed > 0)
24195 regs_available_for_popping |= 1 << reg_containing_return_addr;
24197 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24198 reg_containing_return_addr);
24200 reg_containing_return_addr = LR_REGNUM;
24203 /* If we have registers left on the stack then pop some more.
24204 We know that at most we will want to pop FP and SP. */
24205 if (pops_needed > 0)
24207 int popped_into;
24208 int move_to;
24210 thumb_pop (f, regs_available_for_popping);
24212 /* We have popped either FP or SP.
24213 Move whichever one it is into the correct register. */
24214 popped_into = number_of_first_bit_set (regs_available_for_popping);
24215 move_to = number_of_first_bit_set (regs_to_pop);
24217 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24218 --pops_needed;
24221 /* If we still have not popped everything then we must have only
24222 had one register available to us and we are now popping the SP. */
24223 if (pops_needed > 0)
24225 int popped_into;
24227 thumb_pop (f, regs_available_for_popping);
24229 popped_into = number_of_first_bit_set (regs_available_for_popping);
24231 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24233 assert (regs_to_pop == (1 << STACK_POINTER))
24234 assert (pops_needed == 1)
24238 /* If necessary restore the a4 register. */
24239 if (restore_a4)
24241 if (reg_containing_return_addr != LR_REGNUM)
24243 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24244 reg_containing_return_addr = LR_REGNUM;
24247 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24250 if (crtl->calls_eh_return)
24251 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24253 /* Return to caller. */
24254 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24256 /* This is for the cases where LR is not being used to contain the return
24257 address. It may therefore contain information that we might not want
24258 to leak, hence it must be cleared. The value in R0 will never be a
24259 secret at this point, so it is safe to use it, see the clearing code
24260 in 'cmse_nonsecure_entry_clear_before_return'. */
24261 if (reg_containing_return_addr != LR_REGNUM)
24262 asm_fprintf (f, "\tmov\tlr, r0\n");
24264 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24265 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24267 else
24268 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24271 /* Scan INSN just before assembler is output for it.
24272 For Thumb-1, we track the status of the condition codes; this
24273 information is used in the cbranchsi4_insn pattern. */
24274 void
24275 thumb1_final_prescan_insn (rtx_insn *insn)
24277 if (flag_print_asm_name)
24278 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24279 INSN_ADDRESSES (INSN_UID (insn)));
24280 /* Don't overwrite the previous setter when we get to a cbranch. */
24281 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24283 enum attr_conds conds;
24285 if (cfun->machine->thumb1_cc_insn)
24287 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24288 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24289 CC_STATUS_INIT;
24291 conds = get_attr_conds (insn);
24292 if (conds == CONDS_SET)
24294 rtx set = single_set (insn);
24295 cfun->machine->thumb1_cc_insn = insn;
24296 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24297 cfun->machine->thumb1_cc_op1 = const0_rtx;
24298 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24299 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24301 rtx src1 = XEXP (SET_SRC (set), 1);
24302 if (src1 == const0_rtx)
24303 cfun->machine->thumb1_cc_mode = CCmode;
24305 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24307 /* Record the src register operand instead of dest because
24308 cprop_hardreg pass propagates src. */
24309 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24312 else if (conds != CONDS_NOCOND)
24313 cfun->machine->thumb1_cc_insn = NULL_RTX;
24316 /* Check if unexpected far jump is used. */
24317 if (cfun->machine->lr_save_eliminated
24318 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24319 internal_error("Unexpected thumb1 far jump");
24323 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24325 unsigned HOST_WIDE_INT mask = 0xff;
24326 int i;
24328 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24329 if (val == 0) /* XXX */
24330 return 0;
24332 for (i = 0; i < 25; i++)
24333 if ((val & (mask << i)) == val)
24334 return 1;
24336 return 0;
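/* For example, thumb_shiftable_const returns 1 for 0x00ff0000 (0xff
   shifted left by 16) but 0 for 0x00ff00ff, whose set bits do not fit
   within a single shifted 8-bit window.  */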
24339 /* Returns nonzero if the current function contains,
24340 or might contain, a far jump. */
24341 static int
24342 thumb_far_jump_used_p (void)
24344 rtx_insn *insn;
24345 bool far_jump = false;
24346 unsigned int func_size = 0;
24348 /* If we have already decided that far jumps may be used,
24349 do not bother checking again, and always return true even if
24350 it turns out that they are not being used. Once we have made
24351 the decision that far jumps are present (and that hence the link
24352 register will be pushed onto the stack) we cannot go back on it. */
24353 if (cfun->machine->far_jump_used)
24354 return 1;
24356 /* If this function is not being called from the prologue/epilogue
24357 generation code then it must be being called from the
24358 INITIAL_ELIMINATION_OFFSET macro. */
24359 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24361 /* In this case we know that we are being asked about the elimination
24362 of the arg pointer register. If that register is not being used,
24363 then there are no arguments on the stack, and we do not have to
24364 worry that a far jump might force the prologue to push the link
24365 register, changing the stack offsets. In this case we can just
24366 return false, since the presence of far jumps in the function will
24367 not affect stack offsets.
24369 If the arg pointer is live (or if it was live, but has now been
24370 eliminated and so set to dead) then we do have to test to see if
24371 the function might contain a far jump. This test can lead to some
24372 false negatives, since before reload is completed, the length of
24373 branch instructions is not known, so gcc defaults to returning their
24374 longest length, which in turn sets the far jump attribute to true.
24376 A false negative will not result in bad code being generated, but it
24377 will result in a needless push and pop of the link register. We
24378 hope that this does not occur too often.
24380 If we need doubleword stack alignment this could affect the other
24381 elimination offsets so we can't risk getting it wrong. */
24382 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24383 cfun->machine->arg_pointer_live = 1;
24384 else if (!cfun->machine->arg_pointer_live)
24385 return 0;
24388 /* We should not change far_jump_used during or after reload, as there is
24389 no chance to change stack frame layout. */
24390 if (reload_in_progress || reload_completed)
24391 return 0;
24393 /* Check to see if the function contains a branch
24394 insn with the far jump attribute set. */
24395 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24397 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24399 far_jump = true;
24401 func_size += get_attr_length (insn);
24404 /* Attribute far_jump will always be true for thumb1 before the
24405 shorten_branch pass. So checking the far_jump attribute before
24406 shorten_branch isn't very useful.
24408 The following heuristic tries to estimate more accurately whether a far
24409 jump may finally be used. The heuristic is very conservative, as there is
24410 no chance to roll back the decision not to use a far jump.
24412 Thumb1 long branch offset is -2048 to 2046. The worst case is that each
24413 2-byte insn is associated with a 4-byte constant pool entry. Using
24414 function size 2048/3 as the threshold is conservative enough. */
24415 if (far_jump)
24417 if ((func_size * 3) >= 2048)
24419 /* Record the fact that we have decided that
24420 the function does use far jumps. */
24421 cfun->machine->far_jump_used = 1;
24422 return 1;
24426 return 0;
24429 /* Return nonzero if FUNC must be entered in ARM mode. */
24430 static bool
24431 is_called_in_ARM_mode (tree func)
24433 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24435 /* Ignore the problem about functions whose address is taken. */
24436 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24437 return true;
24439 #ifdef ARM_PE
24440 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24441 #else
24442 return false;
24443 #endif
24446 /* Given the stack offsets and register mask in OFFSETS, decide how
24447 many additional registers to push instead of subtracting a constant
24448 from SP. For epilogues the principle is the same except we use pop.
24449 FOR_PROLOGUE indicates which we're generating. */
24450 static int
24451 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24453 HOST_WIDE_INT amount;
24454 unsigned long live_regs_mask = offsets->saved_regs_mask;
24455 /* Extract a mask of the ones we can give to the Thumb's push/pop
24456 instruction. */
24457 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24458 /* Then count how many other high registers will need to be pushed. */
24459 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24460 int n_free, reg_base, size;
24462 if (!for_prologue && frame_pointer_needed)
24463 amount = offsets->locals_base - offsets->saved_regs;
24464 else
24465 amount = offsets->outgoing_args - offsets->saved_regs;
24467 /* If the stack frame size is 512 exactly, we can save one load
24468 instruction, which should make this a win even when optimizing
24469 for speed. */
24470 if (!optimize_size && amount != 512)
24471 return 0;
24473 /* Can't do this if there are high registers to push. */
24474 if (high_regs_pushed != 0)
24475 return 0;
24477 /* Shouldn't do it in the prologue if no registers would normally
24478 be pushed at all. In the epilogue, also allow it if we'll have
24479 a pop insn for the PC. */
24480 if (l_mask == 0
24481 && (for_prologue
24482 || TARGET_BACKTRACE
24483 || (live_regs_mask & 1 << LR_REGNUM) == 0
24484 || TARGET_INTERWORK
24485 || crtl->args.pretend_args_size != 0))
24486 return 0;
24488 /* Don't do this if thumb_expand_prologue wants to emit instructions
24489 between the push and the stack frame allocation. */
24490 if (for_prologue
24491 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24492 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24493 return 0;
24495 reg_base = 0;
24496 n_free = 0;
24497 if (!for_prologue)
24499 size = arm_size_return_regs ();
24500 reg_base = ARM_NUM_INTS (size);
24501 live_regs_mask >>= reg_base;
24504 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24505 && (for_prologue || call_used_regs[reg_base + n_free]))
24507 live_regs_mask >>= 1;
24508 n_free++;
24511 if (n_free == 0)
24512 return 0;
24513 gcc_assert (amount / 4 * 4 == amount);
24515 if (amount >= 512 && (amount - n_free * 4) < 512)
24516 return (amount - 508) / 4;
24517 if (amount <= n_free * 4)
24518 return amount / 4;
24519 return 0;
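/* Worked example (editor's sketch): a Thumb-1 "sub sp, #imm" can only
   subtract up to 508 bytes in one instruction.  With a 512-byte frame
   and one free low register, amount >= 512 and amount - n_free * 4 ==
   508 < 512, so the function returns (512 - 508) / 4 == 1: one extra
   register is pushed and the remaining 508-byte adjustment fits in a
   single sub, saving the constant load mentioned above.  */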
24522 /* The bits which aren't usefully expanded as rtl. */
24523 const char *
24524 thumb1_unexpanded_epilogue (void)
24526 arm_stack_offsets *offsets;
24527 int regno;
24528 unsigned long live_regs_mask = 0;
24529 int high_regs_pushed = 0;
24530 int extra_pop;
24531 int had_to_push_lr;
24532 int size;
24534 if (cfun->machine->return_used_this_function != 0)
24535 return "";
24537 if (IS_NAKED (arm_current_func_type ()))
24538 return "";
24540 offsets = arm_get_frame_offsets ();
24541 live_regs_mask = offsets->saved_regs_mask;
24542 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24544 /* Try to deduce the registers used from the function's return value.
24545 This is more reliable than examining df_regs_ever_live_p () because that
24546 will be set if the register is ever used in the function, not just if
24547 the register is used to hold a return value. */
24548 size = arm_size_return_regs ();
24550 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24551 if (extra_pop > 0)
24553 unsigned long extra_mask = (1 << extra_pop) - 1;
24554 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24557 /* The prolog may have pushed some high registers to use as
24558 work registers. e.g. the testsuite file:
24559 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24560 compiles to produce:
24561 push {r4, r5, r6, r7, lr}
24562 mov r7, r9
24563 mov r6, r8
24564 push {r6, r7}
24565 as part of the prolog. We have to undo that pushing here. */
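/* For the example above, the code below would undo that with something
   like (illustrative, assuming r2 and r3 are free because the return
   value needs at most r0-r1, and that LR was pushed):
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
	pop	{r4, r5, r6, r7, pc}
   i.e. the saved high registers are popped into low registers first and
   then moved back into r8 and r9.  */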
24567 if (high_regs_pushed)
24569 unsigned long mask = live_regs_mask & 0xff;
24570 int next_hi_reg;
24572 /* The available low registers depend on the size of the value we are
24573 returning. */
24574 if (size <= 12)
24575 mask |= 1 << 3;
24576 if (size <= 8)
24577 mask |= 1 << 2;
24579 if (mask == 0)
24580 /* Oh dear! We have no low registers into which we can pop
24581 high registers! */
24582 internal_error
24583 ("no low registers available for popping high registers");
24585 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24586 if (live_regs_mask & (1 << next_hi_reg))
24587 break;
24589 while (high_regs_pushed)
24591 /* Find lo register(s) into which the high register(s) can
24592 be popped. */
24593 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24595 if (mask & (1 << regno))
24596 high_regs_pushed--;
24597 if (high_regs_pushed == 0)
24598 break;
24601 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24603 /* Pop the values into the low register(s). */
24604 thumb_pop (asm_out_file, mask);
24606 /* Move the value(s) into the high registers. */
24607 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24609 if (mask & (1 << regno))
24611 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24612 regno);
24614 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24615 if (live_regs_mask & (1 << next_hi_reg))
24616 break;
24620 live_regs_mask &= ~0x0f00;
24623 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24624 live_regs_mask &= 0xff;
24626 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24628 /* Pop the return address into the PC. */
24629 if (had_to_push_lr)
24630 live_regs_mask |= 1 << PC_REGNUM;
24632 /* Either no argument registers were pushed or a backtrace
24633 structure was created which includes an adjusted stack
24634 pointer, so just pop everything. */
24635 if (live_regs_mask)
24636 thumb_pop (asm_out_file, live_regs_mask);
24638 /* We have either just popped the return address into the
24639 PC or it was kept in LR for the entire function.
24640 Note that thumb_pop has already called thumb_exit if the
24641 PC was in the list. */
24642 if (!had_to_push_lr)
24643 thumb_exit (asm_out_file, LR_REGNUM);
24645 else
24647 /* Pop everything but the return address. */
24648 if (live_regs_mask)
24649 thumb_pop (asm_out_file, live_regs_mask);
24651 if (had_to_push_lr)
24653 if (size > 12)
24655 /* We have no free low regs, so save one. */
24656 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24657 LAST_ARG_REGNUM);
24660 /* Get the return address into a temporary register. */
24661 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24663 if (size > 12)
24665 /* Move the return address to lr. */
24666 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24667 LAST_ARG_REGNUM);
24668 /* Restore the low register. */
24669 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24670 IP_REGNUM);
24671 regno = LR_REGNUM;
24673 else
24674 regno = LAST_ARG_REGNUM;
24676 else
24677 regno = LR_REGNUM;
24679 /* Remove the argument registers that were pushed onto the stack. */
24680 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24681 SP_REGNUM, SP_REGNUM,
24682 crtl->args.pretend_args_size);
24684 thumb_exit (asm_out_file, regno);
24687 return "";
24690 /* Functions to save and restore machine-specific function data. */
24691 static struct machine_function *
24692 arm_init_machine_status (void)
24694 struct machine_function *machine;
24695 machine = ggc_cleared_alloc<machine_function> ();
24697 #if ARM_FT_UNKNOWN != 0
24698 machine->func_type = ARM_FT_UNKNOWN;
24699 #endif
24700 return machine;
24703 /* Return an RTX indicating where the return address to the
24704 calling function can be found. */
24706 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24708 if (count != 0)
24709 return NULL_RTX;
24711 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24714 /* Do anything needed before RTL is emitted for each function. */
24715 void
24716 arm_init_expanders (void)
24718 /* Arrange to initialize and mark the machine per-function status. */
24719 init_machine_status = arm_init_machine_status;
24721 /* This is to stop the combine pass optimizing away the alignment
24722 adjustment of va_arg. */
24723 /* ??? It is claimed that this should not be necessary. */
24724 if (cfun)
24725 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24728 /* Check that FUNC is called with a different mode. */
24730 bool
24731 arm_change_mode_p (tree func)
24733 if (TREE_CODE (func) != FUNCTION_DECL)
24734 return false;
24736 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24738 if (!callee_tree)
24739 callee_tree = target_option_default_node;
24741 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24742 int flags = callee_opts->x_target_flags;
24744 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24747 /* Like arm_compute_initial_elimination_offset. Simpler because there
24748 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24749 to point at the base of the local variables after static stack
24750 space for a function has been allocated. */
24752 HOST_WIDE_INT
24753 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24755 arm_stack_offsets *offsets;
24757 offsets = arm_get_frame_offsets ();
24759 switch (from)
24761 case ARG_POINTER_REGNUM:
24762 switch (to)
24764 case STACK_POINTER_REGNUM:
24765 return offsets->outgoing_args - offsets->saved_args;
24767 case FRAME_POINTER_REGNUM:
24768 return offsets->soft_frame - offsets->saved_args;
24770 case ARM_HARD_FRAME_POINTER_REGNUM:
24771 return offsets->saved_regs - offsets->saved_args;
24773 case THUMB_HARD_FRAME_POINTER_REGNUM:
24774 return offsets->locals_base - offsets->saved_args;
24776 default:
24777 gcc_unreachable ();
24779 break;
24781 case FRAME_POINTER_REGNUM:
24782 switch (to)
24784 case STACK_POINTER_REGNUM:
24785 return offsets->outgoing_args - offsets->soft_frame;
24787 case ARM_HARD_FRAME_POINTER_REGNUM:
24788 return offsets->saved_regs - offsets->soft_frame;
24790 case THUMB_HARD_FRAME_POINTER_REGNUM:
24791 return offsets->locals_base - offsets->soft_frame;
24793 default:
24794 gcc_unreachable ();
24796 break;
24798 default:
24799 gcc_unreachable ();
24803 /* Generate the function's prologue. */
24805 void
24806 thumb1_expand_prologue (void)
24808 rtx_insn *insn;
24810 HOST_WIDE_INT amount;
24811 HOST_WIDE_INT size;
24812 arm_stack_offsets *offsets;
24813 unsigned long func_type;
24814 int regno;
24815 unsigned long live_regs_mask;
24816 unsigned long l_mask;
24817 unsigned high_regs_pushed = 0;
24818 bool lr_needs_saving;
24820 func_type = arm_current_func_type ();
24822 /* Naked functions don't have prologues. */
24823 if (IS_NAKED (func_type))
24825 if (flag_stack_usage_info)
24826 current_function_static_stack_size = 0;
24827 return;
24830 if (IS_INTERRUPT (func_type))
24832 error ("interrupt Service Routines cannot be coded in Thumb mode");
24833 return;
24836 if (is_called_in_ARM_mode (current_function_decl))
24837 emit_insn (gen_prologue_thumb1_interwork ());
24839 offsets = arm_get_frame_offsets ();
24840 live_regs_mask = offsets->saved_regs_mask;
24841 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24843 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24844 l_mask = live_regs_mask & 0x40ff;
24845 /* Then count how many other high registers will need to be pushed. */
24846 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24848 if (crtl->args.pretend_args_size)
24850 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24852 if (cfun->machine->uses_anonymous_args)
24854 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24855 unsigned long mask;
24857 mask = 1ul << (LAST_ARG_REGNUM + 1);
24858 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24860 insn = thumb1_emit_multi_reg_push (mask, 0);
24862 else
24864 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24865 stack_pointer_rtx, x));
24867 RTX_FRAME_RELATED_P (insn) = 1;
24870 if (TARGET_BACKTRACE)
24872 HOST_WIDE_INT offset = 0;
24873 unsigned work_register;
24874 rtx work_reg, x, arm_hfp_rtx;
24876 /* We have been asked to create a stack backtrace structure.
24877 The code looks like this:
24879 0 .align 2
24880 0 func:
24881 0 sub SP, #16 Reserve space for 4 registers.
24882 2 push {R7} Push low registers.
24883 4 add R7, SP, #20 Get the stack pointer before the push.
24884 6 str R7, [SP, #8] Store the stack pointer
24885 (before reserving the space).
24886 8 mov R7, PC Get hold of the start of this code + 12.
24887 10 str R7, [SP, #16] Store it.
24888 12 mov R7, FP Get hold of the current frame pointer.
24889 14 str R7, [SP, #4] Store it.
24890 16 mov R7, LR Get hold of the current return address.
24891 18 str R7, [SP, #12] Store it.
24892 20 add R7, SP, #16 Point at the start of the
24893 backtrace structure.
24894 22 mov FP, R7 Put this value into the frame pointer. */
24896 work_register = thumb_find_work_register (live_regs_mask);
24897 work_reg = gen_rtx_REG (SImode, work_register);
24898 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24900 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24901 stack_pointer_rtx, GEN_INT (-16)));
24902 RTX_FRAME_RELATED_P (insn) = 1;
24904 if (l_mask)
24906 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24907 RTX_FRAME_RELATED_P (insn) = 1;
24908 lr_needs_saving = false;
24910 offset = bit_count (l_mask) * UNITS_PER_WORD;
24913 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24914 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24916 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24917 x = gen_frame_mem (SImode, x);
24918 emit_move_insn (x, work_reg);
24920 /* Make sure that the instruction fetching the PC is in the right place
24921 to calculate "start of backtrace creation code + 12". */
24922 /* ??? The stores using the common WORK_REG ought to be enough to
24923 prevent the scheduler from doing anything weird. Failing that
24924 we could always move all of the following into an UNSPEC_VOLATILE. */
24925 if (l_mask)
24927 x = gen_rtx_REG (SImode, PC_REGNUM);
24928 emit_move_insn (work_reg, x);
24930 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24931 x = gen_frame_mem (SImode, x);
24932 emit_move_insn (x, work_reg);
24934 emit_move_insn (work_reg, arm_hfp_rtx);
24936 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24937 x = gen_frame_mem (SImode, x);
24938 emit_move_insn (x, work_reg);
24940 else
24942 emit_move_insn (work_reg, arm_hfp_rtx);
24944 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24945 x = gen_frame_mem (SImode, x);
24946 emit_move_insn (x, work_reg);
24948 x = gen_rtx_REG (SImode, PC_REGNUM);
24949 emit_move_insn (work_reg, x);
24951 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24952 x = gen_frame_mem (SImode, x);
24953 emit_move_insn (x, work_reg);
24956 x = gen_rtx_REG (SImode, LR_REGNUM);
24957 emit_move_insn (work_reg, x);
24959 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24960 x = gen_frame_mem (SImode, x);
24961 emit_move_insn (x, work_reg);
24963 x = GEN_INT (offset + 12);
24964 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24966 emit_move_insn (arm_hfp_rtx, work_reg);
24968 /* Optimization: If we are not pushing any low registers but we are going
24969 to push some high registers then delay our first push. This will just
24970 be a push of LR and we can combine it with the push of the first high
24971 register. */
24972 else if ((l_mask & 0xff) != 0
24973 || (high_regs_pushed == 0 && lr_needs_saving))
24975 unsigned long mask = l_mask;
24976 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24977 insn = thumb1_emit_multi_reg_push (mask, mask);
24978 RTX_FRAME_RELATED_P (insn) = 1;
24979 lr_needs_saving = false;
24982 if (high_regs_pushed)
24984 unsigned pushable_regs;
24985 unsigned next_hi_reg;
24986 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24987 : crtl->args.info.nregs;
24988 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24990 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24991 if (live_regs_mask & (1 << next_hi_reg))
24992 break;
24994 /* Mask out the registers that are used to pass arguments, even though
24995 they could otherwise be pushed: using them to stash the high registers
24996 here would clobber the incoming argument values. */
24997 pushable_regs = l_mask & (~arg_regs_mask);
24998 if (lr_needs_saving)
24999 pushable_regs &= ~(1 << LR_REGNUM);
25001 if (pushable_regs == 0)
25002 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25004 while (high_regs_pushed > 0)
25006 unsigned long real_regs_mask = 0;
25007 unsigned long push_mask = 0;
25009 for (regno = LR_REGNUM; regno >= 0; regno --)
25011 if (pushable_regs & (1 << regno))
25013 emit_move_insn (gen_rtx_REG (SImode, regno),
25014 gen_rtx_REG (SImode, next_hi_reg));
25016 high_regs_pushed --;
25017 real_regs_mask |= (1 << next_hi_reg);
25018 push_mask |= (1 << regno);
25020 if (high_regs_pushed)
25022 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25023 next_hi_reg --)
25024 if (live_regs_mask & (1 << next_hi_reg))
25025 break;
25027 else
25028 break;
25032 /* If we had to find a work register and we have not yet
25033 saved the LR then add it to the list of regs to push. */
25034 if (lr_needs_saving)
25036 push_mask |= 1 << LR_REGNUM;
25037 real_regs_mask |= 1 << LR_REGNUM;
25038 lr_needs_saving = false;
25041 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25042 RTX_FRAME_RELATED_P (insn) = 1;
25046 /* Load the pic register before setting the frame pointer,
25047 so we can use r7 as a temporary work register. */
25048 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25049 arm_load_pic_register (live_regs_mask);
25051 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25052 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25053 stack_pointer_rtx);
25055 size = offsets->outgoing_args - offsets->saved_args;
25056 if (flag_stack_usage_info)
25057 current_function_static_stack_size = size;
25059 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25060 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25061 || flag_stack_clash_protection)
25062 && size)
25063 sorry ("-fstack-check=specific for Thumb-1");
25065 amount = offsets->outgoing_args - offsets->saved_regs;
25066 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25067 if (amount)
25069 if (amount < 512)
25071 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25072 GEN_INT (- amount)));
25073 RTX_FRAME_RELATED_P (insn) = 1;
25075 else
25077 rtx reg, dwarf;
25079 /* The stack decrement is too big for an immediate value in a single
25080 insn. In theory we could issue multiple subtracts, but after
25081 three of them it becomes more space efficient to place the full
25082 value in the constant pool and load into a register. (Also the
25083 ARM debugger really likes to see only one stack decrement per
25084 function). So instead we look for a scratch register into which
25085 we can load the decrement, and then we subtract this from the
25086 stack pointer. Unfortunately on the thumb the only available
25087 scratch registers are the argument registers, and we cannot use
25088 these as they may hold arguments to the function. Instead we
25089 attempt to locate a call preserved register which is used by this
25090 function. If we can find one, then we know that it will have
25091 been pushed at the start of the prologue and so we can corrupt
25092 it now. */
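/* The emitted sequence is then roughly (illustrative; the actual scratch
   register depends on which call-saved low register is live):

       ldr   rN, =-<amount>
       add   sp, sp, rN

   together with a REG_FRAME_RELATED_EXPR note describing the adjustment. */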
25093 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25094 if (live_regs_mask & (1 << regno))
25095 break;
25097 gcc_assert(regno <= LAST_LO_REGNUM);
25099 reg = gen_rtx_REG (SImode, regno);
25101 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25103 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25104 stack_pointer_rtx, reg));
25106 dwarf = gen_rtx_SET (stack_pointer_rtx,
25107 plus_constant (Pmode, stack_pointer_rtx,
25108 -amount));
25109 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25110 RTX_FRAME_RELATED_P (insn) = 1;
25114 if (frame_pointer_needed)
25115 thumb_set_frame_pointer (offsets);
25117 /* If we are profiling, make sure no instructions are scheduled before
25118 the call to mcount. Likewise if the user has requested no
25119 scheduling in the prologue, or if we want non-call exceptions
25120 using the EABI unwinder, to prevent faulting instructions from being
25121 swapped with a stack adjustment. */
25122 if (crtl->profile || !TARGET_SCHED_PROLOG
25123 || (arm_except_unwind_info (&global_options) == UI_TARGET
25124 && cfun->can_throw_non_call_exceptions))
25125 emit_insn (gen_blockage ());
25127 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25128 if (live_regs_mask & 0xff)
25129 cfun->machine->lr_save_eliminated = 0;
25132 /* Clear caller saved registers not used to pass return values and leaked
25133 condition flags before exiting a cmse_nonsecure_entry function. */
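/* Roughly, the steps below are: collect the candidate registers (the core
   argument registers plus IP, the low VFP registers when hard-float is in
   use, and any registers the user has made call-used); drop the registers
   that carry the return value; scrub the padding bits of a composite value
   returned in r0, using r1 as scratch; and finally overwrite every remaining
   register with an innocuous value. */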
25135 void
25136 cmse_nonsecure_entry_clear_before_return (void)
25138 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25139 uint32_t padding_bits_to_clear = 0;
25140 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25141 auto_sbitmap to_clear_bitmap (maxregno + 1);
25142 tree result_type;
25143 rtx result_rtl;
25145 bitmap_clear (to_clear_bitmap);
25146 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25147 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25149 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25150 registers. */
25151 if (TARGET_HARD_FLOAT)
25153 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25155 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25157 /* Make sure we don't clear the two scratch registers used to clear the
25158 relevant FPSCR bits in output_return_instruction. */
25159 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25160 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25161 emit_use (gen_rtx_REG (SImode, 4));
25162 bitmap_clear_bit (to_clear_bitmap, 4);
25165 /* If the user has defined registers to be caller saved, these are no longer
25166 restored by the function before returning and must thus be cleared for
25167 security purposes. */
25168 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25170 /* We do not touch registers that can be used to pass arguments as per
25171 the AAPCS, since these should never be made callee-saved by user
25172 options. */
25173 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25174 continue;
25175 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25176 continue;
25177 if (call_used_regs[regno])
25178 bitmap_set_bit (to_clear_bitmap, regno);
25181 /* Make sure we do not clear the registers used to return the result in. */
25182 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25183 if (!VOID_TYPE_P (result_type))
25185 uint64_t to_clear_return_mask;
25186 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25188 /* No need to check that we return in registers, because we don't
25189 support returning on stack yet. */
25190 gcc_assert (REG_P (result_rtl));
25191 to_clear_return_mask
25192 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25193 padding_bits_to_clear_ptr);
25194 if (to_clear_return_mask)
25196 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25197 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25199 if (to_clear_return_mask & (1ULL << regno))
25200 bitmap_clear_bit (to_clear_bitmap, regno);
25205 if (padding_bits_to_clear != 0)
25207 rtx reg_rtx;
25208 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25210 /* A non-zero padding_bits_to_clear means we are returning a composite
25211 type, which only uses r0. Make sure that r1-r3 are cleared too; r1 is
25212 used as a scratch register below. */
25213 bitmap_clear (to_clear_arg_regs_bitmap);
25214 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25215 NUM_ARG_REGS - 1);
25216 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25218 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25220 /* Fill the lower half of the negated padding_bits_to_clear. */
25221 emit_move_insn (reg_rtx,
25222 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25224 /* Also fill the top half of the negated padding_bits_to_clear. */
25225 if (((~padding_bits_to_clear) >> 16) > 0)
25226 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25227 GEN_INT (16),
25228 GEN_INT (16)),
25229 GEN_INT ((~padding_bits_to_clear) >> 16)));
25231 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25232 gen_rtx_REG (SImode, R0_REGNUM),
25233 reg_rtx));
25236 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25238 if (!bitmap_bit_p (to_clear_bitmap, regno))
25239 continue;
25241 if (IS_VFP_REGNUM (regno))
25243 /* If regno is an even vfp register and its successor is also to
25244 be cleared, use vmov. */
25245 if (TARGET_VFP_DOUBLE
25246 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25247 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25249 emit_move_insn (gen_rtx_REG (DFmode, regno),
25250 CONST1_RTX (DFmode));
25251 emit_use (gen_rtx_REG (DFmode, regno));
25252 regno++;
25254 else
25256 emit_move_insn (gen_rtx_REG (SFmode, regno),
25257 CONST1_RTX (SFmode));
25258 emit_use (gen_rtx_REG (SFmode, regno));
25261 else
25263 if (TARGET_THUMB1)
25265 if (regno == R0_REGNUM)
25266 emit_move_insn (gen_rtx_REG (SImode, regno),
25267 const0_rtx);
25268 else
25269 /* R0 has either been cleared before, see code above, or it
25270 holds a return value, either way it is not secret
25271 information. */
25272 emit_move_insn (gen_rtx_REG (SImode, regno),
25273 gen_rtx_REG (SImode, R0_REGNUM));
25274 emit_use (gen_rtx_REG (SImode, regno));
25276 else
25278 emit_move_insn (gen_rtx_REG (SImode, regno),
25279 gen_rtx_REG (SImode, LR_REGNUM));
25280 emit_use (gen_rtx_REG (SImode, regno));
25286 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25287 single POP instruction can be generated. LR should be replaced by PC.
25288 All the checks required are already done by USE_RETURN_INSN (); all we
25289 need to decide here is whether a single register or multiple registers
25290 are being restored. */
25291 void
25292 thumb2_expand_return (bool simple_return)
25294 int i, num_regs;
25295 unsigned long saved_regs_mask;
25296 arm_stack_offsets *offsets;
25298 offsets = arm_get_frame_offsets ();
25299 saved_regs_mask = offsets->saved_regs_mask;
25301 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25302 if (saved_regs_mask & (1 << i))
25303 num_regs++;
25305 if (!simple_return && saved_regs_mask)
25307 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25308 functions or adapt code to handle according to ACLE. This path should
25309 not be reachable for cmse_nonsecure_entry functions though we prefer
25310 to assert it for now to ensure that future code changes do not silently
25311 change this behavior. */
25312 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25313 if (num_regs == 1)
25315 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25316 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25317 rtx addr = gen_rtx_MEM (SImode,
25318 gen_rtx_POST_INC (SImode,
25319 stack_pointer_rtx));
25320 set_mem_alias_set (addr, get_frame_alias_set ());
25321 XVECEXP (par, 0, 0) = ret_rtx;
25322 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25323 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25324 emit_jump_insn (par);
25326 else
25328 saved_regs_mask &= ~ (1 << LR_REGNUM);
25329 saved_regs_mask |= (1 << PC_REGNUM);
25330 arm_emit_multi_reg_pop (saved_regs_mask);
25333 else
25335 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25336 cmse_nonsecure_entry_clear_before_return ();
25337 emit_jump_insn (simple_return_rtx);
25341 void
25342 thumb1_expand_epilogue (void)
25344 HOST_WIDE_INT amount;
25345 arm_stack_offsets *offsets;
25346 int regno;
25348 /* Naked functions don't have epilogues. */
25349 if (IS_NAKED (arm_current_func_type ()))
25350 return;
25352 offsets = arm_get_frame_offsets ();
25353 amount = offsets->outgoing_args - offsets->saved_regs;
25355 if (frame_pointer_needed)
25357 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25358 amount = offsets->locals_base - offsets->saved_regs;
25360 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25362 gcc_assert (amount >= 0);
25363 if (amount)
25365 emit_insn (gen_blockage ());
25367 if (amount < 512)
25368 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25369 GEN_INT (amount)));
25370 else
25372 /* r3 is always free in the epilogue. */
25373 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25375 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25376 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25380 /* Emit a USE (stack_pointer_rtx), so that
25381 the stack adjustment will not be deleted. */
25382 emit_insn (gen_force_register_use (stack_pointer_rtx));
25384 if (crtl->profile || !TARGET_SCHED_PROLOG)
25385 emit_insn (gen_blockage ());
25387 /* Emit a clobber for each insn that will be restored in the epilogue,
25388 so that flow2 will get register lifetimes correct. */
25389 for (regno = 0; regno < 13; regno++)
25390 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25391 emit_clobber (gen_rtx_REG (SImode, regno));
25393 if (! df_regs_ever_live_p (LR_REGNUM))
25394 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25396 /* Clear all caller-saved regs that are not used to return. */
25397 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25398 cmse_nonsecure_entry_clear_before_return ();
25401 /* Epilogue code for APCS frame. */
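/* In outline (an illustrative summary): point IP just below the
   floating-point save area and pop the VFP registers from there, reload any
   live iWMMXt registers relative to the frame pointer, then replace IP with
   SP in saved_regs_mask so that the final multi-register pop restores the
   caller's stack pointer directly, loading the saved LR into PC when a plain
   return is possible. */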
25402 static void
25403 arm_expand_epilogue_apcs_frame (bool really_return)
25405 unsigned long func_type;
25406 unsigned long saved_regs_mask;
25407 int num_regs = 0;
25408 int i;
25409 int floats_from_frame = 0;
25410 arm_stack_offsets *offsets;
25412 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25413 func_type = arm_current_func_type ();
25415 /* Get frame offsets for ARM. */
25416 offsets = arm_get_frame_offsets ();
25417 saved_regs_mask = offsets->saved_regs_mask;
25419 /* Find the offset of the floating-point save area in the frame. */
25420 floats_from_frame
25421 = (offsets->saved_args
25422 + arm_compute_static_chain_stack_bytes ()
25423 - offsets->frame);
25425 /* Compute how many core registers saved and how far away the floats are. */
25426 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25427 if (saved_regs_mask & (1 << i))
25429 num_regs++;
25430 floats_from_frame += 4;
25433 if (TARGET_HARD_FLOAT)
25435 int start_reg;
25436 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25438 /* The offset is from IP_REGNUM. */
25439 int saved_size = arm_get_vfp_saved_size ();
25440 if (saved_size > 0)
25442 rtx_insn *insn;
25443 floats_from_frame += saved_size;
25444 insn = emit_insn (gen_addsi3 (ip_rtx,
25445 hard_frame_pointer_rtx,
25446 GEN_INT (-floats_from_frame)));
25447 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25448 ip_rtx, hard_frame_pointer_rtx);
25451 /* Generate VFP register multi-pop. */
25452 start_reg = FIRST_VFP_REGNUM;
25454 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25455 /* Look for a case where a reg does not need restoring. */
25456 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25457 && (!df_regs_ever_live_p (i + 1)
25458 || call_used_regs[i + 1]))
25460 if (start_reg != i)
25461 arm_emit_vfp_multi_reg_pop (start_reg,
25462 (i - start_reg) / 2,
25463 gen_rtx_REG (SImode,
25464 IP_REGNUM));
25465 start_reg = i + 2;
25468 /* Restore the remaining regs that we have discovered (or possibly
25469 even all of them, if the conditional in the for loop never
25470 fired). */
25471 if (start_reg != i)
25472 arm_emit_vfp_multi_reg_pop (start_reg,
25473 (i - start_reg) / 2,
25474 gen_rtx_REG (SImode, IP_REGNUM));
25477 if (TARGET_IWMMXT)
25479 /* The frame pointer is guaranteed to be non-double-word aligned, as
25480 it is set to double-word-aligned old_stack_pointer - 4. */
25481 rtx_insn *insn;
25482 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25484 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25485 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25487 rtx addr = gen_frame_mem (V2SImode,
25488 plus_constant (Pmode, hard_frame_pointer_rtx,
25489 - lrm_count * 4));
25490 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25491 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25492 gen_rtx_REG (V2SImode, i),
25493 NULL_RTX);
25494 lrm_count += 2;
25498 /* saved_regs_mask should contain IP, which holds the old stack pointer
25499 saved when the activation record was created. Since SP and IP are
25500 adjacent registers, we can restore that value directly into SP. */
25501 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25502 saved_regs_mask &= ~(1 << IP_REGNUM);
25503 saved_regs_mask |= (1 << SP_REGNUM);
25505 /* There are two registers left in saved_regs_mask - LR and PC. We
25506 only need to restore LR (the return address), but to
25507 save time we can load it directly into PC, unless we need a
25508 special function exit sequence, or we are not really returning. */
25509 if (really_return
25510 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25511 && !crtl->calls_eh_return)
25512 /* Delete LR from the register mask, so that LR on
25513 the stack is loaded into the PC in the register mask. */
25514 saved_regs_mask &= ~(1 << LR_REGNUM);
25515 else
25516 saved_regs_mask &= ~(1 << PC_REGNUM);
25518 num_regs = bit_count (saved_regs_mask);
25519 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25521 rtx_insn *insn;
25522 emit_insn (gen_blockage ());
25523 /* Unwind the stack to just below the saved registers. */
25524 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25525 hard_frame_pointer_rtx,
25526 GEN_INT (- 4 * num_regs)));
25528 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25529 stack_pointer_rtx, hard_frame_pointer_rtx);
25532 arm_emit_multi_reg_pop (saved_regs_mask);
25534 if (IS_INTERRUPT (func_type))
25536 /* Interrupt handlers will have pushed the
25537 IP onto the stack, so restore it now. */
25538 rtx_insn *insn;
25539 rtx addr = gen_rtx_MEM (SImode,
25540 gen_rtx_POST_INC (SImode,
25541 stack_pointer_rtx));
25542 set_mem_alias_set (addr, get_frame_alias_set ());
25543 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25544 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25545 gen_rtx_REG (SImode, IP_REGNUM),
25546 NULL_RTX);
25549 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25550 return;
25552 if (crtl->calls_eh_return)
25553 emit_insn (gen_addsi3 (stack_pointer_rtx,
25554 stack_pointer_rtx,
25555 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25557 if (IS_STACKALIGN (func_type))
25558 /* Restore the original stack pointer. Before prologue, the stack was
25559 realigned and the original stack pointer saved in r0. For details,
25560 see comment in arm_expand_prologue. */
25561 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25563 emit_jump_insn (simple_return_rtx);
25566 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25567 function is not a sibcall. */
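/* Roughly (an illustrative summary): after dispatching the APCS-frame case,
   the code below unwinds the local frame (via the frame pointer, or by
   popping outgoing args and locals), restores the VFP, iWMMXt and core
   registers, loading the PC directly when a simple return is possible,
   removes any pretend arguments, performs CMSE clearing and the EH-return
   adjustment, and finally emits the return. */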
25568 void
25569 arm_expand_epilogue (bool really_return)
25571 unsigned long func_type;
25572 unsigned long saved_regs_mask;
25573 int num_regs = 0;
25574 int i;
25575 int amount;
25576 arm_stack_offsets *offsets;
25578 func_type = arm_current_func_type ();
25580 /* Naked functions don't have epilogues. Hence, generate a return pattern and
25581 let output_return_instruction take care of any instruction emission. */
25582 if (IS_NAKED (func_type)
25583 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25585 if (really_return)
25586 emit_jump_insn (simple_return_rtx);
25587 return;
25590 /* If we are throwing an exception, then we really must be doing a
25591 return, so we can't tail-call. */
25592 gcc_assert (!crtl->calls_eh_return || really_return);
25594 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25596 arm_expand_epilogue_apcs_frame (really_return);
25597 return;
25600 /* Get frame offsets for ARM. */
25601 offsets = arm_get_frame_offsets ();
25602 saved_regs_mask = offsets->saved_regs_mask;
25603 num_regs = bit_count (saved_regs_mask);
25605 if (frame_pointer_needed)
25607 rtx_insn *insn;
25608 /* Restore stack pointer if necessary. */
25609 if (TARGET_ARM)
25611 /* In ARM mode, frame pointer points to first saved register.
25612 Restore stack pointer to last saved register. */
25613 amount = offsets->frame - offsets->saved_regs;
25615 /* Force out any pending memory operations that reference stacked data
25616 before stack de-allocation occurs. */
25617 emit_insn (gen_blockage ());
25618 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25619 hard_frame_pointer_rtx,
25620 GEN_INT (amount)));
25621 arm_add_cfa_adjust_cfa_note (insn, amount,
25622 stack_pointer_rtx,
25623 hard_frame_pointer_rtx);
25625 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25626 deleted. */
25627 emit_insn (gen_force_register_use (stack_pointer_rtx));
25629 else
25631 /* In Thumb-2 mode, the frame pointer points to the last saved
25632 register. */
25633 amount = offsets->locals_base - offsets->saved_regs;
25634 if (amount)
25636 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25637 hard_frame_pointer_rtx,
25638 GEN_INT (amount)));
25639 arm_add_cfa_adjust_cfa_note (insn, amount,
25640 hard_frame_pointer_rtx,
25641 hard_frame_pointer_rtx);
25644 /* Force out any pending memory operations that reference stacked data
25645 before stack de-allocation occurs. */
25646 emit_insn (gen_blockage ());
25647 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25648 hard_frame_pointer_rtx));
25649 arm_add_cfa_adjust_cfa_note (insn, 0,
25650 stack_pointer_rtx,
25651 hard_frame_pointer_rtx);
25652 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25653 deleted. */
25654 emit_insn (gen_force_register_use (stack_pointer_rtx));
25657 else
25659 /* Pop off outgoing args and local frame to adjust stack pointer to
25660 last saved register. */
25661 amount = offsets->outgoing_args - offsets->saved_regs;
25662 if (amount)
25664 rtx_insn *tmp;
25665 /* Force out any pending memory operations that reference stacked data
25666 before stack de-allocation occurs. */
25667 emit_insn (gen_blockage ());
25668 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25669 stack_pointer_rtx,
25670 GEN_INT (amount)));
25671 arm_add_cfa_adjust_cfa_note (tmp, amount,
25672 stack_pointer_rtx, stack_pointer_rtx);
25673 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25674 not deleted. */
25675 emit_insn (gen_force_register_use (stack_pointer_rtx));
25679 if (TARGET_HARD_FLOAT)
25681 /* Generate VFP register multi-pop. */
25682 int end_reg = LAST_VFP_REGNUM + 1;
25684 /* Scan the registers in reverse order. We need to match
25685 any groupings made in the prologue and generate matching
25686 vldm operations. The need to match groups is because,
25687 unlike pop, vldm can only do consecutive regs. */
25688 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25689 /* Look for a case where a reg does not need restoring. */
25690 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25691 && (!df_regs_ever_live_p (i + 1)
25692 || call_used_regs[i + 1]))
25694 /* Restore the regs discovered so far (from reg+2 to
25695 end_reg). */
25696 if (end_reg > i + 2)
25697 arm_emit_vfp_multi_reg_pop (i + 2,
25698 (end_reg - (i + 2)) / 2,
25699 stack_pointer_rtx);
25700 end_reg = i;
25703 /* Restore the remaining regs that we have discovered (or possibly
25704 even all of them, if the conditional in the for loop never
25705 fired). */
25706 if (end_reg > i + 2)
25707 arm_emit_vfp_multi_reg_pop (i + 2,
25708 (end_reg - (i + 2)) / 2,
25709 stack_pointer_rtx);
25712 if (TARGET_IWMMXT)
25713 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25714 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25716 rtx_insn *insn;
25717 rtx addr = gen_rtx_MEM (V2SImode,
25718 gen_rtx_POST_INC (SImode,
25719 stack_pointer_rtx));
25720 set_mem_alias_set (addr, get_frame_alias_set ());
25721 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25722 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25723 gen_rtx_REG (V2SImode, i),
25724 NULL_RTX);
25725 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25726 stack_pointer_rtx, stack_pointer_rtx);
25729 if (saved_regs_mask)
25731 rtx insn;
25732 bool return_in_pc = false;
25734 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25735 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25736 && !IS_CMSE_ENTRY (func_type)
25737 && !IS_STACKALIGN (func_type)
25738 && really_return
25739 && crtl->args.pretend_args_size == 0
25740 && saved_regs_mask & (1 << LR_REGNUM)
25741 && !crtl->calls_eh_return)
25743 saved_regs_mask &= ~(1 << LR_REGNUM);
25744 saved_regs_mask |= (1 << PC_REGNUM);
25745 return_in_pc = true;
25748 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25750 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25751 if (saved_regs_mask & (1 << i))
25753 rtx addr = gen_rtx_MEM (SImode,
25754 gen_rtx_POST_INC (SImode,
25755 stack_pointer_rtx));
25756 set_mem_alias_set (addr, get_frame_alias_set ());
25758 if (i == PC_REGNUM)
25760 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25761 XVECEXP (insn, 0, 0) = ret_rtx;
25762 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25763 addr);
25764 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25765 insn = emit_jump_insn (insn);
25767 else
25769 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25770 addr));
25771 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25772 gen_rtx_REG (SImode, i),
25773 NULL_RTX);
25774 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25775 stack_pointer_rtx,
25776 stack_pointer_rtx);
25780 else
25782 if (TARGET_LDRD
25783 && current_tune->prefer_ldrd_strd
25784 && !optimize_function_for_size_p (cfun))
25786 if (TARGET_THUMB2)
25787 thumb2_emit_ldrd_pop (saved_regs_mask);
25788 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25789 arm_emit_ldrd_pop (saved_regs_mask);
25790 else
25791 arm_emit_multi_reg_pop (saved_regs_mask);
25793 else
25794 arm_emit_multi_reg_pop (saved_regs_mask);
25797 if (return_in_pc)
25798 return;
25801 amount
25802 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25803 if (amount)
25805 int i, j;
25806 rtx dwarf = NULL_RTX;
25807 rtx_insn *tmp =
25808 emit_insn (gen_addsi3 (stack_pointer_rtx,
25809 stack_pointer_rtx,
25810 GEN_INT (amount)));
25812 RTX_FRAME_RELATED_P (tmp) = 1;
25814 if (cfun->machine->uses_anonymous_args)
25816 /* Restore the pretend args. See arm_expand_prologue for how the
25817 pretend_args are saved on the stack. */
25818 int num_regs = crtl->args.pretend_args_size / 4;
25819 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25820 for (j = 0, i = 0; j < num_regs; i++)
25821 if (saved_regs_mask & (1 << i))
25823 rtx reg = gen_rtx_REG (SImode, i);
25824 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25825 j++;
25827 REG_NOTES (tmp) = dwarf;
25829 arm_add_cfa_adjust_cfa_note (tmp, amount,
25830 stack_pointer_rtx, stack_pointer_rtx);
25833 /* Clear all caller-saved regs that are not used to return. */
25834 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25836 /* CMSE_ENTRY always returns. */
25837 gcc_assert (really_return);
25838 cmse_nonsecure_entry_clear_before_return ();
25841 if (!really_return)
25842 return;
25844 if (crtl->calls_eh_return)
25845 emit_insn (gen_addsi3 (stack_pointer_rtx,
25846 stack_pointer_rtx,
25847 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25849 if (IS_STACKALIGN (func_type))
25850 /* Restore the original stack pointer. Before prologue, the stack was
25851 realigned and the original stack pointer saved in r0. For details,
25852 see comment in arm_expand_prologue. */
25853 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25855 emit_jump_insn (simple_return_rtx);
25858 /* Implementation of insn prologue_thumb1_interwork. This is the first
25859 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25861 const char *
25862 thumb1_output_interwork (void)
25864 const char * name;
25865 FILE *f = asm_out_file;
25867 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25868 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25869 == SYMBOL_REF);
25870 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25872 /* Generate code sequence to switch us into Thumb mode. */
25873 /* The .code 32 directive has already been emitted by
25874 ASM_DECLARE_FUNCTION_NAME. */
25875 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25876 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25878 /* Generate a label, so that the debugger will notice the
25879 change in instruction sets. This label is also used by
25880 the assembler to bypass the ARM code when this function
25881 is called from a Thumb encoded function elsewhere in the
25882 same file. Hence the definition of STUB_NAME here must
25883 agree with the definition in gas/config/tc-arm.c. */
25885 #define STUB_NAME ".real_start_of"
25887 fprintf (f, "\t.code\t16\n");
25888 #ifdef ARM_PE
25889 if (arm_dllexport_name_p (name))
25890 name = arm_strip_name_encoding (name);
25891 #endif
25892 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25893 fprintf (f, "\t.thumb_func\n");
25894 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25896 return "";
25899 /* Handle the case of a double word load into a low register from
25900 a computed memory address. The computed address may involve a
25901 register which is overwritten by the load. */
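/* For instance (illustrative), for an address of the form <reg> + <reg> the
   code below emits:

       add  rD+1, rBase, rOffset
       ldr  rD,   [rD+1, #0]
       ldr  rD+1, [rD+1, #4]

   The address is built in the high half of the destination, so it is only
   overwritten by the final load. */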
25902 const char *
25903 thumb_load_double_from_address (rtx *operands)
25905 rtx addr;
25906 rtx base;
25907 rtx offset;
25908 rtx arg1;
25909 rtx arg2;
25911 gcc_assert (REG_P (operands[0]));
25912 gcc_assert (MEM_P (operands[1]));
25914 /* Get the memory address. */
25915 addr = XEXP (operands[1], 0);
25917 /* Work out how the memory address is computed. */
25918 switch (GET_CODE (addr))
25920 case REG:
25921 operands[2] = adjust_address (operands[1], SImode, 4);
25923 if (REGNO (operands[0]) == REGNO (addr))
25925 output_asm_insn ("ldr\t%H0, %2", operands);
25926 output_asm_insn ("ldr\t%0, %1", operands);
25928 else
25930 output_asm_insn ("ldr\t%0, %1", operands);
25931 output_asm_insn ("ldr\t%H0, %2", operands);
25933 break;
25935 case CONST:
25936 /* Compute <address> + 4 for the high order load. */
25937 operands[2] = adjust_address (operands[1], SImode, 4);
25939 output_asm_insn ("ldr\t%0, %1", operands);
25940 output_asm_insn ("ldr\t%H0, %2", operands);
25941 break;
25943 case PLUS:
25944 arg1 = XEXP (addr, 0);
25945 arg2 = XEXP (addr, 1);
25947 if (CONSTANT_P (arg1))
25948 base = arg2, offset = arg1;
25949 else
25950 base = arg1, offset = arg2;
25952 gcc_assert (REG_P (base));
25954 /* Catch the case of <address> = <reg> + <reg> */
25955 if (REG_P (offset))
25957 int reg_offset = REGNO (offset);
25958 int reg_base = REGNO (base);
25959 int reg_dest = REGNO (operands[0]);
25961 /* Add the base and offset registers together into the
25962 higher destination register. */
25963 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25964 reg_dest + 1, reg_base, reg_offset);
25966 /* Load the lower destination register from the address in
25967 the higher destination register. */
25968 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25969 reg_dest, reg_dest + 1);
25971 /* Load the higher destination register from its own address
25972 plus 4. */
25973 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25974 reg_dest + 1, reg_dest + 1);
25976 else
25978 /* Compute <address> + 4 for the high order load. */
25979 operands[2] = adjust_address (operands[1], SImode, 4);
25981 /* If the computed address is held in the low order register
25982 then load the high order register first, otherwise always
25983 load the low order register first. */
25984 if (REGNO (operands[0]) == REGNO (base))
25986 output_asm_insn ("ldr\t%H0, %2", operands);
25987 output_asm_insn ("ldr\t%0, %1", operands);
25989 else
25991 output_asm_insn ("ldr\t%0, %1", operands);
25992 output_asm_insn ("ldr\t%H0, %2", operands);
25995 break;
25997 case LABEL_REF:
25998 /* With no registers to worry about we can just load the value
25999 directly. */
26000 operands[2] = adjust_address (operands[1], SImode, 4);
26002 output_asm_insn ("ldr\t%H0, %2", operands);
26003 output_asm_insn ("ldr\t%0, %1", operands);
26004 break;
26006 default:
26007 gcc_unreachable ();
26010 return "";
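/* Output the ldmia/stmia pair for a small block move of N words (2 or 3).
   The scratch registers in OPERANDS[4..] are sorted into ascending order
   first. For N == 2 this emits, illustratively (register numbers depend on
   OPERANDS):

       ldmia  r1!, {r4, r5}
       stmia  r0!, {r4, r5}
*/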
26013 const char *
26014 thumb_output_move_mem_multiple (int n, rtx *operands)
26016 switch (n)
26018 case 2:
26019 if (REGNO (operands[4]) > REGNO (operands[5]))
26020 std::swap (operands[4], operands[5]);
26022 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26023 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26024 break;
26026 case 3:
26027 if (REGNO (operands[4]) > REGNO (operands[5]))
26028 std::swap (operands[4], operands[5]);
26029 if (REGNO (operands[5]) > REGNO (operands[6]))
26030 std::swap (operands[5], operands[6]);
26031 if (REGNO (operands[4]) > REGNO (operands[5]))
26032 std::swap (operands[4], operands[5]);
26034 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26035 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26036 break;
26038 default:
26039 gcc_unreachable ();
26042 return "";
26045 /* Output a call-via instruction for thumb state. */
26046 const char *
26047 thumb_call_via_reg (rtx reg)
26049 int regno = REGNO (reg);
26050 rtx *labelp;
26052 gcc_assert (regno < LR_REGNUM);
26054 /* If we are in the normal text section we can use a single instance
26055 per compilation unit. If we are doing function sections, then we need
26056 an entry per section, since we can't rely on reachability. */
26057 if (in_section == text_section)
26059 thumb_call_reg_needed = 1;
26061 if (thumb_call_via_label[regno] == NULL)
26062 thumb_call_via_label[regno] = gen_label_rtx ();
26063 labelp = thumb_call_via_label + regno;
26065 else
26067 if (cfun->machine->call_via[regno] == NULL)
26068 cfun->machine->call_via[regno] = gen_label_rtx ();
26069 labelp = cfun->machine->call_via + regno;
26072 output_asm_insn ("bl\t%a0", labelp);
26073 return "";
26076 /* Routines for generating rtl. */
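/* thumb_expand_movmemqi copies a constant-length block by first emitting
   12-byte and 8-byte multi-word moves (which advance the pointer registers),
   then mopping up the remainder with single word, halfword and byte moves at
   increasing offsets. */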
26077 void
26078 thumb_expand_movmemqi (rtx *operands)
26080 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26081 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26082 HOST_WIDE_INT len = INTVAL (operands[2]);
26083 HOST_WIDE_INT offset = 0;
26085 while (len >= 12)
26087 emit_insn (gen_movmem12b (out, in, out, in));
26088 len -= 12;
26091 if (len >= 8)
26093 emit_insn (gen_movmem8b (out, in, out, in));
26094 len -= 8;
26097 if (len >= 4)
26099 rtx reg = gen_reg_rtx (SImode);
26100 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26101 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26102 len -= 4;
26103 offset += 4;
26106 if (len >= 2)
26108 rtx reg = gen_reg_rtx (HImode);
26109 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26110 plus_constant (Pmode, in,
26111 offset))));
26112 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26113 offset)),
26114 reg));
26115 len -= 2;
26116 offset += 2;
26119 if (len)
26121 rtx reg = gen_reg_rtx (QImode);
26122 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26123 plus_constant (Pmode, in,
26124 offset))));
26125 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26126 offset)),
26127 reg));
26131 void
26132 thumb_reload_out_hi (rtx *operands)
26134 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26137 /* Return the length of a function name prefix
26138 that starts with the character 'c'. */
26139 static int
26140 arm_get_strip_length (int c)
26142 switch (c)
26144 ARM_NAME_ENCODING_LENGTHS
26145 default: return 0;
26149 /* Return a pointer to a function's name with any
26150 and all prefix encodings stripped from it. */
26151 const char *
26152 arm_strip_name_encoding (const char *name)
26154 int skip;
26156 while ((skip = arm_get_strip_length (* name)))
26157 name += skip;
26159 return name;
26162 /* If there is a '*' anywhere in the name's prefix, then
26163 emit the stripped name verbatim, otherwise prepend an
26164 underscore if leading underscores are being used. */
26165 void
26166 arm_asm_output_labelref (FILE *stream, const char *name)
26168 int skip;
26169 int verbatim = 0;
26171 while ((skip = arm_get_strip_length (* name)))
26173 verbatim |= (*name == '*');
26174 name += skip;
26177 if (verbatim)
26178 fputs (name, stream);
26179 else
26180 asm_fprintf (stream, "%U%s", name);
26183 /* This function is used to emit an EABI tag and its associated value.
26184 We emit the numerical value of the tag in case the assembler does not
26185 support textual tags. (Eg gas prior to 2.20). If requested we include
26186 the tag name in a comment so that anyone reading the assembler output
26187 will know which tag is being set.
26189 This function is not static because arm-c.c needs it too. */
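/* For example (illustrative), arm_emit_eabi_attribute ("Tag_ABI_VFP_args",
   28, 1) produces a line of the form

       .eabi_attribute 28, 1

   with the tag name appended as an assembler comment when -fverbose-asm or
   -dA is in effect. */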
26191 void
26192 arm_emit_eabi_attribute (const char *name, int num, int val)
26194 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26195 if (flag_verbose_asm || flag_debug_asm)
26196 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26197 asm_fprintf (asm_out_file, "\n");
26200 /* This function is used to print CPU tuning information as comment
26201 in assembler file. Pointers are not printed for now. */
26203 void
26204 arm_print_tune_info (void)
26206 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26207 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26208 current_tune->constant_limit);
26209 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26210 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26211 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26212 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26213 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26214 "prefetch.l1_cache_size:\t%d\n",
26215 current_tune->prefetch.l1_cache_size);
26216 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26217 "prefetch.l1_cache_line_size:\t%d\n",
26218 current_tune->prefetch.l1_cache_line_size);
26219 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26220 "prefer_constant_pool:\t%d\n",
26221 (int) current_tune->prefer_constant_pool);
26222 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26223 "branch_cost:\t(s:speed, p:predictable)\n");
26224 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26225 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26226 current_tune->branch_cost (false, false));
26227 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26228 current_tune->branch_cost (false, true));
26229 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26230 current_tune->branch_cost (true, false));
26231 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26232 current_tune->branch_cost (true, true));
26233 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26234 "prefer_ldrd_strd:\t%d\n",
26235 (int) current_tune->prefer_ldrd_strd);
26236 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26237 "logical_op_non_short_circuit:\t[%d,%d]\n",
26238 (int) current_tune->logical_op_non_short_circuit_thumb,
26239 (int) current_tune->logical_op_non_short_circuit_arm);
26240 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26241 "prefer_neon_for_64bits:\t%d\n",
26242 (int) current_tune->prefer_neon_for_64bits);
26243 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26244 "disparage_flag_setting_t16_encodings:\t%d\n",
26245 (int) current_tune->disparage_flag_setting_t16_encodings);
26246 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26247 "string_ops_prefer_neon:\t%d\n",
26248 (int) current_tune->string_ops_prefer_neon);
26249 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26250 "max_insns_inline_memset:\t%d\n",
26251 current_tune->max_insns_inline_memset);
26252 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26253 current_tune->fusible_ops);
26254 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26255 (int) current_tune->sched_autopref);
26258 /* Print .arch and .arch_extension directives corresponding to the
26259 current architecture configuration. */
26260 static void
26261 arm_print_asm_arch_directives ()
26263 const arch_option *arch
26264 = arm_parse_arch_option_name (all_architectures, "-march",
26265 arm_active_target.arch_name);
26266 auto_sbitmap opt_bits (isa_num_bits);
26268 gcc_assert (arch);
26270 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26271 if (!arch->common.extensions)
26272 return;
26274 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26275 opt->name != NULL;
26276 opt++)
26278 if (!opt->remove)
26280 arm_initialize_isa (opt_bits, opt->isa_bits);
26282 /* If every feature bit of this option is set in the target
26283 ISA specification, print out the option name. However,
26284 don't print anything if all the bits are part of the
26285 FPU specification. */
26286 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26287 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26288 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26293 static void
26294 arm_file_start (void)
26296 int val;
26298 if (TARGET_BPABI)
26300 /* We don't have a specified CPU. Use the architecture to
26301 generate the tags.
26303 Note: it might be better to do this unconditionally, then the
26304 assembler would not need to know about all new CPU names as
26305 they are added. */
26306 if (!arm_active_target.core_name)
26308 /* armv7ve doesn't support any extensions. */
26309 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26311 /* Keep backward compatibility for assemblers
26312 which don't support armv7ve. */
26313 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26314 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26315 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26316 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26317 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26319 else
26320 arm_print_asm_arch_directives ();
26322 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26323 asm_fprintf (asm_out_file, "\t.arch %s\n",
26324 arm_active_target.core_name + 8);
26325 else
26327 const char* truncated_name
26328 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26329 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26332 if (print_tune_info)
26333 arm_print_tune_info ();
26335 if (! TARGET_SOFT_FLOAT)
26337 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26338 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26340 if (TARGET_HARD_FLOAT_ABI)
26341 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26344 /* Some of these attributes only apply when the corresponding features
26345 are used. However we don't have any easy way of figuring this out.
26346 Conservatively record the setting that would have been used. */
26348 if (flag_rounding_math)
26349 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26351 if (!flag_unsafe_math_optimizations)
26353 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26354 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26356 if (flag_signaling_nans)
26357 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26359 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26360 flag_finite_math_only ? 1 : 3);
26362 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26363 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26364 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26365 flag_short_enums ? 1 : 2);
26367 /* Tag_ABI_optimization_goals. */
26368 if (optimize_size)
26369 val = 4;
26370 else if (optimize >= 2)
26371 val = 2;
26372 else if (optimize)
26373 val = 1;
26374 else
26375 val = 6;
26376 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26378 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26379 unaligned_access);
26381 if (arm_fp16_format)
26382 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26383 (int) arm_fp16_format);
26385 if (arm_lang_output_object_attributes_hook)
26386 arm_lang_output_object_attributes_hook();
26389 default_file_start ();
26392 static void
26393 arm_file_end (void)
26395 int regno;
26397 if (NEED_INDICATE_EXEC_STACK)
26398 /* Add .note.GNU-stack. */
26399 file_end_indicate_exec_stack ();
26401 if (! thumb_call_reg_needed)
26402 return;
26404 switch_to_section (text_section);
26405 asm_fprintf (asm_out_file, "\t.code 16\n");
26406 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26408 for (regno = 0; regno < LR_REGNUM; regno++)
26410 rtx label = thumb_call_via_label[regno];
26412 if (label != 0)
26414 targetm.asm_out.internal_label (asm_out_file, "L",
26415 CODE_LABEL_NUMBER (label));
26416 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26421 #ifndef ARM_PE
26422 /* Symbols in the text segment can be accessed without indirecting via the
26423 constant pool; it may take an extra binary operation, but this is still
26424 faster than indirecting via memory. Don't do this when not optimizing,
26425 since we won't be calculating all of the offsets necessary to do this
26426 simplification. */
26428 static void
26429 arm_encode_section_info (tree decl, rtx rtl, int first)
26431 if (optimize > 0 && TREE_CONSTANT (decl))
26432 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26434 default_encode_section_info (decl, rtl, first);
26436 #endif /* !ARM_PE */
26438 static void
26439 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26441 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26442 && !strcmp (prefix, "L"))
26444 arm_ccfsm_state = 0;
26445 arm_target_insn = NULL;
26447 default_internal_label (stream, prefix, labelno);
26450 /* Output code to add DELTA to the first argument, and then jump
26451 to FUNCTION. Used for C++ multiple inheritance. */
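/* The Thumb-1 thunk below adjusts the incoming `this' pointer (r0, or r1
   when the function returns an aggregate in memory) by DELTA and then
   branches to FUNCTION; the target address (and, for large deltas on
   Thumb-1-only targets, the delta itself) is loaded from literal words
   emitted after the code. */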
26453 static void
26454 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26455 HOST_WIDE_INT, tree function)
26457 static int thunk_label = 0;
26458 char label[256];
26459 char labelpc[256];
26460 int mi_delta = delta;
26461 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26462 int shift = 0;
26463 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26464 ? 1 : 0);
26465 if (mi_delta < 0)
26466 mi_delta = - mi_delta;
26468 final_start_function (emit_barrier (), file, 1);
26470 if (TARGET_THUMB1)
26472 int labelno = thunk_label++;
26473 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26474 /* Thunks are entered in ARM mode when available. */
26475 if (TARGET_THUMB1_ONLY)
26477 /* push r3 so we can use it as a temporary. */
26478 /* TODO: Omit this save if r3 is not used. */
26479 fputs ("\tpush {r3}\n", file);
26480 fputs ("\tldr\tr3, ", file);
26482 else
26484 fputs ("\tldr\tr12, ", file);
26486 assemble_name (file, label);
26487 fputc ('\n', file);
26488 if (flag_pic)
26490 /* If we are generating PIC, the ldr instruction below loads
26491 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26492 the address of the add + 8, so we have:
26494 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26495 = target + 1.
26497 Note that we have "+ 1" because some versions of GNU ld
26498 don't set the low bit of the result for R_ARM_REL32
26499 relocations against thumb function symbols.
26500 On ARMv6M this is +4, not +8. */
26501 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26502 assemble_name (file, labelpc);
26503 fputs (":\n", file);
26504 if (TARGET_THUMB1_ONLY)
26506 /* This is 2 insns after the start of the thunk, so we know it
26507 is 4-byte aligned. */
26508 fputs ("\tadd\tr3, pc, r3\n", file);
26509 fputs ("\tmov r12, r3\n", file);
26511 else
26512 fputs ("\tadd\tr12, pc, r12\n", file);
26514 else if (TARGET_THUMB1_ONLY)
26515 fputs ("\tmov r12, r3\n", file);
26517 if (TARGET_THUMB1_ONLY)
26519 if (mi_delta > 255)
26521 fputs ("\tldr\tr3, ", file);
26522 assemble_name (file, label);
26523 fputs ("+4\n", file);
26524 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26525 mi_op, this_regno, this_regno);
26527 else if (mi_delta != 0)
26529 /* Thumb1 unified syntax requires s suffix in instruction name when
26530 one of the operands is immediate. */
26531 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26532 mi_op, this_regno, this_regno,
26533 mi_delta);
26536 else
26538 /* TODO: Use movw/movt for large constants when available. */
26539 while (mi_delta != 0)
26541 if ((mi_delta & (3 << shift)) == 0)
26542 shift += 2;
26543 else
26545 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26546 mi_op, this_regno, this_regno,
26547 mi_delta & (0xff << shift));
26548 mi_delta &= ~(0xff << shift);
26549 shift += 8;
26553 if (TARGET_THUMB1)
26555 if (TARGET_THUMB1_ONLY)
26556 fputs ("\tpop\t{r3}\n", file);
26558 fprintf (file, "\tbx\tr12\n");
26559 ASM_OUTPUT_ALIGN (file, 2);
26560 assemble_name (file, label);
26561 fputs (":\n", file);
26562 if (flag_pic)
26564 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26565 rtx tem = XEXP (DECL_RTL (function), 0);
26566 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26567 pipeline offset is four rather than eight. Adjust the offset
26568 accordingly. */
26569 tem = plus_constant (GET_MODE (tem), tem,
26570 TARGET_THUMB1_ONLY ? -3 : -7);
26571 tem = gen_rtx_MINUS (GET_MODE (tem),
26572 tem,
26573 gen_rtx_SYMBOL_REF (Pmode,
26574 ggc_strdup (labelpc)));
26575 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26577 else
26578 /* Output ".word .LTHUNKn". */
26579 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26581 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26582 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26584 else
26586 fputs ("\tb\t", file);
26587 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26588 if (NEED_PLT_RELOC)
26589 fputs ("(PLT)", file);
26590 fputc ('\n', file);
26593 final_end_function ();
26596 /* MI thunk handling for TARGET_32BIT. */
26598 static void
26599 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26600 HOST_WIDE_INT vcall_offset, tree function)
26602 /* On ARM, this_regno is R0 or R1 depending on
26603 whether the function returns an aggregate or not. */
26605 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26606 function)
26607 ? R1_REGNUM : R0_REGNUM);
26609 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26610 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26611 reload_completed = 1;
26612 emit_note (NOTE_INSN_PROLOGUE_END);
26614 /* Add DELTA to THIS_RTX. */
26615 if (delta != 0)
26616 arm_split_constant (PLUS, Pmode, NULL_RTX,
26617 delta, this_rtx, this_rtx, false);
26619 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26620 if (vcall_offset != 0)
26622 /* Load *THIS_RTX. */
26623 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26624 /* Compute *THIS_RTX + VCALL_OFFSET. */
26625 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26626 false);
26627 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26628 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26629 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26632 /* Generate a tail call to the target function. */
26633 if (!TREE_USED (function))
26635 assemble_external (function);
26636 TREE_USED (function) = 1;
26638 rtx funexp = XEXP (DECL_RTL (function), 0);
26639 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26640 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26641 SIBLING_CALL_P (insn) = 1;
26643 insn = get_insns ();
26644 shorten_branches (insn);
26645 final_start_function (insn, file, 1);
26646 final (insn, file, 1);
26647 final_end_function ();
26649 /* Stop pretending this is a post-reload pass. */
26650 reload_completed = 0;
26653 /* Output code to add DELTA to the first argument, and then jump
26654 to FUNCTION. Used for C++ multiple inheritance. */
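/* For example, a thunk for a virtual function inherited through a secondary
   base at offset 8 would typically add 8 to the 'this' pointer (in r0, or in
   r1 when the function returns an aggregate in memory) and then tail-call
   the real implementation. */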
26656 static void
26657 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26658 HOST_WIDE_INT vcall_offset, tree function)
26660 if (TARGET_32BIT)
26661 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26662 else
26663 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26667 arm_emit_vector_const (FILE *file, rtx x)
26669 int i;
26670 const char * pattern;
26672 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26674 switch (GET_MODE (x))
26676 case E_V2SImode: pattern = "%08x"; break;
26677 case E_V4HImode: pattern = "%04x"; break;
26678 case E_V8QImode: pattern = "%02x"; break;
26679 default: gcc_unreachable ();
26682 fprintf (file, "0x");
26683 for (i = CONST_VECTOR_NUNITS (x); i--;)
26685 rtx element;
26687 element = CONST_VECTOR_ELT (x, i);
26688 fprintf (file, pattern, INTVAL (element));
26691 return 1;
26694 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26695 HFmode constant pool entries are actually loaded with ldr. */
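/* For example, on a little-endian target the HFmode constant 1.0 (bit
   pattern 0x3c00) is emitted as the halfword 0x3c00 followed by two bytes of
   zero padding, so the 4-byte word loaded by ldr has the value in its low
   half. */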
26696 void
26697 arm_emit_fp16_const (rtx c)
26699 long bits;
26701 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26702 if (WORDS_BIG_ENDIAN)
26703 assemble_zeros (2);
26704 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26705 if (!WORDS_BIG_ENDIAN)
26706 assemble_zeros (2);
26709 const char *
26710 arm_output_load_gr (rtx *operands)
26712 rtx reg;
26713 rtx offset;
26714 rtx wcgr;
26715 rtx sum;
26717 if (!MEM_P (operands [1])
26718 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26719 || !REG_P (reg = XEXP (sum, 0))
26720 || !CONST_INT_P (offset = XEXP (sum, 1))
26721 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26722 return "wldrw%?\t%0, %1";
26724 /* Fix up an out-of-range load of a GR register. */
26725 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26726 wcgr = operands[0];
26727 operands[0] = reg;
26728 output_asm_insn ("ldr%?\t%0, %1", operands);
26730 operands[0] = wcgr;
26731 operands[1] = reg;
26732 output_asm_insn ("tmcr%?\t%0, %1", operands);
26733 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26735 return "";
26738 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26740 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26741 named arg and all anonymous args onto the stack.
26742 XXX I know the prologue shouldn't be pushing registers, but it is faster
26743 that way. */
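/* For example, for a variadic AAPCS function whose only named argument is an
   int, one of the four argument registers is consumed, so *pretend_size
   below becomes (4 - 1) * UNITS_PER_WORD = 12 bytes and the prologue pushes
   r1-r3 onto the stack. */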
26745 static void
26746 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26747 machine_mode mode,
26748 tree type,
26749 int *pretend_size,
26750 int second_time ATTRIBUTE_UNUSED)
26752 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26753 int nregs;
26755 cfun->machine->uses_anonymous_args = 1;
26756 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26758 nregs = pcum->aapcs_ncrn;
26759 if (nregs & 1)
26761 int res = arm_needs_doubleword_align (mode, type);
26762 if (res < 0 && warn_psabi)
26763 inform (input_location, "parameter passing for argument of "
26764 "type %qT changed in GCC 7.1", type);
26765 else if (res > 0)
26766 nregs++;
26769 else
26770 nregs = pcum->nregs;
26772 if (nregs < NUM_ARG_REGS)
26773 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26776 /* We can't rely on the caller doing the proper promotion when
26777 using APCS or ATPCS. */
26779 static bool
26780 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26782 return !TARGET_AAPCS_BASED;
26785 static machine_mode
26786 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26787 machine_mode mode,
26788 int *punsignedp ATTRIBUTE_UNUSED,
26789 const_tree fntype ATTRIBUTE_UNUSED,
26790 int for_return ATTRIBUTE_UNUSED)
26792 if (GET_MODE_CLASS (mode) == MODE_INT
26793 && GET_MODE_SIZE (mode) < 4)
26794 return SImode;
26796 return mode;
26800 static bool
26801 arm_default_short_enums (void)
26803 return ARM_DEFAULT_SHORT_ENUMS;
26807 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26809 static bool
26810 arm_align_anon_bitfield (void)
26812 return TARGET_AAPCS_BASED;
26816 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26818 static tree
26819 arm_cxx_guard_type (void)
26821 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26825 /* The EABI says test the least significant bit of a guard variable. */
26827 static bool
26828 arm_cxx_guard_mask_bit (void)
26830 return TARGET_AAPCS_BASED;
26834 /* The EABI specifies that all array cookies are 8 bytes long. */
26836 static tree
26837 arm_get_cookie_size (tree type)
26839 tree size;
26841 if (!TARGET_AAPCS_BASED)
26842 return default_cxx_get_cookie_size (type);
26844 size = build_int_cst (sizetype, 8);
26845 return size;
26849 /* The EABI says that array cookies should also contain the element size. */
26851 static bool
26852 arm_cookie_has_size (void)
26854 return TARGET_AAPCS_BASED;
26858 /* The EABI says constructors and destructors should return a pointer to
26859 the object constructed/destroyed. */
26861 static bool
26862 arm_cxx_cdtor_returns_this (void)
26864 return TARGET_AAPCS_BASED;
26867 /* The EABI says that an inline function may never be the key
26868 method. */
26870 static bool
26871 arm_cxx_key_method_may_be_inline (void)
26873 return !TARGET_AAPCS_BASED;
26876 static void
26877 arm_cxx_determine_class_data_visibility (tree decl)
26879 if (!TARGET_AAPCS_BASED
26880 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26881 return;
26883 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26884 is exported. However, on systems without dynamic vague linkage,
26885 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26886 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26887 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26888 else
26889 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26890 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26893 static bool
26894 arm_cxx_class_data_always_comdat (void)
26896 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26897 vague linkage if the class has no key function. */
26898 return !TARGET_AAPCS_BASED;
26902 /* The EABI says __aeabi_atexit should be used to register static
26903 destructors. */
26905 static bool
26906 arm_cxx_use_aeabi_atexit (void)
26908 return TARGET_AAPCS_BASED;
26912 void
26913 arm_set_return_address (rtx source, rtx scratch)
26915 arm_stack_offsets *offsets;
26916 HOST_WIDE_INT delta;
26917 rtx addr, mem;
26918 unsigned long saved_regs;
26920 offsets = arm_get_frame_offsets ();
26921 saved_regs = offsets->saved_regs_mask;
26923 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26924 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26925 else
26927 if (frame_pointer_needed)
26928 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26929 else
26931 /* LR will be the first saved register. */
26932 delta = offsets->outgoing_args - (offsets->frame + 4);
26935 if (delta >= 4096)
26937 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26938 GEN_INT (delta & ~4095)));
26939 addr = scratch;
26940 delta &= 4095;
26942 else
26943 addr = stack_pointer_rtx;
26945 addr = plus_constant (Pmode, addr, delta);
26948 /* The store needs to be marked to prevent DSE from deleting
26949 it as dead if it is based on fp. */
26950 mem = gen_frame_mem (Pmode, addr);
26951 MEM_VOLATILE_P (mem) = true;
26952 emit_move_insn (mem, source);
26957 void
26958 thumb_set_return_address (rtx source, rtx scratch)
26960 arm_stack_offsets *offsets;
26961 HOST_WIDE_INT delta;
26962 HOST_WIDE_INT limit;
26963 int reg;
26964 rtx addr, mem;
26965 unsigned long mask;
26967 emit_use (source);
26969 offsets = arm_get_frame_offsets ();
26970 mask = offsets->saved_regs_mask;
26971 if (mask & (1 << LR_REGNUM))
26973 limit = 1024;
26974 /* Find the saved regs. */
26975 if (frame_pointer_needed)
26977 delta = offsets->soft_frame - offsets->saved_args;
26978 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26979 if (TARGET_THUMB1)
26980 limit = 128;
26982 else
26984 delta = offsets->outgoing_args - offsets->saved_args;
26985 reg = SP_REGNUM;
26987 /* Allow for the stack frame. */
26988 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26989 delta -= 16;
26990 /* The link register is always the first saved register. */
26991 delta -= 4;
26993 /* Construct the address. */
26994 addr = gen_rtx_REG (SImode, reg);
26995 if (delta > limit)
26997 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26998 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26999 addr = scratch;
27001 else
27002 addr = plus_constant (Pmode, addr, delta);
27004 /* The store needs to be marked to prevent DSE from deleting
27005 it as dead if it is based on fp. */
27006 mem = gen_frame_mem (Pmode, addr);
27007 MEM_VOLATILE_P (mem) = true;
27008 emit_move_insn (mem, source);
27010 else
27011 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27014 /* Implements target hook vector_mode_supported_p. */
27015 bool
27016 arm_vector_mode_supported_p (machine_mode mode)
27018 /* Neon also supports V2SImode, etc. listed in the clause below. */
27019 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27020 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27021 || mode == V2DImode || mode == V8HFmode))
27022 return true;
27024 if ((TARGET_NEON || TARGET_IWMMXT)
27025 && ((mode == V2SImode)
27026 || (mode == V4HImode)
27027 || (mode == V8QImode)))
27028 return true;
27030 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27031 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27032 || mode == V2HAmode))
27033 return true;
27035 return false;
27038 /* Implements target hook array_mode_supported_p. */
27040 static bool
27041 arm_array_mode_supported_p (machine_mode mode,
27042 unsigned HOST_WIDE_INT nelems)
27044 if (TARGET_NEON
27045 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27046 && (nelems >= 2 && nelems <= 4))
27047 return true;
27049 return false;
27052 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27053 registers when autovectorizing for Neon, at least until multiple vector
27054 widths are supported properly by the middle-end. */
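/* For example, for SImode elements this returns V4SImode (a 128-bit Q
   register) by default, but V2SImode (a 64-bit D register) when
   -mvectorize-with-neon-double is given. */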
27056 static machine_mode
27057 arm_preferred_simd_mode (scalar_mode mode)
27059 if (TARGET_NEON)
27060 switch (mode)
27062 case E_SFmode:
27063 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27064 case E_SImode:
27065 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27066 case E_HImode:
27067 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27068 case E_QImode:
27069 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27070 case E_DImode:
27071 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27072 return V2DImode;
27073 break;
27075 default:;
27078 if (TARGET_REALLY_IWMMXT)
27079 switch (mode)
27081 case E_SImode:
27082 return V2SImode;
27083 case E_HImode:
27084 return V4HImode;
27085 case E_QImode:
27086 return V8QImode;
27088 default:;
27091 return word_mode;
27094 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27096 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27097 using r0-r4 for function arguments, r7 for the stack frame and don't have
27098 enough left over to do doubleword arithmetic. For Thumb-2 all the
27099 potentially problematic instructions accept high registers so this is not
27100 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27101 that require many low registers. */
27102 static bool
27103 arm_class_likely_spilled_p (reg_class_t rclass)
27105 if ((TARGET_THUMB1 && rclass == LO_REGS)
27106 || rclass == CC_REG)
27107 return true;
27109 return false;
27112 /* Implements target hook small_register_classes_for_mode_p. */
27113 bool
27114 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27116 return TARGET_THUMB1;
27119 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27120 ARM insns and therefore guarantee that the shift count is modulo 256.
27121 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27122 guarantee no particular behavior for out-of-range counts. */
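/* For example, an SImode shift by 260 may be treated as a shift by
   260 % 256 == 4, whereas no such assumption is made for DImode shifts. */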
27124 static unsigned HOST_WIDE_INT
27125 arm_shift_truncation_mask (machine_mode mode)
27127 return mode == SImode ? 255 : 0;
27131 /* Map internal gcc register numbers to DWARF2 register numbers. */
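/* For example, a core register such as r13 keeps its own number (13), the
   first single-precision VFP register maps to 64, and a double-only VFP
   register at offset 32 from FIRST_VFP_REGNUM maps to 256 + (32 / 2) = 272. */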
27133 unsigned int
27134 arm_dbx_register_number (unsigned int regno)
27136 if (regno < 16)
27137 return regno;
27139 if (IS_VFP_REGNUM (regno))
27141 /* See comment in arm_dwarf_register_span. */
27142 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27143 return 64 + regno - FIRST_VFP_REGNUM;
27144 else
27145 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27148 if (IS_IWMMXT_GR_REGNUM (regno))
27149 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27151 if (IS_IWMMXT_REGNUM (regno))
27152 return 112 + regno - FIRST_IWMMXT_REGNUM;
27154 return DWARF_FRAME_REGISTERS;
27157 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27158 GCC models them as 64 32-bit registers, so we need to describe this to
27159 the DWARF generation code. Other registers can use the default. */
27160 static rtx
27161 arm_dwarf_register_span (rtx rtl)
27163 machine_mode mode;
27164 unsigned regno;
27165 rtx parts[16];
27166 int nregs;
27167 int i;
27169 regno = REGNO (rtl);
27170 if (!IS_VFP_REGNUM (regno))
27171 return NULL_RTX;
27173 /* XXX FIXME: The EABI defines two VFP register ranges:
27174 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27175 256-287: D0-D31
27176 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27177 corresponding D register. Until GDB supports this, we shall use the
27178 legacy encodings. We also use these encodings for D0-D15 for
27179 compatibility with older debuggers. */
27180 mode = GET_MODE (rtl);
27181 if (GET_MODE_SIZE (mode) < 8)
27182 return NULL_RTX;
27184 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27186 nregs = GET_MODE_SIZE (mode) / 4;
27187 for (i = 0; i < nregs; i += 2)
27188 if (TARGET_BIG_END)
27190 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27191 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27193 else
27195 parts[i] = gen_rtx_REG (SImode, regno + i);
27196 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27199 else
27201 nregs = GET_MODE_SIZE (mode) / 8;
27202 for (i = 0; i < nregs; i++)
27203 parts[i] = gen_rtx_REG (DImode, regno + i);
27206 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27209 #if ARM_UNWIND_INFO
27210 /* Emit unwind directives for a store-multiple instruction or stack pointer
27211 push during alignment.
27212 These should only ever be generated by the function prologue code, so
27213 expect them to have a particular form.
27214 The store-multiple instruction sometimes pushes pc as the last register,
27215 although it should not be tracked in the unwind information; for -Os it
27216 sometimes pushes dummy registers before the first register that needs
27217 to be tracked in the unwind information.  Such dummy registers are there just
27218 to avoid separate stack adjustment, and will not be restored in the
27219 epilogue. */
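/* For example, a prologue "push {r4, r5, lr}" is annotated as
   ".save {r4, r5, lr}"; when pc is also pushed merely to keep the stack
   adjustment in a single instruction, that slot is annotated as ".pad #4"
   rather than as a saved register. */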
27221 static void
27222 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27224 int i;
27225 HOST_WIDE_INT offset;
27226 HOST_WIDE_INT nregs;
27227 int reg_size;
27228 unsigned reg;
27229 unsigned lastreg;
27230 unsigned padfirst = 0, padlast = 0;
27231 rtx e;
27233 e = XVECEXP (p, 0, 0);
27234 gcc_assert (GET_CODE (e) == SET);
27236 /* First insn will adjust the stack pointer. */
27237 gcc_assert (GET_CODE (e) == SET
27238 && REG_P (SET_DEST (e))
27239 && REGNO (SET_DEST (e)) == SP_REGNUM
27240 && GET_CODE (SET_SRC (e)) == PLUS);
27242 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27243 nregs = XVECLEN (p, 0) - 1;
27244 gcc_assert (nregs);
27246 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27247 if (reg < 16)
27249 /* For -Os dummy registers can be pushed at the beginning to
27250 avoid separate stack pointer adjustment. */
27251 e = XVECEXP (p, 0, 1);
27252 e = XEXP (SET_DEST (e), 0);
27253 if (GET_CODE (e) == PLUS)
27254 padfirst = INTVAL (XEXP (e, 1));
27255 gcc_assert (padfirst == 0 || optimize_size);
27256 /* The function prologue may also push pc, but not annotate it as it is
27257 never restored. We turn this into a stack pointer adjustment. */
27258 e = XVECEXP (p, 0, nregs);
27259 e = XEXP (SET_DEST (e), 0);
27260 if (GET_CODE (e) == PLUS)
27261 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27262 else
27263 padlast = offset - 4;
27264 gcc_assert (padlast == 0 || padlast == 4);
27265 if (padlast == 4)
27266 fprintf (asm_out_file, "\t.pad #4\n");
27267 reg_size = 4;
27268 fprintf (asm_out_file, "\t.save {");
27270 else if (IS_VFP_REGNUM (reg))
27272 reg_size = 8;
27273 fprintf (asm_out_file, "\t.vsave {");
27275 else
27276 /* Unknown register type. */
27277 gcc_unreachable ();
27279 /* If the stack increment doesn't match the size of the saved registers,
27280 something has gone horribly wrong. */
27281 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27283 offset = padfirst;
27284 lastreg = 0;
27285 /* The remaining insns will describe the stores. */
27286 for (i = 1; i <= nregs; i++)
27288 /* Expect (set (mem <addr>) (reg)).
27289 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27290 e = XVECEXP (p, 0, i);
27291 gcc_assert (GET_CODE (e) == SET
27292 && MEM_P (SET_DEST (e))
27293 && REG_P (SET_SRC (e)));
27295 reg = REGNO (SET_SRC (e));
27296 gcc_assert (reg >= lastreg);
27298 if (i != 1)
27299 fprintf (asm_out_file, ", ");
27300 /* We can't use %r for vfp because we need to use the
27301 double precision register names. */
27302 if (IS_VFP_REGNUM (reg))
27303 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27304 else
27305 asm_fprintf (asm_out_file, "%r", reg);
27307 if (flag_checking)
27309 /* Check that the addresses are consecutive. */
27310 e = XEXP (SET_DEST (e), 0);
27311 if (GET_CODE (e) == PLUS)
27312 gcc_assert (REG_P (XEXP (e, 0))
27313 && REGNO (XEXP (e, 0)) == SP_REGNUM
27314 && CONST_INT_P (XEXP (e, 1))
27315 && offset == INTVAL (XEXP (e, 1)));
27316 else
27317 gcc_assert (i == 1
27318 && REG_P (e)
27319 && REGNO (e) == SP_REGNUM);
27320 offset += reg_size;
27323 fprintf (asm_out_file, "}\n");
27324 if (padfirst)
27325 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27328 /* Emit unwind directives for a SET. */
27330 static void
27331 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27333 rtx e0;
27334 rtx e1;
27335 unsigned reg;
27337 e0 = XEXP (p, 0);
27338 e1 = XEXP (p, 1);
27339 switch (GET_CODE (e0))
27341 case MEM:
27342 /* Pushing a single register. */
27343 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27344 || !REG_P (XEXP (XEXP (e0, 0), 0))
27345 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27346 abort ();
27348 asm_fprintf (asm_out_file, "\t.save ");
27349 if (IS_VFP_REGNUM (REGNO (e1)))
27350 asm_fprintf(asm_out_file, "{d%d}\n",
27351 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27352 else
27353 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27354 break;
27356 case REG:
27357 if (REGNO (e0) == SP_REGNUM)
27359 /* A stack increment. */
27360 if (GET_CODE (e1) != PLUS
27361 || !REG_P (XEXP (e1, 0))
27362 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27363 || !CONST_INT_P (XEXP (e1, 1)))
27364 abort ();
27366 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27367 -INTVAL (XEXP (e1, 1)));
27369 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27371 HOST_WIDE_INT offset;
27373 if (GET_CODE (e1) == PLUS)
27375 if (!REG_P (XEXP (e1, 0))
27376 || !CONST_INT_P (XEXP (e1, 1)))
27377 abort ();
27378 reg = REGNO (XEXP (e1, 0));
27379 offset = INTVAL (XEXP (e1, 1));
27380 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27381 HARD_FRAME_POINTER_REGNUM, reg,
27382 offset);
27384 else if (REG_P (e1))
27386 reg = REGNO (e1);
27387 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27388 HARD_FRAME_POINTER_REGNUM, reg);
27390 else
27391 abort ();
27393 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27395 /* Move from sp to reg. */
27396 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27398 else if (GET_CODE (e1) == PLUS
27399 && REG_P (XEXP (e1, 0))
27400 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27401 && CONST_INT_P (XEXP (e1, 1)))
27403 /* Set reg to offset from sp. */
27404 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27405 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27407 else
27408 abort ();
27409 break;
27411 default:
27412 abort ();
27417 /* Emit unwind directives for the given insn. */
27419 static void
27420 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27422 rtx note, pat;
27423 bool handled_one = false;
27425 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27426 return;
27428 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27429 && (TREE_NOTHROW (current_function_decl)
27430 || crtl->all_throwers_are_sibcalls))
27431 return;
27433 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27434 return;
27436 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27438 switch (REG_NOTE_KIND (note))
27440 case REG_FRAME_RELATED_EXPR:
27441 pat = XEXP (note, 0);
27442 goto found;
27444 case REG_CFA_REGISTER:
27445 pat = XEXP (note, 0);
27446 if (pat == NULL)
27448 pat = PATTERN (insn);
27449 if (GET_CODE (pat) == PARALLEL)
27450 pat = XVECEXP (pat, 0, 0);
27453 /* Only emitted for IS_STACKALIGN re-alignment. */
27455 rtx dest, src;
27456 unsigned reg;
27458 src = SET_SRC (pat);
27459 dest = SET_DEST (pat);
27461 gcc_assert (src == stack_pointer_rtx);
27462 reg = REGNO (dest);
27463 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27464 reg + 0x90, reg);
27466 handled_one = true;
27467 break;
27469 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27470 to get correct dwarf information for shrink-wrap.  We should not
27471 emit unwind information for it because these notes are used either for
27472 pretend arguments or to adjust sp and restore registers from the
27473 stack. */
27474 case REG_CFA_DEF_CFA:
27475 case REG_CFA_ADJUST_CFA:
27476 case REG_CFA_RESTORE:
27477 return;
27479 case REG_CFA_EXPRESSION:
27480 case REG_CFA_OFFSET:
27481 /* ??? Only handling here what we actually emit. */
27482 gcc_unreachable ();
27484 default:
27485 break;
27488 if (handled_one)
27489 return;
27490 pat = PATTERN (insn);
27491 found:
27493 switch (GET_CODE (pat))
27495 case SET:
27496 arm_unwind_emit_set (asm_out_file, pat);
27497 break;
27499 case SEQUENCE:
27500 /* Store multiple. */
27501 arm_unwind_emit_sequence (asm_out_file, pat);
27502 break;
27504 default:
27505 abort();
27510 /* Output a reference from a function exception table to the type_info
27511 object X. The EABI specifies that the symbol should be relocated by
27512 an R_ARM_TARGET2 relocation. */
27514 static bool
27515 arm_output_ttype (rtx x)
27517 fputs ("\t.word\t", asm_out_file);
27518 output_addr_const (asm_out_file, x);
27519 /* Use special relocations for symbol references. */
27520 if (!CONST_INT_P (x))
27521 fputs ("(TARGET2)", asm_out_file);
27522 fputc ('\n', asm_out_file);
27524 return TRUE;
27527 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27529 static void
27530 arm_asm_emit_except_personality (rtx personality)
27532 fputs ("\t.personality\t", asm_out_file);
27533 output_addr_const (asm_out_file, personality);
27534 fputc ('\n', asm_out_file);
27536 #endif /* ARM_UNWIND_INFO */
27538 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27540 static void
27541 arm_asm_init_sections (void)
27543 #if ARM_UNWIND_INFO
27544 exception_section = get_unnamed_section (0, output_section_asm_op,
27545 "\t.handlerdata");
27546 #endif /* ARM_UNWIND_INFO */
27548 #ifdef OBJECT_FORMAT_ELF
27549 if (target_pure_code)
27550 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27551 #endif
27554 /* Output unwind directives for the start/end of a function. */
27556 void
27557 arm_output_fn_unwind (FILE * f, bool prologue)
27559 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27560 return;
27562 if (prologue)
27563 fputs ("\t.fnstart\n", f);
27564 else
27566 /* If this function will never be unwound, then mark it as such.
27567 The same condition is used in arm_unwind_emit to suppress
27568 the frame annotations. */
27569 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27570 && (TREE_NOTHROW (current_function_decl)
27571 || crtl->all_throwers_are_sibcalls))
27572 fputs("\t.cantunwind\n", f);
27574 fputs ("\t.fnend\n", f);
27578 static bool
27579 arm_emit_tls_decoration (FILE *fp, rtx x)
27581 enum tls_reloc reloc;
27582 rtx val;
27584 val = XVECEXP (x, 0, 0);
27585 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27587 output_addr_const (fp, val);
27589 switch (reloc)
27591 case TLS_GD32:
27592 fputs ("(tlsgd)", fp);
27593 break;
27594 case TLS_LDM32:
27595 fputs ("(tlsldm)", fp);
27596 break;
27597 case TLS_LDO32:
27598 fputs ("(tlsldo)", fp);
27599 break;
27600 case TLS_IE32:
27601 fputs ("(gottpoff)", fp);
27602 break;
27603 case TLS_LE32:
27604 fputs ("(tpoff)", fp);
27605 break;
27606 case TLS_DESCSEQ:
27607 fputs ("(tlsdesc)", fp);
27608 break;
27609 default:
27610 gcc_unreachable ();
27613 switch (reloc)
27615 case TLS_GD32:
27616 case TLS_LDM32:
27617 case TLS_IE32:
27618 case TLS_DESCSEQ:
27619 fputs (" + (. - ", fp);
27620 output_addr_const (fp, XVECEXP (x, 0, 2));
27621 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27622 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27623 output_addr_const (fp, XVECEXP (x, 0, 3));
27624 fputc (')', fp);
27625 break;
27626 default:
27627 break;
27630 return TRUE;
27633 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27635 static void
27636 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27638 gcc_assert (size == 4);
27639 fputs ("\t.word\t", file);
27640 output_addr_const (file, x);
27641 fputs ("(tlsldo)", file);
27644 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27646 static bool
27647 arm_output_addr_const_extra (FILE *fp, rtx x)
27649 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27650 return arm_emit_tls_decoration (fp, x);
27651 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27653 char label[256];
27654 int labelno = INTVAL (XVECEXP (x, 0, 0));
27656 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27657 assemble_name_raw (fp, label);
27659 return TRUE;
27661 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27663 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27664 if (GOT_PCREL)
27665 fputs ("+.", fp);
27666 fputs ("-(", fp);
27667 output_addr_const (fp, XVECEXP (x, 0, 0));
27668 fputc (')', fp);
27669 return TRUE;
27671 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27673 output_addr_const (fp, XVECEXP (x, 0, 0));
27674 if (GOT_PCREL)
27675 fputs ("+.", fp);
27676 fputs ("-(", fp);
27677 output_addr_const (fp, XVECEXP (x, 0, 1));
27678 fputc (')', fp);
27679 return TRUE;
27681 else if (GET_CODE (x) == CONST_VECTOR)
27682 return arm_emit_vector_const (fp, x);
27684 return FALSE;
27687 /* Output assembly for a shift instruction.
27688 SET_FLAGS determines how the instruction modifies the condition codes.
27689 0 - Do not set condition codes.
27690 1 - Set condition codes.
27691 2 - Use smallest instruction. */
27692 const char *
27693 arm_output_shift(rtx * operands, int set_flags)
27695 char pattern[100];
27696 static const char flag_chars[3] = {'?', '.', '!'};
27697 const char *shift;
27698 HOST_WIDE_INT val;
27699 char c;
27701 c = flag_chars[set_flags];
27702 shift = shift_op(operands[3], &val);
27703 if (shift)
27705 if (val != -1)
27706 operands[2] = GEN_INT(val);
27707 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27709 else
27710 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27712 output_asm_insn (pattern, operands);
27713 return "";
27716 /* Output assembly for a WMMX immediate shift instruction. */
27717 const char *
27718 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27720 int shift = INTVAL (operands[2]);
27721 char templ[50];
27722 machine_mode opmode = GET_MODE (operands[0]);
27724 gcc_assert (shift >= 0);
27726 /* If the shift value is out of range for the operand width (> 63 for the
27727 D qualifier, > 31 for W or > 15 for H), handle it specially below. */
27728 if (((opmode == V4HImode) && (shift > 15))
27729 || ((opmode == V2SImode) && (shift > 31))
27730 || ((opmode == DImode) && (shift > 63)))
27732 if (wror_or_wsra)
27734 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27735 output_asm_insn (templ, operands);
27736 if (opmode == DImode)
27738 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27739 output_asm_insn (templ, operands);
27742 else
27744 /* The destination register will contain all zeros. */
27745 sprintf (templ, "wzero\t%%0");
27746 output_asm_insn (templ, operands);
27748 return "";
27751 if ((opmode == DImode) && (shift > 32))
27753 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27754 output_asm_insn (templ, operands);
27755 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27756 output_asm_insn (templ, operands);
27758 else
27760 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27761 output_asm_insn (templ, operands);
27763 return "";
27766 /* Output assembly for a WMMX tinsr instruction. */
27767 const char *
27768 arm_output_iwmmxt_tinsr (rtx *operands)
27770 int mask = INTVAL (operands[3]);
27771 int i;
27772 char templ[50];
27773 int units = mode_nunits[GET_MODE (operands[0])];
27774 gcc_assert ((mask & (mask - 1)) == 0);
27775 for (i = 0; i < units; ++i)
27777 if ((mask & 0x01) == 1)
27779 break;
27781 mask >>= 1;
27783 gcc_assert (i < units);
27785 switch (GET_MODE (operands[0]))
27787 case E_V8QImode:
27788 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27789 break;
27790 case E_V4HImode:
27791 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27792 break;
27793 case E_V2SImode:
27794 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27795 break;
27796 default:
27797 gcc_unreachable ();
27798 break;
27800 output_asm_insn (templ, operands);
27802 return "";
27805 /* Output a Thumb-1 casesi dispatch sequence. */
27806 const char *
27807 thumb1_output_casesi (rtx *operands)
27809 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27811 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27813 switch (GET_MODE(diff_vec))
27815 case E_QImode:
27816 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27817 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27818 case E_HImode:
27819 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27820 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27821 case E_SImode:
27822 return "bl\t%___gnu_thumb1_case_si";
27823 default:
27824 gcc_unreachable ();
27828 /* Output a Thumb-2 casesi instruction. */
27829 const char *
27830 thumb2_output_casesi (rtx *operands)
27832 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27834 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27836 output_asm_insn ("cmp\t%0, %1", operands);
27837 output_asm_insn ("bhi\t%l3", operands);
27838 switch (GET_MODE(diff_vec))
27840 case E_QImode:
27841 return "tbb\t[%|pc, %0]";
27842 case E_HImode:
27843 return "tbh\t[%|pc, %0, lsl #1]";
27844 case E_SImode:
27845 if (flag_pic)
27847 output_asm_insn ("adr\t%4, %l2", operands);
27848 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27849 output_asm_insn ("add\t%4, %4, %5", operands);
27850 return "bx\t%4";
27852 else
27854 output_asm_insn ("adr\t%4, %l2", operands);
27855 return "ldr\t%|pc, [%4, %0, lsl #2]";
27857 default:
27858 gcc_unreachable ();
27862 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27863 per-core tuning structs. */
27864 static int
27865 arm_issue_rate (void)
27867 return current_tune->issue_rate;
27870 /* Return how many instructions the scheduler should look ahead to choose
27871 the best one. */
27872 static int
27873 arm_first_cycle_multipass_dfa_lookahead (void)
27875 int issue_rate = arm_issue_rate ();
27877 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27880 /* Enable modeling of L2 auto-prefetcher. */
27881 static int
27882 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27884 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27887 const char *
27888 arm_mangle_type (const_tree type)
27890 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27891 has to be mangled as if it is in the "std" namespace. */
27892 if (TARGET_AAPCS_BASED
27893 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27894 return "St9__va_list";
27896 /* Half-precision float. */
27897 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27898 return "Dh";
27900 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27901 builtin type. */
27902 if (TYPE_NAME (type) != NULL)
27903 return arm_mangle_builtin_type (type);
27905 /* Use the default mangling. */
27906 return NULL;
27909 /* Order of allocation of core registers for Thumb: this allocation is
27910 written over the corresponding initial entries of the array
27911 initialized with REG_ALLOC_ORDER. We allocate all low registers
27912 first. Saving and restoring a low register is usually cheaper than
27913 using a call-clobbered high register. */
27915 static const int thumb_core_reg_alloc_order[] =
27917 3, 2, 1, 0, 4, 5, 6, 7,
27918 12, 14, 8, 9, 10, 11
27921 /* Adjust register allocation order when compiling for Thumb. */
27923 void
27924 arm_order_regs_for_local_alloc (void)
27926 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27927 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27928 if (TARGET_THUMB)
27929 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27930 sizeof (thumb_core_reg_alloc_order));
27933 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27935 bool
27936 arm_frame_pointer_required (void)
27938 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27939 return true;
27941 /* If the function receives nonlocal gotos, it needs to save the frame
27942 pointer in the nonlocal_goto_save_area object. */
27943 if (cfun->has_nonlocal_label)
27944 return true;
27946 /* The frame pointer is required for non-leaf APCS frames. */
27947 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27948 return true;
27950 /* If we are probing the stack in the prologue, we will have a faulting
27951 instruction prior to the stack adjustment and this requires a frame
27952 pointer if we want to catch the exception using the EABI unwinder. */
27953 if (!IS_INTERRUPT (arm_current_func_type ())
27954 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27955 || flag_stack_clash_protection)
27956 && arm_except_unwind_info (&global_options) == UI_TARGET
27957 && cfun->can_throw_non_call_exceptions)
27959 HOST_WIDE_INT size = get_frame_size ();
27961 /* That's irrelevant if there is no stack adjustment. */
27962 if (size <= 0)
27963 return false;
27965 /* That's relevant only if there is a stack probe. */
27966 if (crtl->is_leaf && !cfun->calls_alloca)
27968 /* We don't have the final size of the frame so adjust. */
27969 size += 32 * UNITS_PER_WORD;
27970 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27971 return true;
27973 else
27974 return true;
27977 return false;
27980 /* Only Thumb-1 lacks conditional execution, so return true if
27981 the target is not Thumb-1. */
27982 static bool
27983 arm_have_conditional_execution (void)
27985 return !TARGET_THUMB1;
27988 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
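/* For example, a 128-bit Neon vector type such as int32x4_t is given 8-byte
   alignment under AAPCS rather than its natural 16-byte alignment. */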
27989 static HOST_WIDE_INT
27990 arm_vector_alignment (const_tree type)
27992 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27994 if (TARGET_AAPCS_BASED)
27995 align = MIN (align, 64);
27997 return align;
28000 static unsigned int
28001 arm_autovectorize_vector_sizes (void)
28003 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
28006 static bool
28007 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28009 /* Vectors which aren't in packed structures will not be less aligned than
28010 the natural alignment of their element type, so this is safe. */
28011 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28012 return !is_packed;
28014 return default_builtin_vector_alignment_reachable (type, is_packed);
28017 static bool
28018 arm_builtin_support_vector_misalignment (machine_mode mode,
28019 const_tree type, int misalignment,
28020 bool is_packed)
28022 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28024 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28026 if (is_packed)
28027 return align == 1;
28029 /* If the misalignment is unknown, we should be able to handle the access
28030 so long as it is not to a member of a packed data structure. */
28031 if (misalignment == -1)
28032 return true;
28034 /* Return true if the misalignment is a multiple of the natural alignment
28035 of the vector's element type. This is probably always going to be
28036 true in practice, since we've already established that this isn't a
28037 packed access. */
28038 return ((misalignment % align) == 0);
28041 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28042 is_packed);
28045 static void
28046 arm_conditional_register_usage (void)
28048 int regno;
28050 if (TARGET_THUMB1 && optimize_size)
28052 /* When optimizing for size on Thumb-1, it's better not
28053 to use the HI regs, because of the overhead of
28054 stacking them. */
28055 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28056 fixed_regs[regno] = call_used_regs[regno] = 1;
28059 /* The link register can be clobbered by any branch insn,
28060 but we have no way to track that at present, so mark
28061 it as unavailable. */
28062 if (TARGET_THUMB1)
28063 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28065 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28067 /* VFPv3 registers are disabled when earlier VFP
28068 versions are selected due to the definition of
28069 LAST_VFP_REGNUM. */
28070 for (regno = FIRST_VFP_REGNUM;
28071 regno <= LAST_VFP_REGNUM; ++ regno)
28073 fixed_regs[regno] = 0;
28074 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28075 || regno >= FIRST_VFP_REGNUM + 32;
28079 if (TARGET_REALLY_IWMMXT)
28081 regno = FIRST_IWMMXT_GR_REGNUM;
28082 /* The 2002/10/09 revision of the XScale ABI has wCG0
28083 and wCG1 as call-preserved registers. The 2002/11/21
28084 revision changed this so that all wCG registers are
28085 scratch registers. */
28086 for (regno = FIRST_IWMMXT_GR_REGNUM;
28087 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28088 fixed_regs[regno] = 0;
28089 /* The XScale ABI has wR0 - wR9 as scratch registers,
28090 the rest as call-preserved registers. */
28091 for (regno = FIRST_IWMMXT_REGNUM;
28092 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28094 fixed_regs[regno] = 0;
28095 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28099 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28101 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28102 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28104 else if (TARGET_APCS_STACK)
28106 fixed_regs[10] = 1;
28107 call_used_regs[10] = 1;
28109 /* -mcaller-super-interworking reserves r11 for calls to
28110 _interwork_r11_call_via_rN(). Making the register global
28111 is an easy way of ensuring that it remains valid for all
28112 calls. */
28113 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28114 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28116 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28117 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28118 if (TARGET_CALLER_INTERWORKING)
28119 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28121 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28124 static reg_class_t
28125 arm_preferred_rename_class (reg_class_t rclass)
28127 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28128 using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
28129 so code size can be reduced. */
28130 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28131 return LO_REGS;
28132 else
28133 return NO_REGS;
28136 /* Compute the attribute "length" of insn "*push_multi".
28137 So this function MUST be kept in sync with that insn pattern. */
28139 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28141 int i, regno, hi_reg;
28142 int num_saves = XVECLEN (parallel_op, 0);
28144 /* ARM mode. */
28145 if (TARGET_ARM)
28146 return 4;
28147 /* Thumb1 mode. */
28148 if (TARGET_THUMB1)
28149 return 2;
28151 /* Thumb2 mode. */
28152 regno = REGNO (first_op);
28153 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28154 list fits in 8 bits.  Normally this means all registers in the list must be
28155 LO_REGS, that is (R0-R7).  If any HI_REGS register is used, we must use the
28156 32-bit encoding.  The one exception is PUSH, where LR, despite being in
28157 HI_REGS, can still be used with the 16-bit encoding. */
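/* For example, "push {r0-r7, lr}" can use the 16-bit encoding (length 2),
   whereas "push {r4, r8}" needs the 32-bit encoding (length 4). */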
28158 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28159 for (i = 1; i < num_saves && !hi_reg; i++)
28161 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28162 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28165 if (!hi_reg)
28166 return 2;
28167 return 4;
28170 /* Compute the attribute "length" of insn. Currently, this function is used
28171 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28172 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28173 rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
28174 true if OPERANDS contains an insn which explicitly updates the base register. */
28177 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28179 /* ARM mode. */
28180 if (TARGET_ARM)
28181 return 4;
28182 /* Thumb1 mode. */
28183 if (TARGET_THUMB1)
28184 return 2;
28186 rtx parallel_op = operands[0];
28187 /* Initialize to the number of elements in the PARALLEL. */
28188 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28189 /* Initialize to the number of the base register. */
28190 unsigned regno = REGNO (operands[1]);
28191 /* Skip the return and write-back patterns.
28192 We only need the register pop patterns for later analysis. */
28193 unsigned first_indx = 0;
28194 first_indx += return_pc ? 1 : 0;
28195 first_indx += write_back_p ? 1 : 0;
28197 /* A pop operation can be done through LDM or POP.  If the base register is SP
28198 and write-back is used, then the LDM is an alias of POP. */
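/* For example, "pop {r4, pc}" (SP base with write-back) still fits the
   16-bit encoding, whereas an LDM from a high base register, or an LDM whose
   register list includes pc, needs the 32-bit encoding. */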
28199 bool pop_p = (regno == SP_REGNUM && write_back_p);
28200 bool ldm_p = !pop_p;
28202 /* Check base register for LDM. */
28203 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28204 return 4;
28206 /* Check each register in the list. */
28207 for (; indx >= first_indx; indx--)
28209 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28210 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28211 comment in arm_attr_length_push_multi. */
28212 if (REGNO_REG_CLASS (regno) == HI_REGS
28213 && (regno != PC_REGNUM || ldm_p))
28214 return 4;
28217 return 2;
28220 /* Compute the number of instructions emitted by output_move_double. */
28222 arm_count_output_move_double_insns (rtx *operands)
28224 int count;
28225 rtx ops[2];
28226 /* output_move_double may modify the operands array, so call it
28227 here on a copy of the array. */
28228 ops[0] = operands[0];
28229 ops[1] = operands[1];
28230 output_move_double (ops, false, &count);
28231 return count;
28235 vfp3_const_double_for_fract_bits (rtx operand)
28237 REAL_VALUE_TYPE r0;
28239 if (!CONST_DOUBLE_P (operand))
28240 return 0;
28242 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28243 if (exact_real_inverse (DFmode, &r0)
28244 && !REAL_VALUE_NEGATIVE (r0))
28246 if (exact_real_truncate (DFmode, &r0))
28248 HOST_WIDE_INT value = real_to_integer (&r0);
28249 value = value & 0xffffffff;
28250 if ((value != 0) && ( (value & (value - 1)) == 0))
28252 int ret = exact_log2 (value);
28253 gcc_assert (IN_RANGE (ret, 0, 31));
28254 return ret;
28258 return 0;
28261 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28262 log2 is in [1, 32], return that log2. Otherwise return -1.
28263 This is used in the patterns for vcvt.s32.f32 floating-point to
28264 fixed-point conversions. */
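/* For example, 4.0 yields 2 and 2.0 yields 1, while 1.0, 3.0, negative
   values and NaNs all yield -1. */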
28267 vfp3_const_double_for_bits (rtx x)
28269 const REAL_VALUE_TYPE *r;
28271 if (!CONST_DOUBLE_P (x))
28272 return -1;
28274 r = CONST_DOUBLE_REAL_VALUE (x);
28276 if (REAL_VALUE_NEGATIVE (*r)
28277 || REAL_VALUE_ISNAN (*r)
28278 || REAL_VALUE_ISINF (*r)
28279 || !real_isinteger (r, SFmode))
28280 return -1;
28282 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28284 /* The exact_log2 above will have returned -1 if this is
28285 not an exact log2. */
28286 if (!IN_RANGE (hwint, 1, 32))
28287 return -1;
28289 return hwint;
28293 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28295 static void
28296 arm_pre_atomic_barrier (enum memmodel model)
28298 if (need_atomic_barrier_p (model, true))
28299 emit_insn (gen_memory_barrier ());
28302 static void
28303 arm_post_atomic_barrier (enum memmodel model)
28305 if (need_atomic_barrier_p (model, false))
28306 emit_insn (gen_memory_barrier ());
28309 /* Emit the load-exclusive and store-exclusive instructions.
28310 Use acquire and release versions if necessary. */
28312 static void
28313 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28315 rtx (*gen) (rtx, rtx);
28317 if (acq)
28319 switch (mode)
28321 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28322 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28323 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28324 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28325 default:
28326 gcc_unreachable ();
28329 else
28331 switch (mode)
28333 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28334 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28335 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28336 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28337 default:
28338 gcc_unreachable ();
28342 emit_insn (gen (rval, mem));
28345 static void
28346 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28347 rtx mem, bool rel)
28349 rtx (*gen) (rtx, rtx, rtx);
28351 if (rel)
28353 switch (mode)
28355 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28356 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28357 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28358 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28359 default:
28360 gcc_unreachable ();
28363 else
28365 switch (mode)
28367 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28368 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28369 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28370 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28371 default:
28372 gcc_unreachable ();
28376 emit_insn (gen (bval, rval, mem));
28379 /* Mark the previous jump instruction as unlikely. */
28381 static void
28382 emit_unlikely_jump (rtx insn)
28384 rtx_insn *jump = emit_jump_insn (insn);
28385 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28388 /* Expand a compare and swap pattern. */
28390 void
28391 arm_expand_compare_and_swap (rtx operands[])
28393 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28394 machine_mode mode;
28395 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28397 bval = operands[0];
28398 rval = operands[1];
28399 mem = operands[2];
28400 oldval = operands[3];
28401 newval = operands[4];
28402 is_weak = operands[5];
28403 mod_s = operands[6];
28404 mod_f = operands[7];
28405 mode = GET_MODE (mem);
28407 /* Normally the succ memory model must be stronger than fail, but in the
28408 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28409 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28411 if (TARGET_HAVE_LDACQ
28412 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28413 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28414 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28416 switch (mode)
28418 case E_QImode:
28419 case E_HImode:
28420 /* For narrow modes, we're going to perform the comparison in SImode,
28421 so do the zero-extension now. */
28422 rval = gen_reg_rtx (SImode);
28423 oldval = convert_modes (SImode, mode, oldval, true);
28424 /* FALLTHRU */
28426 case E_SImode:
28427 /* Force the value into a register if needed. We waited until after
28428 the zero-extension above to do this properly. */
28429 if (!arm_add_operand (oldval, SImode))
28430 oldval = force_reg (SImode, oldval);
28431 break;
28433 case E_DImode:
28434 if (!cmpdi_operand (oldval, mode))
28435 oldval = force_reg (mode, oldval);
28436 break;
28438 default:
28439 gcc_unreachable ();
28442 if (TARGET_THUMB1)
28444 switch (mode)
28446 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28447 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28448 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28449 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28450 default:
28451 gcc_unreachable ();
28454 else
28456 switch (mode)
28458 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28459 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28460 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28461 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28462 default:
28463 gcc_unreachable ();
28467 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28468 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28470 if (mode == QImode || mode == HImode)
28471 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28473 /* In all cases, we arrange for success to be signaled by Z set.
28474 This arrangement allows for the boolean result to be used directly
28475 in a subsequent branch, post optimization. For Thumb-1 targets, the
28476 boolean negation of the result is also stored in bval because the Thumb-1
28477 backend lacks dependency tracking for the CC flag, since flag-setting is not
28478 represented at the RTL level. */
28479 if (TARGET_THUMB1)
28480 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28481 else
28483 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28484 emit_insn (gen_rtx_SET (bval, x));
28488 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28489 another memory store between the load-exclusive and store-exclusive can
28490 reset the monitor from Exclusive to Open state. This means we must wait
28491 until after reload to split the pattern, lest we get a register spill in
28492 the middle of the atomic sequence. Success of the compare and swap is
28493 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28494 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28495 atomic_compare_and_swapmode standard pattern in operand 0). */
28497 void
28498 arm_split_compare_and_swap (rtx operands[])
28500 rtx rval, mem, oldval, newval, neg_bval;
28501 machine_mode mode;
28502 enum memmodel mod_s, mod_f;
28503 bool is_weak;
28504 rtx_code_label *label1, *label2;
28505 rtx x, cond;
28507 rval = operands[1];
28508 mem = operands[2];
28509 oldval = operands[3];
28510 newval = operands[4];
28511 is_weak = (operands[5] != const0_rtx);
28512 mod_s = memmodel_from_int (INTVAL (operands[6]));
28513 mod_f = memmodel_from_int (INTVAL (operands[7]));
28514 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28515 mode = GET_MODE (mem);
28517 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28519 bool use_acquire = TARGET_HAVE_LDACQ
28520 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28521 || is_mm_release (mod_s));
28523 bool use_release = TARGET_HAVE_LDACQ
28524 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28525 || is_mm_acquire (mod_s));
28527 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28528 a full barrier is emitted after the store-release. */
28529 if (is_armv8_sync)
28530 use_acquire = false;
28532 /* Checks whether a barrier is needed and emits one accordingly. */
28533 if (!(use_acquire || use_release))
28534 arm_pre_atomic_barrier (mod_s);
28536 label1 = NULL;
28537 if (!is_weak)
28539 label1 = gen_label_rtx ();
28540 emit_label (label1);
28542 label2 = gen_label_rtx ();
28544 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28546 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28547 as required to communicate with arm_expand_compare_and_swap. */
28548 if (TARGET_32BIT)
28550 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28551 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28552 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28553 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28554 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28556 else
28558 emit_move_insn (neg_bval, const1_rtx);
28559 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28560 if (thumb1_cmpneg_operand (oldval, SImode))
28561 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28562 label2, cond));
28563 else
28564 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28567 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28569 /* Weak or strong, we want EQ to be true for success, so that we
28570 match the flags that we got from the compare above. */
28571 if (TARGET_32BIT)
28573 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28574 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28575 emit_insn (gen_rtx_SET (cond, x));
28578 if (!is_weak)
28580 /* Z is set to boolean value of !neg_bval, as required to communicate
28581 with arm_expand_compare_and_swap. */
28582 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28583 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28586 if (!is_mm_relaxed (mod_f))
28587 emit_label (label2);
28589 /* Checks whether a barrier is needed and emits one accordingly. */
28590 if (is_armv8_sync
28591 || !(use_acquire || use_release))
28592 arm_post_atomic_barrier (mod_s);
28594 if (is_mm_relaxed (mod_f))
28595 emit_label (label2);
28598 /* Split an atomic operation pattern. Operation is given by CODE and is one
28599 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28600 operation). Operation is performed on the content at MEM and on VALUE
28601 following the memory model MODEL_RTX. The content at MEM before and after
28602 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28603 success of the operation is returned in COND. Using a scratch register or
28604 an operand register for these determines what result is returned for that
28605 pattern. */
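/* A rough sketch of the sequence emitted below for, e.g., an atomic add:

       label:  ldrex   old, [mem]
               add     new, old, value
               strex   cond, new, [mem]
               cmp     cond, #0
               bne     label

   with acquire/release variants of ldrex/strex and/or explicit barriers
   substituted depending on MODEL_RTX and the target architecture. */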
28607 void
28608 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28609 rtx value, rtx model_rtx, rtx cond)
28611 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28612 machine_mode mode = GET_MODE (mem);
28613 machine_mode wmode = (mode == DImode ? DImode : SImode);
28614 rtx_code_label *label;
28615 bool all_low_regs, bind_old_new;
28616 rtx x;
28618 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28620 bool use_acquire = TARGET_HAVE_LDACQ
28621 && !(is_mm_relaxed (model) || is_mm_consume (model)
28622 || is_mm_release (model));
28624 bool use_release = TARGET_HAVE_LDACQ
28625 && !(is_mm_relaxed (model) || is_mm_consume (model)
28626 || is_mm_acquire (model));
28628 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28629 a full barrier is emitted after the store-release. */
28630 if (is_armv8_sync)
28631 use_acquire = false;
28633 /* Checks whether a barrier is needed and emits one accordingly. */
28634 if (!(use_acquire || use_release))
28635 arm_pre_atomic_barrier (model);
28637 label = gen_label_rtx ();
28638 emit_label (label);
28640 if (new_out)
28641 new_out = gen_lowpart (wmode, new_out);
28642 if (old_out)
28643 old_out = gen_lowpart (wmode, old_out);
28644 else
28645 old_out = new_out;
28646 value = simplify_gen_subreg (wmode, value, mode, 0);
28648 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28650 /* Does the operation require destination and first operand to use the same
28651 register? This is decided by register constraints of relevant insn
28652 patterns in thumb1.md. */
28653 gcc_assert (!new_out || REG_P (new_out));
28654 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28655 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28656 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28657 bind_old_new =
28658 (TARGET_THUMB1
28659 && code != SET
28660 && code != MINUS
28661 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28663 /* We want to return the old value while putting the result of the operation
28664 in the same register as the old value so copy the old value over to the
28665 destination register and use that register for the operation. */
28666 if (old_out && bind_old_new)
28668 emit_move_insn (new_out, old_out);
28669 old_out = new_out;
28672 switch (code)
28674 case SET:
28675 new_out = value;
28676 break;
28678 case NOT:
28679 x = gen_rtx_AND (wmode, old_out, value);
28680 emit_insn (gen_rtx_SET (new_out, x));
28681 x = gen_rtx_NOT (wmode, new_out);
28682 emit_insn (gen_rtx_SET (new_out, x));
28683 break;
28685 case MINUS:
28686 if (CONST_INT_P (value))
28688 value = GEN_INT (-INTVAL (value));
28689 code = PLUS;
28691 /* FALLTHRU */
28693 case PLUS:
28694 if (mode == DImode)
28696 /* DImode plus/minus need to clobber flags. */
28697 /* The adddi3 and subdi3 patterns are incorrectly written so that
28698 they require matching operands, even when we could easily support
28699 three operands. Thankfully, this can be fixed up post-splitting,
28700 as the individual add+adc patterns do accept three operands and
28701 post-reload cprop can make these moves go away. */
28702 emit_move_insn (new_out, old_out);
28703 if (code == PLUS)
28704 x = gen_adddi3 (new_out, new_out, value);
28705 else
28706 x = gen_subdi3 (new_out, new_out, value);
28707 emit_insn (x);
28708 break;
28710 /* FALLTHRU */
28712 default:
28713 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28714 emit_insn (gen_rtx_SET (new_out, x));
28715 break;
28718 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28719 use_release);
28721 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28722 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28724 /* Checks whether a barrier is needed and emits one accordingly. */
28725 if (is_armv8_sync
28726 || !(use_acquire || use_release))
28727 arm_post_atomic_barrier (model);
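/* Sketch of the shape of the loop emitted above for, e.g., an atomic add;
   barriers and the acquire/release variants are omitted and this is
   illustrative rather than the exact RTL:

     loop:
       ldrex   old_out, [mem]
       add     new_out, old_out, value
       strex   cond, new_out, [mem]
       cmp     cond, #0
       bne     loop  */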
28730 #define MAX_VECT_LEN 16
28732 struct expand_vec_perm_d
28734 rtx target, op0, op1;
28735 auto_vec_perm_indices perm;
28736 machine_mode vmode;
28737 bool one_vector_p;
28738 bool testing_p;
28741 /* Generate a variable permutation. */
28743 static void
28744 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28746 machine_mode vmode = GET_MODE (target);
28747 bool one_vector_p = rtx_equal_p (op0, op1);
28749 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28750 gcc_checking_assert (GET_MODE (op0) == vmode);
28751 gcc_checking_assert (GET_MODE (op1) == vmode);
28752 gcc_checking_assert (GET_MODE (sel) == vmode);
28753 gcc_checking_assert (TARGET_NEON);
28755 if (one_vector_p)
28757 if (vmode == V8QImode)
28758 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28759 else
28760 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28762 else
28764 rtx pair;
28766 if (vmode == V8QImode)
28768 pair = gen_reg_rtx (V16QImode);
28769 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28770 pair = gen_lowpart (TImode, pair);
28771 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28773 else
28775 pair = gen_reg_rtx (OImode);
28776 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28777 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28782 void
28783 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28785 machine_mode vmode = GET_MODE (target);
28786 unsigned int nelt = GET_MODE_NUNITS (vmode);
28787 bool one_vector_p = rtx_equal_p (op0, op1);
28788 rtx mask;
28790 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28791 numbering of elements for big-endian, we must reverse the order. */
28792 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28794 /* The VTBL instruction does not use a modulo index, so we must take care
28795 of that ourselves. */
28796 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28797 mask = gen_const_vec_duplicate (vmode, mask);
28798 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28800 arm_expand_vec_perm_1 (target, op0, op1, sel);
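/* Worked example of the masking above (illustrative): for a two-operand
   V8QImode permute, nelt is 8, so the mask is 15 and a selector element of
   19 wraps to 19 & 15 = 3, i.e. lane 3 of the concatenated {op0, op1} pair,
   exactly as VEC_PERM_EXPR requires but VTBL alone would not guarantee.  */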
28803 /* Map lane ordering between architectural lane order and GCC lane order,
28804 taking into account ABI. See comment above output_move_neon for details. */
28806 static int
28807 neon_endian_lane_map (machine_mode mode, int lane)
28809 if (BYTES_BIG_ENDIAN)
28811 int nelems = GET_MODE_NUNITS (mode);
28812 /* Reverse lane order. */
28813 lane = (nelems - 1 - lane);
28814 /* Reverse D register order, to match ABI. */
28815 if (GET_MODE_SIZE (mode) == 16)
28816 lane = lane ^ (nelems / 2);
28818 return lane;
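/* For example (illustrative only): on a big-endian target, lane 0 of a
   V4SImode value maps to 4 - 1 - 0 = 3, and because the mode is 16 bytes
   wide the D-register halves are then swapped, 3 ^ 2 = 1, so architectural
   lane 0 corresponds to GCC lane 1.  */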
28821 /* Some permutations index into pairs of vectors; this is a helper function
28822 to map indexes into those pairs of vectors. */
28824 static int
28825 neon_pair_endian_lane_map (machine_mode mode, int lane)
28827 int nelem = GET_MODE_NUNITS (mode);
28828 if (BYTES_BIG_ENDIAN)
28829 lane =
28830 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28831 return lane;
28834 /* Generate or test for an insn that supports a constant permutation. */
28836 /* Recognize patterns for the VUZP insns. */
28838 static bool
28839 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28841 unsigned int i, odd, mask, nelt = d->perm.length ();
28842 rtx out0, out1, in0, in1;
28843 rtx (*gen)(rtx, rtx, rtx, rtx);
28844 int first_elem;
28845 int swap_nelt;
28847 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28848 return false;
28850 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28851 big endian pattern on 64 bit vectors, so we correct for that. */
28852 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28853 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28855 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28857 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28858 odd = 0;
28859 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28860 odd = 1;
28861 else
28862 return false;
28863 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28865 for (i = 0; i < nelt; i++)
28867 unsigned elt =
28868 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28869 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28870 return false;
28873 /* Success! */
28874 if (d->testing_p)
28875 return true;
28877 switch (d->vmode)
28879 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28880 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28881 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28882 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28883 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28884 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28885 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28886 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28887 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28888 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28889 default:
28890 gcc_unreachable ();
28893 in0 = d->op0;
28894 in1 = d->op1;
28895 if (swap_nelt != 0)
28896 std::swap (in0, in1);
28898 out0 = d->target;
28899 out1 = gen_reg_rtx (d->vmode);
28900 if (odd)
28901 std::swap (out0, out1);
28903 emit_insn (gen (out0, in0, in1, out1));
28904 return true;
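/* A permutation this matches (little-endian, illustrative): for two V8QImode
   operands, the selector {0, 2, 4, 6, 8, 10, 12, 14} picks the even lanes of
   the {op0, op1} pair, which is the first result of VUZP.8, while the
   odd-lane selector {1, 3, 5, 7, 9, 11, 13, 15} maps to its second result.  */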
28907 /* Recognize patterns for the VZIP insns. */
28909 static bool
28910 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28912 unsigned int i, high, mask, nelt = d->perm.length ();
28913 rtx out0, out1, in0, in1;
28914 rtx (*gen)(rtx, rtx, rtx, rtx);
28915 int first_elem;
28916 bool is_swapped;
28918 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28919 return false;
28921 is_swapped = BYTES_BIG_ENDIAN;
28923 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28925 high = nelt / 2;
28926 if (first_elem == neon_endian_lane_map (d->vmode, high))
28928 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28929 high = 0;
28930 else
28931 return false;
28932 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28934 for (i = 0; i < nelt / 2; i++)
28936 unsigned elt =
28937 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28938 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28939 != elt)
28940 return false;
28941 elt =
28942 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28943 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28944 != elt)
28945 return false;
28948 /* Success! */
28949 if (d->testing_p)
28950 return true;
28952 switch (d->vmode)
28954 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28955 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28956 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28957 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28958 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28959 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28960 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28961 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28962 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28963 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28964 default:
28965 gcc_unreachable ();
28968 in0 = d->op0;
28969 in1 = d->op1;
28970 if (is_swapped)
28971 std::swap (in0, in1);
28973 out0 = d->target;
28974 out1 = gen_reg_rtx (d->vmode);
28975 if (high)
28976 std::swap (out0, out1);
28978 emit_insn (gen (out0, in0, in1, out1));
28979 return true;
28982 /* Recognize patterns for the VREV insns. */
28984 static bool
28985 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28987 unsigned int i, j, diff, nelt = d->perm.length ();
28988 rtx (*gen)(rtx, rtx);
28990 if (!d->one_vector_p)
28991 return false;
28993 diff = d->perm[0];
28994 switch (diff)
28996 case 7:
28997 switch (d->vmode)
28999 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29000 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29001 default:
29002 return false;
29004 break;
29005 case 3:
29006 switch (d->vmode)
29008 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29009 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29010 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29011 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29012 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29013 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29014 default:
29015 return false;
29017 break;
29018 case 1:
29019 switch (d->vmode)
29021 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29022 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29023 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29024 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29025 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29026 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29027 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29028 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29029 default:
29030 return false;
29032 break;
29033 default:
29034 return false;
29037 for (i = 0; i < nelt ; i += diff + 1)
29038 for (j = 0; j <= diff; j += 1)
29040 /* This is guaranteed to be true as the value of diff
29041 is 7, 3 or 1 and we should have enough elements in the
29042 queue to generate this. Getting a vector mask with a
29043 value of diff other than these values implies that
29044 something is wrong by the time we get here. */
29045 gcc_assert (i + j < nelt);
29046 if (d->perm[i + j] != i + diff - j)
29047 return false;
29050 /* Success! */
29051 if (d->testing_p)
29052 return true;
29054 emit_insn (gen (d->target, d->op0));
29055 return true;
29058 /* Recognize patterns for the VTRN insns. */
29060 static bool
29061 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29063 unsigned int i, odd, mask, nelt = d->perm.length ();
29064 rtx out0, out1, in0, in1;
29065 rtx (*gen)(rtx, rtx, rtx, rtx);
29067 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29068 return false;
29070 /* Note that these are little-endian tests. Adjust for big-endian later. */
29071 if (d->perm[0] == 0)
29072 odd = 0;
29073 else if (d->perm[0] == 1)
29074 odd = 1;
29075 else
29076 return false;
29077 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29079 for (i = 0; i < nelt; i += 2)
29081 if (d->perm[i] != i + odd)
29082 return false;
29083 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29084 return false;
29087 /* Success! */
29088 if (d->testing_p)
29089 return true;
29091 switch (d->vmode)
29093 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29094 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29095 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29096 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29097 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29098 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29099 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29100 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29101 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29102 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29103 default:
29104 gcc_unreachable ();
29107 in0 = d->op0;
29108 in1 = d->op1;
29109 if (BYTES_BIG_ENDIAN)
29111 std::swap (in0, in1);
29112 odd = !odd;
29115 out0 = d->target;
29116 out1 = gen_reg_rtx (d->vmode);
29117 if (odd)
29118 std::swap (out0, out1);
29120 emit_insn (gen (out0, in0, in1, out1));
29121 return true;
29124 /* Recognize patterns for the VEXT insns. */
29126 static bool
29127 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29129 unsigned int i, nelt = d->perm.length ();
29130 rtx (*gen) (rtx, rtx, rtx, rtx);
29131 rtx offset;
29133 unsigned int location;
29135 unsigned int next = d->perm[0] + 1;
29137 /* TODO: Handle GCC's numbering of elements for big-endian. */
29138 if (BYTES_BIG_ENDIAN)
29139 return false;
29141 /* Check if the extracted indexes are increasing by one. */
29142 for (i = 1; i < nelt; next++, i++)
29144 /* If we hit the most significant element of the 2nd vector in
29145 the previous iteration, no need to test further. */
29146 if (next == 2 * nelt)
29147 return false;
29149 /* If we are operating on only one vector: it could be a
29150 rotation. If there are only two elements of size < 64, let
29151 arm_evpc_neon_vrev catch it. */
29152 if (d->one_vector_p && (next == nelt))
29154 if ((nelt == 2) && (d->vmode != V2DImode))
29155 return false;
29156 else
29157 next = 0;
29160 if (d->perm[i] != next)
29161 return false;
29164 location = d->perm[0];
29166 switch (d->vmode)
29168 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29169 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29170 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29171 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29172 case E_V2SImode: gen = gen_neon_vextv2si; break;
29173 case E_V4SImode: gen = gen_neon_vextv4si; break;
29174 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29175 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29176 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29177 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29178 case E_V2DImode: gen = gen_neon_vextv2di; break;
29179 default:
29180 return false;
29183 /* Success! */
29184 if (d->testing_p)
29185 return true;
29187 offset = GEN_INT (location);
29188 emit_insn (gen (d->target, d->op0, d->op1, offset));
29189 return true;
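/* Example of a selector this accepts (little-endian, illustrative): for two
   V8QImode operands, {3, 4, 5, 6, 7, 8, 9, 10} extracts eight consecutive
   bytes starting at lane 3 of the {op0, op1} pair and is emitted as
   VEXT.8 d_target, d_op0, d_op1, #3, where the register names are schematic.  */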
29192 /* The NEON VTBL instruction is a fully variable permutation that's even
29193 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29194 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29195 can do slightly better by expanding this as a constant where we don't
29196 have to apply a mask. */
29198 static bool
29199 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29201 rtx rperm[MAX_VECT_LEN], sel;
29202 machine_mode vmode = d->vmode;
29203 unsigned int i, nelt = d->perm.length ();
29205 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29206 numbering of elements for big-endian, we must reverse the order. */
29207 if (BYTES_BIG_ENDIAN)
29208 return false;
29210 if (d->testing_p)
29211 return true;
29213 /* Generic code will try constant permutation twice. Once with the
29214 original mode and again with the elements lowered to QImode.
29215 So wait and don't do the selector expansion ourselves. */
29216 if (vmode != V8QImode && vmode != V16QImode)
29217 return false;
29219 for (i = 0; i < nelt; ++i)
29220 rperm[i] = GEN_INT (d->perm[i]);
29221 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29222 sel = force_reg (vmode, sel);
29224 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29225 return true;
29228 static bool
29229 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29231 /* Check if the input mask matches vext before reordering the
29232 operands. */
29233 if (TARGET_NEON)
29234 if (arm_evpc_neon_vext (d))
29235 return true;
29237 /* The pattern matching functions above are written to look for a small
29238 number to begin the sequence (0, 1, N/2). If we begin with an index
29239 from the second operand, we can swap the operands. */
29240 unsigned int nelt = d->perm.length ();
29241 if (d->perm[0] >= nelt)
29243 for (unsigned int i = 0; i < nelt; ++i)
29244 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29246 std::swap (d->op0, d->op1);
29249 if (TARGET_NEON)
29251 if (arm_evpc_neon_vuzp (d))
29252 return true;
29253 if (arm_evpc_neon_vzip (d))
29254 return true;
29255 if (arm_evpc_neon_vrev (d))
29256 return true;
29257 if (arm_evpc_neon_vtrn (d))
29258 return true;
29259 return arm_evpc_neon_vtbl (d);
29261 return false;
29264 /* Expand a vec_perm_const pattern. */
29266 bool
29267 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29269 struct expand_vec_perm_d d;
29270 int i, nelt, which;
29272 d.target = target;
29273 d.op0 = op0;
29274 d.op1 = op1;
29276 d.vmode = GET_MODE (target);
29277 gcc_assert (VECTOR_MODE_P (d.vmode));
29278 d.testing_p = false;
29280 nelt = GET_MODE_NUNITS (d.vmode);
29281 d.perm.reserve (nelt);
29282 for (i = which = 0; i < nelt; ++i)
29284 rtx e = XVECEXP (sel, 0, i);
29285 int ei = INTVAL (e) & (2 * nelt - 1);
29286 which |= (ei < nelt ? 1 : 2);
29287 d.perm.quick_push (ei);
29290 switch (which)
29292 default:
29293 gcc_unreachable();
29295 case 3:
29296 d.one_vector_p = false;
29297 if (!rtx_equal_p (op0, op1))
29298 break;
29300 /* The elements of PERM do not suggest that only the first operand
29301 is used, but both operands are identical. Allow easier matching
29302 of the permutation by folding the permutation into the single
29303 input vector. */
29304 /* FALLTHRU */
29305 case 2:
29306 for (i = 0; i < nelt; ++i)
29307 d.perm[i] &= nelt - 1;
29308 d.op0 = op1;
29309 d.one_vector_p = true;
29310 break;
29312 case 1:
29313 d.op1 = op0;
29314 d.one_vector_p = true;
29315 break;
29318 return arm_expand_vec_perm_const_1 (&d);
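/* Note on the WHICH classification above (illustrative): bit 0 is set when
   the selector uses lanes of OP0 and bit 1 when it uses lanes of OP1.  For
   instance, on V4SImode a selector of {4, 5, 6, 7} only references the
   second operand (which == 2), so the indices are folded back to
   {0, 1, 2, 3} and the permute is treated as a single-input operation.  */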
29321 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29323 static bool
29324 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29326 struct expand_vec_perm_d d;
29327 unsigned int i, nelt, which;
29328 bool ret;
29330 d.vmode = vmode;
29331 d.testing_p = true;
29332 d.perm.safe_splice (sel);
29334 /* Categorize the set of elements in the selector. */
29335 nelt = GET_MODE_NUNITS (d.vmode);
29336 for (i = which = 0; i < nelt; ++i)
29338 unsigned int e = d.perm[i];
29339 gcc_assert (e < 2 * nelt);
29340 which |= (e < nelt ? 1 : 2);
29343 /* For all elements from second vector, fold the elements to first. */
29344 if (which == 2)
29345 for (i = 0; i < nelt; ++i)
29346 d.perm[i] -= nelt;
29348 /* Check whether the mask can be applied to the vector type. */
29349 d.one_vector_p = (which != 3);
29351 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29352 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29353 if (!d.one_vector_p)
29354 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29356 start_sequence ();
29357 ret = arm_expand_vec_perm_const_1 (&d);
29358 end_sequence ();
29360 return ret;
29363 bool
29364 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29366 /* If we are soft float and either have ldrd or the mode fits
29367 in a word, then all auto increment forms are ok.  */
29368 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29369 return true;
29371 switch (code)
29373 /* Post increment and Pre Decrement are supported for all
29374 instruction forms except for vector forms. */
29375 case ARM_POST_INC:
29376 case ARM_PRE_DEC:
29377 if (VECTOR_MODE_P (mode))
29379 if (code != ARM_PRE_DEC)
29380 return true;
29381 else
29382 return false;
29385 return true;
29387 case ARM_POST_DEC:
29388 case ARM_PRE_INC:
29389 /* Without LDRD and mode size greater than
29390 word size, there is no point in auto-incrementing
29391 because ldm and stm will not have these forms. */
29392 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29393 return false;
29395 /* Vector and floating point modes do not support
29396 these auto increment forms. */
29397 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29398 return false;
29400 return true;
29402 default:
29403 return false;
29407 return false;
29410 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29411 on ARM, since we know that shifts by negative amounts are no-ops.
29412 Additionally, the default expansion code is not available or suitable
29413 for post-reload insn splits (this can occur when the register allocator
29414 chooses not to do a shift in NEON).
29416 This function is used in both initial expand and post-reload splits, and
29417 handles all kinds of 64-bit shifts.
29419 Input requirements:
29420 - It is safe for the input and output to be the same register, but
29421 early-clobber rules apply for the shift amount and scratch registers.
29422 - Shift by register requires both scratch registers. In all other cases
29423 the scratch registers may be NULL.
29424 - Ashiftrt by a register also clobbers the CC register. */
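/* Illustrative examples of the constant-amount cases handled below; these
   show the intended values, not the exact instruction selection:

     x >> 8  (LSHIFTRT):  out_down = (in_down >> 8) | (in_up << 24);
                          out_up   = in_up >> 8;

     x >> 40 (LSHIFTRT):  out_down = in_up >> 8;
                          out_up   = 0;  */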
29425 void
29426 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29427 rtx amount, rtx scratch1, rtx scratch2)
29429 rtx out_high = gen_highpart (SImode, out);
29430 rtx out_low = gen_lowpart (SImode, out);
29431 rtx in_high = gen_highpart (SImode, in);
29432 rtx in_low = gen_lowpart (SImode, in);
29434 /* Terminology:
29435 in = the register pair containing the input value.
29436 out = the destination register pair.
29437 up = the high- or low-part of each pair.
29438 down = the opposite part to "up".
29439 In a shift, we can consider bits to shift from "up"-stream to
29440 "down"-stream, so in a left-shift "up" is the low-part and "down"
29441 is the high-part of each register pair. */
29443 rtx out_up = code == ASHIFT ? out_low : out_high;
29444 rtx out_down = code == ASHIFT ? out_high : out_low;
29445 rtx in_up = code == ASHIFT ? in_low : in_high;
29446 rtx in_down = code == ASHIFT ? in_high : in_low;
29448 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29449 gcc_assert (out
29450 && (REG_P (out) || GET_CODE (out) == SUBREG)
29451 && GET_MODE (out) == DImode);
29452 gcc_assert (in
29453 && (REG_P (in) || GET_CODE (in) == SUBREG)
29454 && GET_MODE (in) == DImode);
29455 gcc_assert (amount
29456 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29457 && GET_MODE (amount) == SImode)
29458 || CONST_INT_P (amount)));
29459 gcc_assert (scratch1 == NULL
29460 || (GET_CODE (scratch1) == SCRATCH)
29461 || (GET_MODE (scratch1) == SImode
29462 && REG_P (scratch1)));
29463 gcc_assert (scratch2 == NULL
29464 || (GET_CODE (scratch2) == SCRATCH)
29465 || (GET_MODE (scratch2) == SImode
29466 && REG_P (scratch2)));
29467 gcc_assert (!REG_P (out) || !REG_P (amount)
29468 || !HARD_REGISTER_P (out)
29469 || (REGNO (out) != REGNO (amount)
29470 && REGNO (out) + 1 != REGNO (amount)));
29472 /* Macros to make following code more readable. */
29473 #define SUB_32(DEST,SRC) \
29474 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29475 #define RSB_32(DEST,SRC) \
29476 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29477 #define SUB_S_32(DEST,SRC) \
29478 gen_addsi3_compare0 ((DEST), (SRC), \
29479 GEN_INT (-32))
29480 #define SET(DEST,SRC) \
29481 gen_rtx_SET ((DEST), (SRC))
29482 #define SHIFT(CODE,SRC,AMOUNT) \
29483 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29484 #define LSHIFT(CODE,SRC,AMOUNT) \
29485 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29486 SImode, (SRC), (AMOUNT))
29487 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29488 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29489 SImode, (SRC), (AMOUNT))
29490 #define ORR(A,B) \
29491 gen_rtx_IOR (SImode, (A), (B))
29492 #define BRANCH(COND,LABEL) \
29493 gen_arm_cond_branch ((LABEL), \
29494 gen_rtx_ ## COND (CCmode, cc_reg, \
29495 const0_rtx), \
29496 cc_reg)
29498 /* Shifts by register and shifts by constant are handled separately. */
29499 if (CONST_INT_P (amount))
29501 /* We have a shift-by-constant. */
29503 /* First, handle out-of-range shift amounts.
29504 In both cases we try to match the result that an ARM instruction in a
29505 shift-by-register would give.  This helps reduce execution
29506 differences between optimization levels, but it won't stop other
29507 parts of the compiler doing different things.  This is "undefined
29508 behavior", in any case.  */
29509 if (INTVAL (amount) <= 0)
29510 emit_insn (gen_movdi (out, in));
29511 else if (INTVAL (amount) >= 64)
29513 if (code == ASHIFTRT)
29515 rtx const31_rtx = GEN_INT (31);
29516 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29517 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29519 else
29520 emit_insn (gen_movdi (out, const0_rtx));
29523 /* Now handle valid shifts. */
29524 else if (INTVAL (amount) < 32)
29526 /* Shifts by a constant less than 32. */
29527 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29529 /* Clearing the out register in DImode first avoids lots
29530 of spilling and results in less stack usage.
29531 Later this redundant insn is completely removed.
29532 Do that only if "in" and "out" are different registers. */
29533 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29534 emit_insn (SET (out, const0_rtx));
29535 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29536 emit_insn (SET (out_down,
29537 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29538 out_down)));
29539 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29541 else
29543 /* Shifts by a constant greater than 31. */
29544 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29546 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29547 emit_insn (SET (out, const0_rtx));
29548 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29549 if (code == ASHIFTRT)
29550 emit_insn (gen_ashrsi3 (out_up, in_up,
29551 GEN_INT (31)));
29552 else
29553 emit_insn (SET (out_up, const0_rtx));
29556 else
29558 /* We have a shift-by-register. */
29559 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29561 /* This alternative requires the scratch registers. */
29562 gcc_assert (scratch1 && REG_P (scratch1));
29563 gcc_assert (scratch2 && REG_P (scratch2));
29565 /* We will need the values "amount-32" and "32-amount" later.
29566 Swapping them around now allows the later code to be more general. */
29567 switch (code)
29569 case ASHIFT:
29570 emit_insn (SUB_32 (scratch1, amount));
29571 emit_insn (RSB_32 (scratch2, amount));
29572 break;
29573 case ASHIFTRT:
29574 emit_insn (RSB_32 (scratch1, amount));
29575 /* Also set CC = amount > 32. */
29576 emit_insn (SUB_S_32 (scratch2, amount));
29577 break;
29578 case LSHIFTRT:
29579 emit_insn (RSB_32 (scratch1, amount));
29580 emit_insn (SUB_32 (scratch2, amount));
29581 break;
29582 default:
29583 gcc_unreachable ();
29586 /* Emit code like this:
29588 arithmetic-left:
29589 out_down = in_down << amount;
29590 out_down = (in_up << (amount - 32)) | out_down;
29591 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29592 out_up = in_up << amount;
29594 arithmetic-right:
29595 out_down = in_down >> amount;
29596 out_down = (in_up << (32 - amount)) | out_down;
29597 if (amount < 32)
29598 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29599 out_up = in_up >> amount;
29601 logical-right:
29602 out_down = in_down >> amount;
29603 out_down = (in_up << (32 - amount)) | out_down;
29604 if (amount < 32)
29605 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29606 out_up = in_up >> amount;
29608 The ARM and Thumb2 variants are the same but implemented slightly
29609 differently. If this were only called during expand we could just
29610 use the Thumb2 case and let combine do the right thing, but this
29611 can also be called from post-reload splitters. */
29613 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29615 if (!TARGET_THUMB2)
29617 /* Emit code for ARM mode. */
29618 emit_insn (SET (out_down,
29619 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29620 if (code == ASHIFTRT)
29622 rtx_code_label *done_label = gen_label_rtx ();
29623 emit_jump_insn (BRANCH (LT, done_label));
29624 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29625 out_down)));
29626 emit_label (done_label);
29628 else
29629 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29630 out_down)));
29632 else
29634 /* Emit code for Thumb2 mode.
29635 Thumb2 can't do shift and or in one insn. */
29636 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29637 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29639 if (code == ASHIFTRT)
29641 rtx_code_label *done_label = gen_label_rtx ();
29642 emit_jump_insn (BRANCH (LT, done_label));
29643 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29644 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29645 emit_label (done_label);
29647 else
29649 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29650 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29654 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29657 #undef SUB_32
29658 #undef RSB_32
29659 #undef SUB_S_32
29660 #undef SET
29661 #undef SHIFT
29662 #undef LSHIFT
29663 #undef REV_LSHIFT
29664 #undef ORR
29665 #undef BRANCH
29668 /* Returns true if the pattern is a valid symbolic address, which is either a
29669 symbol_ref or (symbol_ref + addend).
29671 According to the ARM ELF ABI, the initial addend of REL-type relocations
29672 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29673 literal field of the instruction as a 16-bit signed value in the range
29674 -32768 <= A < 32768. */
29676 bool
29677 arm_valid_symbolic_address_p (rtx addr)
29679 rtx xop0, xop1 = NULL_RTX;
29680 rtx tmp = addr;
29682 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29683 return true;
29685 /* (const (plus: symbol_ref const_int)) */
29686 if (GET_CODE (addr) == CONST)
29687 tmp = XEXP (addr, 0);
29689 if (GET_CODE (tmp) == PLUS)
29691 xop0 = XEXP (tmp, 0);
29692 xop1 = XEXP (tmp, 1);
29694 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29695 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29698 return false;
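/* For example (illustrative), (const (plus (symbol_ref "foo") (const_int 16)))
   is accepted, while an addend of 0x8000 is rejected because it does not fit
   the signed 16-bit addend field described above.  */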
29701 /* Returns true if *COMPARISON is a valid comparison operation and puts
29702 the operands into a form that is valid.  */
29703 bool
29704 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29706 enum rtx_code code = GET_CODE (*comparison);
29707 int code_int;
29708 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29709 ? GET_MODE (*op2) : GET_MODE (*op1);
29711 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29713 if (code == UNEQ || code == LTGT)
29714 return false;
29716 code_int = (int)code;
29717 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29718 PUT_CODE (*comparison, (enum rtx_code)code_int);
29720 switch (mode)
29722 case E_SImode:
29723 if (!arm_add_operand (*op1, mode))
29724 *op1 = force_reg (mode, *op1);
29725 if (!arm_add_operand (*op2, mode))
29726 *op2 = force_reg (mode, *op2);
29727 return true;
29729 case E_DImode:
29730 if (!cmpdi_operand (*op1, mode))
29731 *op1 = force_reg (mode, *op1);
29732 if (!cmpdi_operand (*op2, mode))
29733 *op2 = force_reg (mode, *op2);
29734 return true;
29736 case E_HFmode:
29737 if (!TARGET_VFP_FP16INST)
29738 break;
29739 /* FP16 comparisons are done in SF mode. */
29740 mode = SFmode;
29741 *op1 = convert_to_mode (mode, *op1, 1);
29742 *op2 = convert_to_mode (mode, *op2, 1);
29743 /* Fall through. */
29744 case E_SFmode:
29745 case E_DFmode:
29746 if (!vfp_compare_operand (*op1, mode))
29747 *op1 = force_reg (mode, *op1);
29748 if (!vfp_compare_operand (*op2, mode))
29749 *op2 = force_reg (mode, *op2);
29750 return true;
29751 default:
29752 break;
29755 return false;
29759 /* Maximum number of instructions to set block of memory. */
29760 static int
29761 arm_block_set_max_insns (void)
29763 if (optimize_function_for_size_p (cfun))
29764 return 4;
29765 else
29766 return current_tune->max_insns_inline_memset;
29769 /* Return TRUE if it's profitable to set block of memory for
29770 non-vectorized case. VAL is the value to set the memory
29771 with. LENGTH is the number of bytes to set. ALIGN is the
29772 alignment of the destination memory in bytes. UNALIGNED_P
29773 is TRUE if we can only set the memory with instructions
29774 meeting alignment requirements. USE_STRD_P is TRUE if we
29775 can use strd to set the memory. */
29776 static bool
29777 arm_block_set_non_vect_profit_p (rtx val,
29778 unsigned HOST_WIDE_INT length,
29779 unsigned HOST_WIDE_INT align,
29780 bool unaligned_p, bool use_strd_p)
29782 int num = 0;
29783 /* For leftovers of 0-7 bytes, we can set the memory block using
29784 strb/strh/str with the minimum number of instructions.  */
29785 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29787 if (unaligned_p)
29789 num = arm_const_inline_cost (SET, val);
29790 num += length / align + length % align;
29792 else if (use_strd_p)
29794 num = arm_const_double_inline_cost (val);
29795 num += (length >> 3) + leftover[length & 7];
29797 else
29799 num = arm_const_inline_cost (SET, val);
29800 num += (length >> 2) + leftover[length & 3];
29803 /* We may be able to combine last pair STRH/STRB into a single STR
29804 by shifting one byte back. */
29805 if (unaligned_access && length > 3 && (length & 3) == 3)
29806 num--;
29808 return (num <= arm_block_set_max_insns ());
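/* Worked example (illustrative): for LENGTH == 15 with word alignment and
   the plain str path, the store count is (15 >> 2) + leftover[15 & 3]
   = 3 + 2, plus the cost of materialising VAL; if unaligned access is
   available the trailing STRH/STRB pair is counted as a single STR, saving
   one instruction.  */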
29811 /* Return TRUE if it's profitable to set block of memory for
29812 vectorized case. LENGTH is the number of bytes to set.
29813 ALIGN is the alignment of destination memory in bytes.
29814 MODE is the vector mode used to set the memory. */
29815 static bool
29816 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29817 unsigned HOST_WIDE_INT align,
29818 machine_mode mode)
29820 int num;
29821 bool unaligned_p = ((align & 3) != 0);
29822 unsigned int nelt = GET_MODE_NUNITS (mode);
29824 /* Instruction loading constant value. */
29825 num = 1;
29826 /* Instructions storing the memory. */
29827 num += (length + nelt - 1) / nelt;
29828 /* Instructions adjusting the address expression. Only need to
29829 adjust address expression if it's 4 bytes aligned and bytes
29830 leftover can only be stored by mis-aligned store instruction. */
29831 if (!unaligned_p && (length & 3) != 0)
29832 num++;
29834 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29835 if (!unaligned_p && mode == V16QImode)
29836 num--;
29838 return (num <= arm_block_set_max_insns ());
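/* Worked example (illustrative): a 4-byte-aligned LENGTH of 17 with
   V16QImode costs 1 (load the constant) + 2 (two stores) + 1 (address
   adjustment for the leftover byte) - 1 (the first vst1 special case)
   = 3 instructions, which is then compared against
   arm_block_set_max_insns ().  */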
29841 /* Set a block of memory using vectorization instructions for the
29842 unaligned case. We fill the first LENGTH bytes of the memory
29843 area starting from DSTBASE with byte constant VALUE. ALIGN is
29844 the alignment requirement of memory. Return TRUE if succeeded. */
29845 static bool
29846 arm_block_set_unaligned_vect (rtx dstbase,
29847 unsigned HOST_WIDE_INT length,
29848 unsigned HOST_WIDE_INT value,
29849 unsigned HOST_WIDE_INT align)
29851 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29852 rtx dst, mem;
29853 rtx val_vec, reg;
29854 rtx (*gen_func) (rtx, rtx);
29855 machine_mode mode;
29856 unsigned HOST_WIDE_INT v = value;
29857 unsigned int offset = 0;
29858 gcc_assert ((align & 0x3) != 0);
29859 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29860 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29861 if (length >= nelt_v16)
29863 mode = V16QImode;
29864 gen_func = gen_movmisalignv16qi;
29866 else
29868 mode = V8QImode;
29869 gen_func = gen_movmisalignv8qi;
29871 nelt_mode = GET_MODE_NUNITS (mode);
29872 gcc_assert (length >= nelt_mode);
29873 /* Skip if it isn't profitable. */
29874 if (!arm_block_set_vect_profit_p (length, align, mode))
29875 return false;
29877 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29878 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29880 v = sext_hwi (v, BITS_PER_WORD);
29882 reg = gen_reg_rtx (mode);
29883 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29884 /* Emit instruction loading the constant value. */
29885 emit_move_insn (reg, val_vec);
29887 /* Handle nelt_mode bytes in a vector. */
29888 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29890 emit_insn ((*gen_func) (mem, reg));
29891 if (i + 2 * nelt_mode <= length)
29893 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29894 offset += nelt_mode;
29895 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29899 /* If there are at least nelt_v8 bytes leftover, we must be in
29900 V16QI mode.  */
29901 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29903 /* Handle (8, 16) bytes leftover. */
29904 if (i + nelt_v8 < length)
29906 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29907 offset += length - i;
29908 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29910 /* We are shifting bytes back, set the alignment accordingly. */
29911 if ((length & 1) != 0 && align >= 2)
29912 set_mem_align (mem, BITS_PER_UNIT);
29914 emit_insn (gen_movmisalignv16qi (mem, reg));
29916 /* Handle (0, 8] bytes leftover. */
29917 else if (i < length && i + nelt_v8 >= length)
29919 if (mode == V16QImode)
29920 reg = gen_lowpart (V8QImode, reg);
29922 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29923 + (nelt_mode - nelt_v8))));
29924 offset += (length - i) + (nelt_mode - nelt_v8);
29925 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29927 /* We are shifting bytes back, set the alignment accordingly. */
29928 if ((length & 1) != 0 && align >= 2)
29929 set_mem_align (mem, BITS_PER_UNIT);
29931 emit_insn (gen_movmisalignv8qi (mem, reg));
29934 return true;
29937 /* Set a block of memory using vectorization instructions for the
29938 aligned case. We fill the first LENGTH bytes of the memory area
29939 starting from DSTBASE with byte constant VALUE. ALIGN is the
29940 alignment requirement of memory. Return TRUE if succeeded. */
29941 static bool
29942 arm_block_set_aligned_vect (rtx dstbase,
29943 unsigned HOST_WIDE_INT length,
29944 unsigned HOST_WIDE_INT value,
29945 unsigned HOST_WIDE_INT align)
29947 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
29948 rtx dst, addr, mem;
29949 rtx val_vec, reg;
29950 machine_mode mode;
29951 unsigned HOST_WIDE_INT v = value;
29952 unsigned int offset = 0;
29954 gcc_assert ((align & 0x3) == 0);
29955 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29956 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29957 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29958 mode = V16QImode;
29959 else
29960 mode = V8QImode;
29962 nelt_mode = GET_MODE_NUNITS (mode);
29963 gcc_assert (length >= nelt_mode);
29964 /* Skip if it isn't profitable. */
29965 if (!arm_block_set_vect_profit_p (length, align, mode))
29966 return false;
29968 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29970 v = sext_hwi (v, BITS_PER_WORD);
29972 reg = gen_reg_rtx (mode);
29973 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29974 /* Emit instruction loading the constant value. */
29975 emit_move_insn (reg, val_vec);
29977 i = 0;
29978 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29979 if (mode == V16QImode)
29981 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29982 emit_insn (gen_movmisalignv16qi (mem, reg));
29983 i += nelt_mode;
29984 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29985 if (i + nelt_v8 < length && i + nelt_v16 > length)
29987 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29988 offset += length - nelt_mode;
29989 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29990 /* We are shifting bytes back, set the alignment accordingly. */
29991 if ((length & 0x3) == 0)
29992 set_mem_align (mem, BITS_PER_UNIT * 4);
29993 else if ((length & 0x1) == 0)
29994 set_mem_align (mem, BITS_PER_UNIT * 2);
29995 else
29996 set_mem_align (mem, BITS_PER_UNIT);
29998 emit_insn (gen_movmisalignv16qi (mem, reg));
29999 return true;
30001 /* Fall through for bytes leftover. */
30002 mode = V8QImode;
30003 nelt_mode = GET_MODE_NUNITS (mode);
30004 reg = gen_lowpart (V8QImode, reg);
30007 /* Handle 8 bytes in a vector. */
30008 for (; (i + nelt_mode <= length); i += nelt_mode)
30010 addr = plus_constant (Pmode, dst, i);
30011 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30012 emit_move_insn (mem, reg);
30015 /* Handle single word leftover by shifting 4 bytes back. We can
30016 use aligned access for this case. */
30017 if (i + UNITS_PER_WORD == length)
30019 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30020 offset += i - UNITS_PER_WORD;
30021 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30022 /* We are shifting 4 bytes back, set the alignment accordingly. */
30023 if (align > UNITS_PER_WORD)
30024 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30026 emit_move_insn (mem, reg);
30028 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30029 We have to use unaligned access for this case. */
30030 else if (i < length)
30032 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30033 offset += length - nelt_mode;
30034 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30035 /* We are shifting bytes back, set the alignment accordingly. */
30036 if ((length & 1) == 0)
30037 set_mem_align (mem, BITS_PER_UNIT * 2);
30038 else
30039 set_mem_align (mem, BITS_PER_UNIT);
30041 emit_insn (gen_movmisalignv8qi (mem, reg));
30044 return true;
30047 /* Set a block of memory using plain strh/strb instructions, only
30048 using instructions allowed by ALIGN on the processor. We fill the
30049 first LENGTH bytes of the memory area starting from DSTBASE
30050 with byte constant VALUE. ALIGN is the alignment requirement
30051 of memory. */
30052 static bool
30053 arm_block_set_unaligned_non_vect (rtx dstbase,
30054 unsigned HOST_WIDE_INT length,
30055 unsigned HOST_WIDE_INT value,
30056 unsigned HOST_WIDE_INT align)
30058 unsigned int i;
30059 rtx dst, addr, mem;
30060 rtx val_exp, val_reg, reg;
30061 machine_mode mode;
30062 HOST_WIDE_INT v = value;
30064 gcc_assert (align == 1 || align == 2);
30066 if (align == 2)
30067 v |= (value << BITS_PER_UNIT);
30069 v = sext_hwi (v, BITS_PER_WORD);
30070 val_exp = GEN_INT (v);
30071 /* Skip if it isn't profitable. */
30072 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30073 align, true, false))
30074 return false;
30076 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30077 mode = (align == 2 ? HImode : QImode);
30078 val_reg = force_reg (SImode, val_exp);
30079 reg = gen_lowpart (mode, val_reg);
30081 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30083 addr = plus_constant (Pmode, dst, i);
30084 mem = adjust_automodify_address (dstbase, mode, addr, i);
30085 emit_move_insn (mem, reg);
30088 /* Handle single byte leftover. */
30089 if (i + 1 == length)
30091 reg = gen_lowpart (QImode, val_reg);
30092 addr = plus_constant (Pmode, dst, i);
30093 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30094 emit_move_insn (mem, reg);
30095 i++;
30098 gcc_assert (i == length);
30099 return true;
30102 /* Set a block of memory using plain strd/str/strh/strb instructions,
30103 to permit unaligned copies on processors which support unaligned
30104 semantics for those instructions. We fill the first LENGTH bytes
30105 of the memory area starting from DSTBASE with byte constant VALUE.
30106 ALIGN is the alignment requirement of memory. */
30107 static bool
30108 arm_block_set_aligned_non_vect (rtx dstbase,
30109 unsigned HOST_WIDE_INT length,
30110 unsigned HOST_WIDE_INT value,
30111 unsigned HOST_WIDE_INT align)
30113 unsigned int i;
30114 rtx dst, addr, mem;
30115 rtx val_exp, val_reg, reg;
30116 unsigned HOST_WIDE_INT v;
30117 bool use_strd_p;
30119 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30120 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30122 v = (value | (value << 8) | (value << 16) | (value << 24));
30123 if (length < UNITS_PER_WORD)
30124 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30126 if (use_strd_p)
30127 v |= (v << BITS_PER_WORD);
30128 else
30129 v = sext_hwi (v, BITS_PER_WORD);
30131 val_exp = GEN_INT (v);
30132 /* Skip if it isn't profitable. */
30133 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30134 align, false, use_strd_p))
30136 if (!use_strd_p)
30137 return false;
30139 /* Try without strd. */
30140 v = (v >> BITS_PER_WORD);
30141 v = sext_hwi (v, BITS_PER_WORD);
30142 val_exp = GEN_INT (v);
30143 use_strd_p = false;
30144 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30145 align, false, use_strd_p))
30146 return false;
30149 i = 0;
30150 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30151 /* Handle double words using strd if possible. */
30152 if (use_strd_p)
30154 val_reg = force_reg (DImode, val_exp);
30155 reg = val_reg;
30156 for (; (i + 8 <= length); i += 8)
30158 addr = plus_constant (Pmode, dst, i);
30159 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30160 emit_move_insn (mem, reg);
30163 else
30164 val_reg = force_reg (SImode, val_exp);
30166 /* Handle words. */
30167 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30168 for (; (i + 4 <= length); i += 4)
30170 addr = plus_constant (Pmode, dst, i);
30171 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30172 if ((align & 3) == 0)
30173 emit_move_insn (mem, reg);
30174 else
30175 emit_insn (gen_unaligned_storesi (mem, reg));
30178 /* Merge last pair of STRH and STRB into a STR if possible. */
30179 if (unaligned_access && i > 0 && (i + 3) == length)
30181 addr = plus_constant (Pmode, dst, i - 1);
30182 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30183 /* We are shifting one byte back, set the alignment accordingly. */
30184 if ((align & 1) == 0)
30185 set_mem_align (mem, BITS_PER_UNIT);
30187 /* Most likely this is an unaligned access, and we can't tell at
30188 compilation time. */
30189 emit_insn (gen_unaligned_storesi (mem, reg));
30190 return true;
30193 /* Handle half word leftover. */
30194 if (i + 2 <= length)
30196 reg = gen_lowpart (HImode, val_reg);
30197 addr = plus_constant (Pmode, dst, i);
30198 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30199 if ((align & 1) == 0)
30200 emit_move_insn (mem, reg);
30201 else
30202 emit_insn (gen_unaligned_storehi (mem, reg));
30204 i += 2;
30207 /* Handle single byte leftover. */
30208 if (i + 1 == length)
30210 reg = gen_lowpart (QImode, val_reg);
30211 addr = plus_constant (Pmode, dst, i);
30212 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30213 emit_move_insn (mem, reg);
30216 return true;
30219 /* Set a block of memory using vectorization instructions for both
30220 aligned and unaligned cases. We fill the first LENGTH bytes of
30221 the memory area starting from DSTBASE with byte constant VALUE.
30222 ALIGN is the alignment requirement of memory. */
30223 static bool
30224 arm_block_set_vect (rtx dstbase,
30225 unsigned HOST_WIDE_INT length,
30226 unsigned HOST_WIDE_INT value,
30227 unsigned HOST_WIDE_INT align)
30229 /* Check whether we need to use unaligned store instruction. */
30230 if (((align & 3) != 0 || (length & 3) != 0)
30231 /* Check whether unaligned store instruction is available. */
30232 && (!unaligned_access || BYTES_BIG_ENDIAN))
30233 return false;
30235 if ((align & 3) == 0)
30236 return arm_block_set_aligned_vect (dstbase, length, value, align);
30237 else
30238 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30241 /* Expand string store operation.  First we try to do that by using
30242 vectorization instructions, then try with ARM unaligned access and
30243 double-word store if profitable. OPERANDS[0] is the destination,
30244 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30245 initialize the memory, OPERANDS[3] is the known alignment of the
30246 destination. */
30247 bool
30248 arm_gen_setmem (rtx *operands)
30250 rtx dstbase = operands[0];
30251 unsigned HOST_WIDE_INT length;
30252 unsigned HOST_WIDE_INT value;
30253 unsigned HOST_WIDE_INT align;
30255 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30256 return false;
30258 length = UINTVAL (operands[1]);
30259 if (length > 64)
30260 return false;
30262 value = (UINTVAL (operands[2]) & 0xFF);
30263 align = UINTVAL (operands[3]);
30264 if (TARGET_NEON && length >= 8
30265 && current_tune->string_ops_prefer_neon
30266 && arm_block_set_vect (dstbase, length, value, align))
30267 return true;
30269 if (!unaligned_access && (align & 3) != 0)
30270 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30272 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
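/* End-to-end sketch (illustrative): clearing 13 bytes of a word-aligned
   buffer with the value 0xAB, assuming the NEON path above is declined, the
   tuning does not prefer LDRD/STRD and the cost check passes, goes through
   arm_block_set_aligned_non_vect, which materialises 0xABABABAB, emits three
   word stores for bytes 0-11 and a single STRB for the trailing byte.  */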
30276 static bool
30277 arm_macro_fusion_p (void)
30279 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30282 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30283 for MOVW / MOVT macro fusion. */
30285 static bool
30286 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30288 /* We are trying to fuse
30289 movw imm / movt imm
30290 instructions as a group that gets scheduled together. */
30292 rtx set_dest = SET_DEST (curr_set);
30294 if (GET_MODE (set_dest) != SImode)
30295 return false;
30297 /* We are trying to match:
30298 prev (movw) == (set (reg r0) (const_int imm16))
30299 curr (movt) == (set (zero_extract (reg r0)
30300 (const_int 16)
30301 (const_int 16))
30302 (const_int imm16_1))
30304 prev (movw) == (set (reg r1)
30305 (high (symbol_ref ("SYM"))))
30306 curr (movt) == (set (reg r0)
30307 (lo_sum (reg r1)
30308 (symbol_ref ("SYM")))) */
30310 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30312 if (CONST_INT_P (SET_SRC (curr_set))
30313 && CONST_INT_P (SET_SRC (prev_set))
30314 && REG_P (XEXP (set_dest, 0))
30315 && REG_P (SET_DEST (prev_set))
30316 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30317 return true;
30320 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30321 && REG_P (SET_DEST (curr_set))
30322 && REG_P (SET_DEST (prev_set))
30323 && GET_CODE (SET_SRC (prev_set)) == HIGH
30324 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30325 return true;
30327 return false;
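/* The fusible pair above corresponds to sequences such as (illustrative):

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   or the equivalent immediate form that builds a 32-bit constant in two
   halves; keeping the two instructions adjacent lets cores that fuse
   MOVW/MOVT treat them as one operation.  */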
30330 static bool
30331 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30333 rtx prev_set = single_set (prev);
30334 rtx curr_set = single_set (curr);
30336 if (!prev_set
30337 || !curr_set)
30338 return false;
30340 if (any_condjump_p (curr))
30341 return false;
30343 if (!arm_macro_fusion_p ())
30344 return false;
30346 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30347 && aarch_crypto_can_dual_issue (prev, curr))
30348 return true;
30350 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30351 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30352 return true;
30354 return false;
30357 /* Return true iff the instruction fusion described by OP is enabled. */
30358 bool
30359 arm_fusion_enabled_p (tune_params::fuse_ops op)
30361 return current_tune->fusible_ops & op;
30364 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30365 scheduled for speculative execution. Reject the long-running division
30366 and square-root instructions. */
30368 static bool
30369 arm_sched_can_speculate_insn (rtx_insn *insn)
30371 switch (get_attr_type (insn))
30373 case TYPE_SDIV:
30374 case TYPE_UDIV:
30375 case TYPE_FDIVS:
30376 case TYPE_FDIVD:
30377 case TYPE_FSQRTS:
30378 case TYPE_FSQRTD:
30379 case TYPE_NEON_FP_SQRT_S:
30380 case TYPE_NEON_FP_SQRT_D:
30381 case TYPE_NEON_FP_SQRT_S_Q:
30382 case TYPE_NEON_FP_SQRT_D_Q:
30383 case TYPE_NEON_FP_DIV_S:
30384 case TYPE_NEON_FP_DIV_D:
30385 case TYPE_NEON_FP_DIV_S_Q:
30386 case TYPE_NEON_FP_DIV_D_Q:
30387 return false;
30388 default:
30389 return true;
30393 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30395 static unsigned HOST_WIDE_INT
30396 arm_asan_shadow_offset (void)
30398 return HOST_WIDE_INT_1U << 29;
30402 /* This is a temporary fix for PR60655. Ideally we need
30403 to handle most of these cases in the generic part but
30404 currently we reject minus (..) (sym_ref). We try to
30405 ameliorate the case with minus (sym_ref1) (sym_ref2)
30406 where they are in the same section. */
30408 static bool
30409 arm_const_not_ok_for_debug_p (rtx p)
30411 tree decl_op0 = NULL;
30412 tree decl_op1 = NULL;
30414 if (GET_CODE (p) == UNSPEC)
30415 return true;
30416 if (GET_CODE (p) == MINUS)
30418 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30420 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30421 if (decl_op1
30422 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30423 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30425 if ((VAR_P (decl_op1)
30426 || TREE_CODE (decl_op1) == CONST_DECL)
30427 && (VAR_P (decl_op0)
30428 || TREE_CODE (decl_op0) == CONST_DECL))
30429 return (get_variable_section (decl_op1, false)
30430 != get_variable_section (decl_op0, false));
30432 if (TREE_CODE (decl_op1) == LABEL_DECL
30433 && TREE_CODE (decl_op0) == LABEL_DECL)
30434 return (DECL_CONTEXT (decl_op1)
30435 != DECL_CONTEXT (decl_op0));
30438 return true;
30442 return false;
30445 /* Return TRUE if X is a reference to a value in a constant pool.  */
30446 extern bool
30447 arm_is_constant_pool_ref (rtx x)
30449 return (MEM_P (x)
30450 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30451 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30454 /* Remember the last target of arm_set_current_function. */
30455 static GTY(()) tree arm_previous_fndecl;
30457 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30459 void
30460 save_restore_target_globals (tree new_tree)
30462 /* If we have a previous state, use it. */
30463 if (TREE_TARGET_GLOBALS (new_tree))
30464 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30465 else if (new_tree == target_option_default_node)
30466 restore_target_globals (&default_target_globals);
30467 else
30469 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30470 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30473 arm_option_params_internal ();
30476 /* Invalidate arm_previous_fndecl. */
30478 void
30479 arm_reset_previous_fndecl (void)
30481 arm_previous_fndecl = NULL_TREE;
30484 /* Establish appropriate back-end context for processing the function
30485 FNDECL. The argument might be NULL to indicate processing at top
30486 level, outside of any function scope. */
30488 static void
30489 arm_set_current_function (tree fndecl)
30491 if (!fndecl || fndecl == arm_previous_fndecl)
30492 return;
30494 tree old_tree = (arm_previous_fndecl
30495 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30496 : NULL_TREE);
30498 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30500 /* If current function has no attributes but previous one did,
30501 use the default node. */
30502 if (! new_tree && old_tree)
30503 new_tree = target_option_default_node;
30505 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30506 the default have been handled by save_restore_target_globals from
30507 arm_pragma_target_parse. */
30508 if (old_tree == new_tree)
30509 return;
30511 arm_previous_fndecl = fndecl;
30513 /* First set the target options. */
30514 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30516 save_restore_target_globals (new_tree);
30519 /* Implement TARGET_OPTION_PRINT. */
30521 static void
30522 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30524 int flags = ptr->x_target_flags;
30525 const char *fpu_name;
30527 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30528 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30530 fprintf (file, "%*sselected isa %s\n", indent, "",
30531 TARGET_THUMB2_P (flags) ? "thumb2" :
30532 TARGET_THUMB_P (flags) ? "thumb1" :
30533 "arm");
30535 if (ptr->x_arm_arch_string)
30536 fprintf (file, "%*sselected architecture %s\n", indent, "",
30537 ptr->x_arm_arch_string);
30539 if (ptr->x_arm_cpu_string)
30540 fprintf (file, "%*sselected CPU %s\n", indent, "",
30541 ptr->x_arm_cpu_string);
30543 if (ptr->x_arm_tune_string)
30544 fprintf (file, "%*sselected tune %s\n", indent, "",
30545 ptr->x_arm_tune_string);
30547 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30550 /* Hook to determine if one function can safely inline another. */
30552 static bool
30553 arm_can_inline_p (tree caller, tree callee)
30555 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30556 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30557 bool can_inline = true;
30559 struct cl_target_option *caller_opts
30560 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30561 : target_option_default_node);
30563 struct cl_target_option *callee_opts
30564 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30565 : target_option_default_node);
30567 if (callee_opts == caller_opts)
30568 return true;
30570 /* Callee's ISA features should be a subset of the caller's. */
30571 struct arm_build_target caller_target;
30572 struct arm_build_target callee_target;
30573 caller_target.isa = sbitmap_alloc (isa_num_bits);
30574 callee_target.isa = sbitmap_alloc (isa_num_bits);
30576 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30577 false);
30578 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30579 false);
30580 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30581 can_inline = false;
30583 sbitmap_free (caller_target.isa);
30584 sbitmap_free (callee_target.isa);
30586 /* OK to inline between different modes.
30587 Functions with mode-specific instructions, e.g. using asm,
30588 must be explicitly protected with noinline. */
30589 return can_inline;
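#if 0
/* Illustrative only, not part of the build: under the subset rule
   above, a callee whose target attribute enables more ISA features
   than its caller is not inlined, while the reverse direction is
   allowed.  The attribute strings below are just examples.  */
__attribute__ ((target ("fpu=neon")))
static inline int neon_callee (void) { return 0; }

__attribute__ ((target ("fpu=vfpv3")))
int vfp_caller (void) { return neon_callee (); }  /* not inlined */
#endif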
30592 /* Hook to fix function's alignment affected by target attribute. */
30594 static void
30595 arm_relayout_function (tree fndecl)
30597 if (DECL_USER_ALIGN (fndecl))
30598 return;
30600 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30602 if (!callee_tree)
30603 callee_tree = target_option_default_node;
30605 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30606 SET_DECL_ALIGN
30607 (fndecl,
30608 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30611 /* Inner function to process the attribute((target(...))): take one
30612 argument and set the current options from it. If the argument is a
30613 list, recursively process each element of the list. */
30615 static bool
30616 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30618 if (TREE_CODE (args) == TREE_LIST)
30620 bool ret = true;
30622 for (; args; args = TREE_CHAIN (args))
30623 if (TREE_VALUE (args)
30624 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30625 ret = false;
30626 return ret;
30629 else if (TREE_CODE (args) != STRING_CST)
30631 error ("attribute %<target%> argument not a string");
30632 return false;
30635 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30636 char *q;
30638 while ((q = strtok (argstr, ",")) != NULL)
30640 while (ISSPACE (*q)) ++q;
30642 argstr = NULL;
30643 if (!strncmp (q, "thumb", 5))
30644 opts->x_target_flags |= MASK_THUMB;
30646 else if (!strncmp (q, "arm", 3))
30647 opts->x_target_flags &= ~MASK_THUMB;
30649 else if (!strncmp (q, "fpu=", 4))
30651 int fpu_index;
30652 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30653 &fpu_index, CL_TARGET))
30655 error ("invalid fpu for attribute(target(\"%s\"))", q);
30656 return false;
30658 if (fpu_index == TARGET_FPU_auto)
30660 /* This doesn't really make sense until we support
30661 general dynamic selection of the architecture and all
30662 sub-features. */
30663 sorry ("auto fpu selection not currently permitted here");
30664 return false;
30666 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30668 else
30670 error ("attribute(target(\"%s\")) is unknown", q);
30671 return false;
30675 return true;
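#if 0
/* Illustrative only, not part of the build: the strings accepted by
   the parser above as they appear in user code.  "thumb"/"arm" flip
   MASK_THUMB and "fpu=" selects a named FPU; anything else is
   rejected with the errors above.  */
__attribute__ ((target ("thumb,fpu=vfpv4")))
void compiled_as_thumb_with_vfpv4 (void);

__attribute__ ((target ("arm")))
void compiled_as_arm (void);
#endif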
30678 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30680 tree
30681 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30682 struct gcc_options *opts_set)
30684 struct cl_target_option cl_opts;
30686 if (!arm_valid_target_attribute_rec (args, opts))
30687 return NULL_TREE;
30689 cl_target_option_save (&cl_opts, opts);
30690 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30691 arm_option_check_internal (opts);
30692 /* Do any overrides, such as global options arch=xxx. */
30693 arm_option_override_internal (opts, opts_set);
30695 return build_target_option_node (opts);
30698 static void
30699 add_attribute (const char * mode, tree *attributes)
30701 size_t len = strlen (mode);
30702 tree value = build_string (len, mode);
30704 TREE_TYPE (value) = build_array_type (char_type_node,
30705 build_index_type (size_int (len)));
30707 *attributes = tree_cons (get_identifier ("target"),
30708 build_tree_list (NULL_TREE, value),
30709 *attributes);
30712 /* For testing. Insert thumb or arm modes alternately on functions. */
30714 static void
30715 arm_insert_attributes (tree fndecl, tree * attributes)
30717 const char *mode;
30719 if (! TARGET_FLIP_THUMB)
30720 return;
30722 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30723 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30724 return;
30726 /* Nested definitions must inherit mode. */
30727 if (current_function_decl)
30729 mode = TARGET_THUMB ? "thumb" : "arm";
30730 add_attribute (mode, attributes);
30731 return;
30735 /* If there is already a setting, don't change it. */
30735 if (lookup_attribute ("target", *attributes) != NULL)
30736 return;
30738 mode = thumb_flipper ? "thumb" : "arm";
30739 add_attribute (mode, attributes);
30741 thumb_flipper = !thumb_flipper;
30744 /* Hook to validate attribute((target("string"))). */
30746 static bool
30747 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30748 tree args, int ARG_UNUSED (flags))
30750 bool ret = true;
30751 struct gcc_options func_options;
30752 tree cur_tree, new_optimize;
30753 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30755 /* Get the optimization options of the current function. */
30756 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30758 /* If the function changed the optimization levels as well as setting target
30759 options, start with the optimizations specified. */
30760 if (!func_optimize)
30761 func_optimize = optimization_default_node;
30763 /* Init func_options. */
30764 memset (&func_options, 0, sizeof (func_options));
30765 init_options_struct (&func_options, NULL);
30766 lang_hooks.init_options_struct (&func_options);
30768 /* Initialize func_options to the defaults. */
30769 cl_optimization_restore (&func_options,
30770 TREE_OPTIMIZATION (func_optimize));
30772 cl_target_option_restore (&func_options,
30773 TREE_TARGET_OPTION (target_option_default_node));
30775 /* Set func_options flags with new target mode. */
30776 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30777 &global_options_set);
30779 if (cur_tree == NULL_TREE)
30780 ret = false;
30782 new_optimize = build_optimization_node (&func_options);
30784 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30786 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30788 finalize_options_struct (&func_options);
30790 return ret;
30793 /* Match an ISA feature bitmap to a named FPU. We always use the
30794 first entry that exactly matches the feature set, so that we
30795 effectively canonicalize the FPU name for the assembler. */
30796 static const char*
30797 arm_identify_fpu_from_isa (sbitmap isa)
30799 auto_sbitmap fpubits (isa_num_bits);
30800 auto_sbitmap cand_fpubits (isa_num_bits);
30802 bitmap_and (fpubits, isa, isa_all_fpubits);
30804 /* If there are no ISA feature bits relating to the FPU, we must be
30805 doing soft-float. */
30806 if (bitmap_empty_p (fpubits))
30807 return "softvfp";
30809 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30811 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30812 if (bitmap_equal_p (fpubits, cand_fpubits))
30813 return all_fpus[i].name;
30815 /* We must find an entry, or things have gone wrong. */
30816 gcc_unreachable ();
30819 void
30820 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30823 fprintf (stream, "\t.syntax unified\n");
30825 if (TARGET_THUMB)
30827 if (is_called_in_ARM_mode (decl)
30828 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30829 && cfun->is_thunk))
30830 fprintf (stream, "\t.code 32\n");
30831 else if (TARGET_THUMB1)
30832 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30833 else
30834 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30836 else
30837 fprintf (stream, "\t.arm\n");
30839 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30840 (TARGET_SOFT_FLOAT
30841 ? "softvfp"
30842 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30844 if (TARGET_POKE_FUNCTION_NAME)
30845 arm_poke_function_name (stream, (const char *) name);
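#if 0
/* Illustrative only, not part of the build: for an ordinary Thumb-2
   function on a hard-float target the routine above emits directives
   roughly like

	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16

   where the .fpu name comes from arm_identify_fpu_from_isa (or is
   "softvfp" for soft-float) and vfpv3-d16 is only an example.  */
#endif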
30848 /* If MEM is in the form of [base+offset], extract the two parts
30849 of the address into BASE and OFFSET; otherwise return false
30850 after clearing BASE and OFFSET. */
30852 static bool
30853 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30855 rtx addr;
30857 gcc_assert (MEM_P (mem));
30859 addr = XEXP (mem, 0);
30861 /* Strip off const from addresses like (const (addr)). */
30862 if (GET_CODE (addr) == CONST)
30863 addr = XEXP (addr, 0);
30865 if (GET_CODE (addr) == REG)
30867 *base = addr;
30868 *offset = const0_rtx;
30869 return true;
30872 if (GET_CODE (addr) == PLUS
30873 && GET_CODE (XEXP (addr, 0)) == REG
30874 && CONST_INT_P (XEXP (addr, 1)))
30876 *base = XEXP (addr, 0);
30877 *offset = XEXP (addr, 1);
30878 return true;
30881 *base = NULL_RTX;
30882 *offset = NULL_RTX;
30884 return false;
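#if 0
/* Illustrative only, not part of the build: the address shapes the
   helper above accepts.  The register number and offset are arbitrary
   values chosen for the example.  */
static void
extract_base_offset_sketch (void)
{
  rtx base, offset;
  /* [r1] -> base = r1, offset = 0.  */
  rtx m1 = gen_rtx_MEM (SImode, gen_rtx_REG (SImode, 1));
  extract_base_offset_in_addr (m1, &base, &offset);
  /* [r1 + 8] -> base = r1, offset = 8.  */
  rtx m2 = gen_rtx_MEM (SImode,
			gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, 1),
				      GEN_INT (8)));
  extract_base_offset_in_addr (m2, &base, &offset);
  /* [r1 + r2] has no constant offset and is rejected.  */
}
#endif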
30887 /* If INSN is a load or store whose address has the form [base+offset],
30888 extract the two parts into BASE and OFFSET. IS_LOAD is set
30889 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30890 otherwise return FALSE. */
30892 static bool
30893 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30895 rtx x, dest, src;
30897 gcc_assert (INSN_P (insn));
30898 x = PATTERN (insn);
30899 if (GET_CODE (x) != SET)
30900 return false;
30902 src = SET_SRC (x);
30903 dest = SET_DEST (x);
30904 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30906 *is_load = false;
30907 extract_base_offset_in_addr (dest, base, offset);
30909 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30911 *is_load = true;
30912 extract_base_offset_in_addr (src, base, offset);
30914 else
30915 return false;
30917 return (*base != NULL_RTX && *offset != NULL_RTX);
30920 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30922 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30923 and PRI are only calculated for these instructions. For other instructions,
30924 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30925 instruction fusion can be supported by returning different priorities.
30927 It's important that irrelevant instructions get the largest FUSION_PRI. */
30929 static void
30930 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30931 int *fusion_pri, int *pri)
30933 int tmp, off_val;
30934 bool is_load;
30935 rtx base, offset;
30937 gcc_assert (INSN_P (insn));
30939 tmp = max_pri - 1;
30940 if (!fusion_load_store (insn, &base, &offset, &is_load))
30942 *pri = tmp;
30943 *fusion_pri = tmp;
30944 return;
30947 /* Load goes first. */
30948 if (is_load)
30949 *fusion_pri = tmp - 1;
30950 else
30951 *fusion_pri = tmp - 2;
30953 tmp /= 2;
30955 /* INSN with smaller base register goes first. */
30956 tmp -= ((REGNO (base) & 0xff) << 20);
30958 /* INSN with smaller offset goes first. */
30959 off_val = (int)(INTVAL (offset));
30960 if (off_val >= 0)
30961 tmp -= (off_val & 0xfffff);
30962 else
30963 tmp += ((- off_val) & 0xfffff);
30965 *pri = tmp;
30966 return;
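#if 0
/* Illustrative only, not part of the build: a worked instance of the
   computation above.  For a load from [r2, #8] with MAX_PRI == M:

     tmp        = M - 1
     fusion_pri = M - 2             (loads go before stores)
     tmp        = (M - 1) / 2
     tmp       -= 2 << 20           (base register number 2)
     tmp       -= 8                 (non-negative offset)
     pri        = tmp

   so fusible loads/stores sort first by base register and then by
   offset, keeping likely-fusible pairs adjacent.  */
#endif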
30970 /* Construct and return a PARALLEL RTX vector with elements numbering the
30971 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30972 the vector - from the perspective of the architecture. This does not
30973 line up with GCC's perspective on lane numbers, so we end up with
30974 different masks depending on our target endianness. The diagram
30975 below may help. We must draw the distinction when building masks
30976 which select one half of the vector: an instruction selecting
30977 architectural low-lanes for a big-endian target must be described using
30978 a mask selecting GCC high-lanes.
30980 Big-Endian Little-Endian
30982 GCC 0 1 2 3 3 2 1 0
30983 | x | x | x | x | | x | x | x | x |
30984 Architecture 3 2 1 0 3 2 1 0
30986 Low Mask: { 2, 3 } { 0, 1 }
30987 High Mask: { 0, 1 } { 2, 3 } */
30990 rtx
30991 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30993 int nunits = GET_MODE_NUNITS (mode);
30994 rtvec v = rtvec_alloc (nunits / 2);
30995 int high_base = nunits / 2;
30996 int low_base = 0;
30997 int base;
30998 rtx t1;
30999 int i;
31001 if (BYTES_BIG_ENDIAN)
31002 base = high ? low_base : high_base;
31003 else
31004 base = high ? high_base : low_base;
31006 for (i = 0; i < nunits / 2; i++)
31007 RTVEC_ELT (v, i) = GEN_INT (base + i);
31009 t1 = gen_rtx_PARALLEL (mode, v);
31010 return t1;
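#if 0
/* Illustrative only, not part of the build: what the function above
   produces for a four-lane mode such as V4SImode, written as bare lane
   numbers:

     little-endian: high -> (parallel [2 3])   low -> (parallel [0 1])
     big-endian:    high -> (parallel [0 1])   low -> (parallel [2 3])

   matching the Low/High mask rows in the diagram.  */
#endif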
31013 /* Check OP for validity as a PARALLEL RTX vector with elements
31014 numbering the lanes of either the high (HIGH == TRUE) or low
31015 (HIGH == FALSE) half, from the perspective of the architecture. See the
31016 diagram above arm_simd_vect_par_cnst_half for more details. */
31018 bool
31019 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31020 bool high)
31022 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31023 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31024 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31025 int i = 0;
31027 if (!VECTOR_MODE_P (mode))
31028 return false;
31030 if (count_op != count_ideal)
31031 return false;
31033 for (i = 0; i < count_ideal; i++)
31035 rtx elt_op = XVECEXP (op, 0, i);
31036 rtx elt_ideal = XVECEXP (ideal, 0, i);
31038 if (!CONST_INT_P (elt_op)
31039 || INTVAL (elt_ideal) != INTVAL (elt_op))
31040 return false;
31042 return true;
31045 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31046 in Thumb1. */
31047 static bool
31048 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31049 const_tree)
31051 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31052 if (vcall_offset && TARGET_THUMB1)
31053 return false;
31055 /* Otherwise ok. */
31056 return true;
31059 /* Generate RTL for a conditional branch with rtx comparison CODE in
31060 mode CC_MODE. The destination of the unlikely conditional branch
31061 is LABEL_REF. */
31063 void
31064 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31065 rtx label_ref)
31067 rtx x;
31068 x = gen_rtx_fmt_ee (code, VOIDmode,
31069 gen_rtx_REG (cc_mode, CC_REGNUM),
31070 const0_rtx);
31072 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31073 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31074 pc_rtx);
31075 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31078 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31080 For pure-code sections there is no letter code for this attribute, so
31081 output all the section flags numerically when this is needed. */
31083 static bool
31084 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31087 if (flags & SECTION_ARM_PURECODE)
31089 *num = 0x20000000;
31091 if (!(flags & SECTION_DEBUG))
31092 *num |= 0x2;
31093 if (flags & SECTION_EXCLUDE)
31094 *num |= 0x80000000;
31095 if (flags & SECTION_WRITE)
31096 *num |= 0x1;
31097 if (flags & SECTION_CODE)
31098 *num |= 0x4;
31099 if (flags & SECTION_MERGE)
31100 *num |= 0x10;
31101 if (flags & SECTION_STRINGS)
31102 *num |= 0x20;
31103 if (flags & SECTION_TLS)
31104 *num |= 0x400;
31105 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31106 *num |= 0x200;
31108 return true;
31111 return false;
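#if 0
/* Illustrative only, not part of the build: for a pure-code executable
   section that is not a debug section, the numeric word built above is

     0x20000000   (SECTION_ARM_PURECODE -> SHF_ARM_PURECODE)
   | 0x2          (allocatable, i.e. not SECTION_DEBUG)
   | 0x4          (SECTION_CODE)
   = 0x20000006

   i.e. the same values the ELF SHF_* flags use.  */
#endif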
31114 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31116 If pure-code is passed as an option, make sure all functions are in
31117 sections that have the SHF_ARM_PURECODE attribute. */
31119 static section *
31120 arm_function_section (tree decl, enum node_frequency freq,
31121 bool startup, bool exit)
31123 const char * section_name;
31124 section * sec;
31126 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31127 return default_function_section (decl, freq, startup, exit);
31129 if (!target_pure_code)
31130 return default_function_section (decl, freq, startup, exit);
31133 section_name = DECL_SECTION_NAME (decl);
31135 /* If a function is not in a named section then it falls under the 'default'
31136 text section, also known as '.text'. We can preserve previous behavior as
31137 the default text section already has the SHF_ARM_PURECODE section
31138 attribute. */
31139 if (!section_name)
31141 section *default_sec = default_function_section (decl, freq, startup,
31142 exit);
31144 /* If default_sec is not null, then it must be a special section like for
31145 example .text.startup. We set the pure-code attribute and return the
31146 same section to preserve existing behavior. */
31147 if (default_sec)
31148 default_sec->common.flags |= SECTION_ARM_PURECODE;
31149 return default_sec;
31152 /* Otherwise look whether a section has already been created with
31153 'section_name'. */
31154 sec = get_named_section (decl, section_name, 0);
31155 if (!sec)
31156 /* If that is not the case passing NULL as the section's name to
31157 'get_named_section' will create a section with the declaration's
31158 section name. */
31159 sec = get_named_section (decl, NULL, 0);
31161 /* Set the SHF_ARM_PURECODE attribute. */
31162 sec->common.flags |= SECTION_ARM_PURECODE;
31164 return sec;
31167 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31169 If DECL is a function declaration and pure-code is passed as an option
31170 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31171 section's name and RELOC indicates whether the declaration's initializer may
31172 contain runtime relocations. */
31174 static unsigned int
31175 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31177 unsigned int flags = default_section_type_flags (decl, name, reloc);
31179 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31180 flags |= SECTION_ARM_PURECODE;
31182 return flags;
31185 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31187 static void
31188 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31189 rtx op0, rtx op1,
31190 rtx *quot_p, rtx *rem_p)
31192 if (mode == SImode)
31193 gcc_assert (!TARGET_IDIV);
31195 scalar_int_mode libval_mode
31196 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31198 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31199 libval_mode,
31200 op0, GET_MODE (op0),
31201 op1, GET_MODE (op1));
31203 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31204 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31205 GET_MODE_SIZE (mode));
31207 gcc_assert (quotient);
31208 gcc_assert (remainder);
31210 *quot_p = quotient;
31211 *rem_p = remainder;
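#if 0
/* Illustrative only, not part of the build: for SImode the libcall
   above is __aeabi_idivmod (or __aeabi_uidivmod), which per the AEABI
   returns the quotient in r0 and the remainder in r1.  Reading the
   pair back as one DImode value means the subreg at byte 0 is the
   quotient and the subreg at GET_MODE_SIZE (SImode) bytes is the
   remainder, as extracted above.  */
#endif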
31214 /* This function checks for the availability of the coprocessor builtin passed
31215 in BUILTIN for the current target. Returns true if it is available and
31216 false otherwise. If a BUILTIN is passed for which this function has not
31217 been implemented, it will cause an internal compiler error. */
31219 bool
31220 arm_coproc_builtin_available (enum unspecv builtin)
31222 /* None of these builtins are available in Thumb mode if the target only
31223 supports Thumb-1. */
31224 if (TARGET_THUMB1)
31225 return false;
31227 switch (builtin)
31229 case VUNSPEC_CDP:
31230 case VUNSPEC_LDC:
31231 case VUNSPEC_LDCL:
31232 case VUNSPEC_STC:
31233 case VUNSPEC_STCL:
31234 case VUNSPEC_MCR:
31235 case VUNSPEC_MRC:
31236 if (arm_arch4)
31237 return true;
31238 break;
31239 case VUNSPEC_CDP2:
31240 case VUNSPEC_LDC2:
31241 case VUNSPEC_LDC2L:
31242 case VUNSPEC_STC2:
31243 case VUNSPEC_STC2L:
31244 case VUNSPEC_MCR2:
31245 case VUNSPEC_MRC2:
31246 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31247 ARMv8-{A,M}. */
31248 if (arm_arch5)
31249 return true;
31250 break;
31251 case VUNSPEC_MCRR:
31252 case VUNSPEC_MRRC:
31253 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31254 ARMv8-{A,M}. */
31255 if (arm_arch6 || arm_arch5te)
31256 return true;
31257 break;
31258 case VUNSPEC_MCRR2:
31259 case VUNSPEC_MRRC2:
31260 if (arm_arch6)
31261 return true;
31262 break;
31263 default:
31264 gcc_unreachable ();
31266 return false;
31269 /* This function returns true if OP is a valid memory operand for the ldc and
31270 stc coprocessor instructions and false otherwise. */
31272 bool
31273 arm_coproc_ldc_stc_legitimate_address (rtx op)
31275 HOST_WIDE_INT range;
31276 /* Has to be a memory operand. */
31277 if (!MEM_P (op))
31278 return false;
31280 op = XEXP (op, 0);
31282 /* We accept registers. */
31283 if (REG_P (op))
31284 return true;
31286 switch (GET_CODE (op))
31288 case PLUS:
31290 /* Or registers with an offset. */
31291 if (!REG_P (XEXP (op, 0)))
31292 return false;
31294 op = XEXP (op, 1);
31296 /* The offset must be an immediate though. */
31297 if (!CONST_INT_P (op))
31298 return false;
31300 range = INTVAL (op);
31302 /* Within the range of [-1020,1020]. */
31303 if (!IN_RANGE (range, -1020, 1020))
31304 return false;
31306 /* And a multiple of 4. */
31307 return (range % 4) == 0;
31309 case PRE_INC:
31310 case POST_INC:
31311 case PRE_DEC:
31312 case POST_DEC:
31313 return REG_P (XEXP (op, 0));
31314 default:
31315 gcc_unreachable ();
31317 return false;
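#if 0
/* Illustrative only, not part of the build: a standalone restatement
   of the immediate-offset rule above.  0, 4, -1020 and 1020 are
   accepted; 2 and 1022 fail the multiple-of-4 test and 1024 is out of
   range.  */
static int
ldc_stc_offset_ok_sketch (long offset)
{
  return offset >= -1020 && offset <= 1020 && (offset % 4) == 0;
}
#endif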
31320 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31322 In VFPv1, VFP registers could only be accessed in the mode they were
31323 set, so subregs would be invalid there. However, we don't support
31324 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31326 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31327 VFP registers in little-endian order. We can't describe that accurately to
31328 GCC, so avoid taking subregs of such values.
31330 The only exception is going from a 128-bit to a 64-bit type. In that
31331 case the data layout happens to be consistent for big-endian, so we
31332 explicitly allow that case. */
31334 static bool
31335 arm_can_change_mode_class (machine_mode from, machine_mode to,
31336 reg_class_t rclass)
31338 if (TARGET_BIG_END
31339 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31340 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31341 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31342 && reg_classes_intersect_p (VFP_REGS, rclass))
31343 return false;
31344 return true;
31347 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31348 strcpy from constants will be faster. */
31350 static HOST_WIDE_INT
31351 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31353 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31354 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31355 return MAX (align, BITS_PER_WORD * factor);
31356 return align;
31359 #if CHECKING_P
31360 namespace selftest {
31362 /* Scan the static data tables generated by parsecpu.awk looking for
31363 potential issues with the data. We primarily check for
31364 inconsistencies in the option extensions at present (extensions
31365 that duplicate others but aren't marked as aliases). Furthermore,
31366 for correct canonicalization, later options must never be a subset
31367 of an earlier option. Any extension should also only specify other
31368 feature bits and never an architecture bit. The architecture is inferred
31369 from the declaration of the extension. */
31370 static void
31371 arm_test_cpu_arch_data (void)
31373 const arch_option *arch;
31374 const cpu_option *cpu;
31375 auto_sbitmap target_isa (isa_num_bits);
31376 auto_sbitmap isa1 (isa_num_bits);
31377 auto_sbitmap isa2 (isa_num_bits);
31379 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31381 const cpu_arch_extension *ext1, *ext2;
31383 if (arch->common.extensions == NULL)
31384 continue;
31386 arm_initialize_isa (target_isa, arch->common.isa_bits);
31388 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31390 if (ext1->alias)
31391 continue;
31393 arm_initialize_isa (isa1, ext1->isa_bits);
31394 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31396 if (ext2->alias || ext1->remove != ext2->remove)
31397 continue;
31399 arm_initialize_isa (isa2, ext2->isa_bits);
31400 /* If the option is a subset of the parent option, it doesn't
31401 add anything and so isn't useful. */
31402 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31404 /* If the extension specifies any architectural bits then
31405 disallow it. Extensions should only specify feature bits. */
31406 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31411 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31413 const cpu_arch_extension *ext1, *ext2;
31415 if (cpu->common.extensions == NULL)
31416 continue;
31418 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31420 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31422 if (ext1->alias)
31423 continue;
31425 arm_initialize_isa (isa1, ext1->isa_bits);
31426 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31428 if (ext2->alias || ext1->remove != ext2->remove)
31429 continue;
31431 arm_initialize_isa (isa2, ext2->isa_bits);
31432 /* If the option is a subset of the parent option, it doesn't
31433 add anything and so isn't useful. */
31434 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31436 /* If the extension specifies any architectural bits then
31437 disallow it. Extensions should only specify feature bits. */
31438 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31444 /* Scan the static data tables generated by parsecpu.awk looking for
31445 potential issues with the data. Here we check the consistency of the
31446 FPU bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
31447 a feature bit that is not defined by any FPU flag. */
31448 static void
31449 arm_test_fpu_data (void)
31451 auto_sbitmap isa_all_fpubits (isa_num_bits);
31452 auto_sbitmap fpubits (isa_num_bits);
31453 auto_sbitmap tmpset (isa_num_bits);
31455 static const enum isa_feature fpu_bitlist[]
31456 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31457 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31459 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31461 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31462 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31463 bitmap_clear (isa_all_fpubits);
31464 bitmap_copy (isa_all_fpubits, tmpset);
31467 if (!bitmap_empty_p (isa_all_fpubits))
31469 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31470 " group that are not defined by any FPU.\n"
31471 " Check your arm-cpus.in.\n");
31472 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31476 static void
31477 arm_run_selftests (void)
31479 arm_test_cpu_arch_data ();
31480 arm_test_fpu_data ();
31482 } /* Namespace selftest. */
31484 #undef TARGET_RUN_TARGET_SELFTESTS
31485 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31486 #endif /* CHECKING_P */
31488 struct gcc_target targetm = TARGET_INITIALIZER;
31490 #include "gt-arm.h"